Skip to content

Commit

Permalink
16.ant.1 (#17)
Browse files Browse the repository at this point in the history
* #16 - Updates

* #16 - Updates

* Update CHANGELOG.md

#16
  • Loading branch information
antaenc authored Dec 21, 2023
1 parent df58376 commit d48198f
Show file tree
Hide file tree
Showing 33 changed files with 1,418 additions and 834 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
[markdownlint](https://dlaa.me/markdownlint/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [1.1.0] - 2023-12-21

### Changed in 1.1.0

- Updates

## [1.0.2] - 2023-03-08

### Removed in 1.0.2
Expand Down
94 changes: 56 additions & 38 deletions Python/Tasks/Deleting/DeleteFutures.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,88 +6,106 @@
import os
import sys
import time
from senzing import G2BadInputException, G2Engine, G2Exception, G2RetryableException, G2UnrecoverableException
from senzing import (
G2BadInputException,
G2Engine,
G2Exception,
G2RetryableException,
G2UnrecoverableException,
)

# Engine configuration JSON is read from the environment; None when the
# variable is unset.
engine_config_json = os.getenv("SENZING_ENGINE_CONFIGURATION_JSON", None)


def mock_logger(level, exception, error_rec=None):
    """Print a log-style message to stderr.

    Args:
        level: Label printed before the message (e.g. "WARN", "ERROR").
        exception: Exception (or any object) whose string form is the message.
        error_rec: Optional record associated with the problem; printed on
            its own line when truthy.
    """
    print(f"\n{level}: {exception}", file=sys.stderr)
    if error_rec:
        print(f"{error_rec}", file=sys.stderr)


def del_record(engine, rec_to_del):
    """Delete the record described by one JSON line.

    Args:
        engine: Object exposing ``deleteRecord(data_source, record_id)``.
        rec_to_del: JSON text; its DATA_SOURCE and RECORD_ID keys identify
            the record (a missing key is passed on as None).

    Raises:
        json.JSONDecodeError: If ``rec_to_del`` is not valid JSON.
    """
    record_dict = json.loads(rec_to_del)
    data_source = record_dict.get("DATA_SOURCE", None)
    record_id = record_dict.get("RECORD_ID", None)
    engine.deleteRecord(data_source, record_id)


def engine_stats(engine):
    """Fetch the engine's statistics and print them to stdout.

    Args:
        engine: Object exposing ``stats(response)``, which fills the given
            bytearray with the statistics text.

    Raises:
        G2Exception: Re-raised after being logged as CRITICAL. A
            G2RetryableException is logged as WARN and swallowed instead.
    """
    response = bytearray()
    try:
        engine.stats(response)
        print(f"\n{response.decode()}\n")
    except G2RetryableException as err:
        # Must be tested before G2Exception (presumably its base class —
        # TODO confirm against the senzing package).
        mock_logger("WARN", err)
    except G2Exception as err:
        mock_logger("CRITICAL", err)
        raise


def record_stats(success, error, prev_time):
    """Print a progress line and return the current time.

    The rate assumes 1000 records were processed since ``prev_time``
    (callers invoke this once per 1000 successful deletes).

    Args:
        success: Running count of successful deletes.
        error: Running count of failed deletes.
        prev_time: ``time.time()`` value captured at the previous report.

    Returns:
        The current ``time.time()``, to be passed as the next ``prev_time``.
    """
    elapsed = time.time() - prev_time
    # Guard against a zero (or negative, after a clock adjustment) interval,
    # which would otherwise raise ZeroDivisionError or report a negative rate.
    rate = int(1000 / elapsed) if elapsed > 0 else 0
    print(
        f"Processed {success:,} deletes,"
        f" {rate:,} records per second,"
        f" {error} errors"
    )
    return time.time()


def futures_del(engine, input_file):
    """Delete every record listed in ``input_file`` using a thread pool.

    The file is read one line at a time; each line is handed to
    ``del_record`` on a worker thread. At most one record per worker is in
    flight: the pool is primed with that many lines and one further line is
    submitted each time a future finishes without a fatal error.

    Args:
        engine: Engine object passed through to ``del_record`` and
            ``engine_stats``.
        input_file: Path of a text file with one JSON record per line.

    Raises:
        G2Exception: Any engine exception other than bad-input/retryable is
            logged as CRITICAL and re-raised, aborting the run.
    """
    prev_time = time.time()
    success_recs = error_recs = 0

    with open(input_file, "r") as in_file:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Prime the pool with one record per worker thread.
            # NOTE(review): _max_workers is a private executor attribute.
            futures = {
                executor.submit(del_record, engine, record): record
                for record in itertools.islice(in_file, executor._max_workers)
            }

            while futures:
                done, _ = concurrent.futures.wait(
                    futures, return_when=concurrent.futures.FIRST_COMPLETED
                )
                for f in done:
                    record = futures.pop(f)
                    try:
                        f.result()
                    except (G2BadInputException, json.JSONDecodeError) as err:
                        mock_logger("ERROR", err, record)
                        error_recs += 1
                    except G2RetryableException as err:
                        mock_logger("WARN", err, record)
                        error_recs += 1
                    except (G2UnrecoverableException, G2Exception) as err:
                        mock_logger("CRITICAL", err, record)
                        raise
                    else:
                        success_recs += 1
                        if success_recs % 1000 == 0:
                            prev_time = record_stats(
                                success_recs, error_recs, prev_time
                            )

                        if success_recs % 10000 == 0:
                            engine_stats(engine)

                    # Refill after every non-fatal outcome, not only after a
                    # success; otherwise each failed record would shrink the
                    # in-flight window and the loop could end with input
                    # still unread.
                    next_record = in_file.readline()
                    if next_record:
                        futures[
                            executor.submit(del_record, engine, next_record)
                        ] = next_record

    print(
        f"Successfully deleted {success_recs:,} records, with"
        f" {error_recs:,} errors"
    )


# Script entry: create and initialise the engine, delete the sample records,
# then shut the engine down.
try:
    g2_engine = G2Engine()
    g2_engine.init("G2Engine", engine_config_json, False)
    futures_del(g2_engine, "../../../Resources/Data/del-10K.json")
    g2_engine.destroy()
except G2Exception as err:
    # NOTE(review): the error is only logged; the process still exits with
    # status 0 and destroy() is skipped on this path — confirm intended.
    mock_logger("CRITICAL", err)
51 changes: 28 additions & 23 deletions Python/Tasks/Deleting/DeleteLoop.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,51 +3,56 @@
import json
import os
import sys
from senzing import G2BadInputException, G2Engine, G2Exception, G2RetryableException, G2UnrecoverableException
from senzing import (
G2BadInputException,
G2Engine,
G2Exception,
G2RetryableException,
G2UnrecoverableException,
)

# Engine configuration JSON is read from the environment; None when the
# variable is unset.
engine_config_json = os.getenv("SENZING_ENGINE_CONFIGURATION_JSON", None)


def mock_logger(level, exception, error_rec=None):
    """Print a log-style message to stderr.

    Args:
        level: Label printed before the message (e.g. "WARN", "ERROR").
        exception: Exception (or any object) whose string form is the message.
        error_rec: Optional record associated with the problem; printed on
            its own line when truthy.
    """
    print(f"\n{level}: {exception}", file=sys.stderr)
    if error_rec:
        print(f"{error_rec}", file=sys.stderr)


def del_records_from_file(engine, input_file):
    """Delete, one at a time, every record listed in ``input_file``.

    Args:
        engine: Object exposing ``deleteRecord(data_source, record_id)``.
        input_file: Path of a text file with one JSON record per line; the
            DATA_SOURCE and RECORD_ID keys identify the record to delete.

    Raises:
        G2Exception: Any engine exception other than bad-input/retryable is
            logged as CRITICAL and re-raised, aborting the run.
    """
    success_recs = error_recs = 0

    with open(input_file, "r") as file:
        for rec_to_del in file:
            try:
                record_dict = json.loads(rec_to_del)
                data_source = record_dict.get("DATA_SOURCE", None)
                record_id = record_dict.get("RECORD_ID", None)
                # Only the identifying pair is passed, matching del_record in
                # the futures-based scripts; the raw line is not an argument
                # of a delete.
                engine.deleteRecord(data_source, record_id)
            except (G2BadInputException, json.JSONDecodeError) as err:
                mock_logger("ERROR", err, rec_to_del)
                error_recs += 1
            except G2RetryableException as err:
                mock_logger("WARN", err, rec_to_del)
                error_recs += 1
            except (G2UnrecoverableException, G2Exception) as err:
                mock_logger("CRITICAL", err, rec_to_del)
                raise
            else:
                success_recs += 1

                # Report only when the success count has just advanced, so a
                # run of failures cannot repeat the same progress line.
                if success_recs % 1000 == 0:
                    print(
                        f"Processed {success_recs:,} deletes, with"
                        f" {error_recs:,} errors"
                    )

    print(f"Successfully deleted {success_recs:,} records, with {error_recs:,} errors")


# Script entry: create and initialise the engine, delete the sample records,
# then shut the engine down.
try:
    g2_engine = G2Engine()
    g2_engine.init("G2Engine", engine_config_json, False)
    del_records_from_file(g2_engine, "../../../Resources/Data/del-10K.json")
    g2_engine.destroy()
except G2Exception as err:
    # NOTE(review): the error is only logged; the process still exits with
    # status 0 and destroy() is skipped on this path — confirm intended.
    mock_logger("CRITICAL", err)
106 changes: 63 additions & 43 deletions Python/Tasks/Deleting/DeleteWithInfoFutures.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,96 +6,116 @@
import os
import sys
import time
from senzing import G2BadInputException, G2Engine, G2Exception, G2RetryableException, G2UnrecoverableException
from senzing import (
G2BadInputException,
G2Engine,
G2Exception,
G2RetryableException,
G2UnrecoverableException,
)

# Engine configuration JSON is read from the environment; None when the
# variable is unset.
engine_config_json = os.getenv("SENZING_ENGINE_CONFIGURATION_JSON", None)


def mock_logger(level, exception, error_rec=None):
    """Print a log-style message to stderr.

    Args:
        level: Label printed before the message (e.g. "WARN", "ERROR").
        exception: Exception (or any object) whose string form is the message.
        error_rec: Optional record associated with the problem; printed on
            its own line when truthy.
    """
    print(f"\n{level}: {exception}", file=sys.stderr)
    if error_rec:
        print(f"{error_rec}", file=sys.stderr)


def del_record(engine, rec_to_del):
    """Delete the record described by one JSON line and return the info text.

    Args:
        engine: Object exposing
            ``deleteRecordWithInfo(data_source, record_id, response)``,
            which fills ``response`` (a bytearray) with the info document.
        rec_to_del: JSON text; its DATA_SOURCE and RECORD_ID keys identify
            the record (a missing key is passed on as None).

    Returns:
        The decoded info text, without a trailing newline (the caller adds
        the line terminator when writing it out).

    Raises:
        json.JSONDecodeError: If ``rec_to_del`` is not valid JSON.
    """
    with_info = bytearray()
    record_dict = json.loads(rec_to_del)
    data_source = record_dict.get("DATA_SOURCE", None)
    record_id = record_dict.get("RECORD_ID", None)
    engine.deleteRecordWithInfo(data_source, record_id, with_info)
    return with_info.decode()


def engine_stats(engine):
    """Fetch the engine's statistics and print them to stdout.

    Args:
        engine: Object exposing ``stats(response)``, which fills the given
            bytearray with the statistics text.

    Raises:
        G2Exception: Re-raised after being logged as CRITICAL. A
            G2RetryableException is logged as WARN and swallowed instead.
    """
    response = bytearray()
    try:
        engine.stats(response)
        print(f"\n{response.decode()}\n")
    except G2RetryableException as err:
        # Must be tested before G2Exception (presumably its base class —
        # TODO confirm against the senzing package).
        mock_logger("WARN", err)
    except G2Exception as err:
        mock_logger("CRITICAL", err)
        raise


def record_stats(success, error, prev_time):
    """Print a progress line and return the current time.

    The rate assumes 1000 records were processed since ``prev_time``
    (callers invoke this once per 1000 successful deletes).

    Args:
        success: Running count of successful deletes.
        error: Running count of failed deletes.
        prev_time: ``time.time()`` value captured at the previous report.

    Returns:
        The current ``time.time()``, to be passed as the next ``prev_time``.
    """
    elapsed = time.time() - prev_time
    # Guard against a zero (or negative, after a clock adjustment) interval,
    # which would otherwise raise ZeroDivisionError or report a negative rate.
    rate = int(1000 / elapsed) if elapsed > 0 else 0
    print(
        f"Processed {success:,} deletes,"
        f" {rate:,} records per second,"
        f" {error} errors"
    )
    return time.time()


def futures_del(engine, input_file, output_file):
    """Delete every record in ``input_file`` and save each delete's info text.

    The input is read one line at a time; each line is handed to
    ``del_record`` on a worker thread. At most one record per worker is in
    flight: the pool is primed with that many lines and one further line is
    submitted each time a future finishes without a fatal error. The text
    returned for each successful delete is written to ``output_file``, one
    per line, in completion order.

    Args:
        engine: Engine object passed through to ``del_record`` and
            ``engine_stats``.
        input_file: Path of a text file with one JSON record per line.
        output_file: Path of the file to (over)write with the info text.

    Raises:
        G2Exception: Any engine exception other than bad-input/retryable is
            logged as CRITICAL and re-raised, aborting the run.
    """
    prev_time = time.time()
    success_recs = error_recs = 0

    with open(output_file, "w") as out_file:
        with open(input_file, "r") as in_file:
            with concurrent.futures.ThreadPoolExecutor() as executor:
                # Prime the pool with one record per worker thread.
                # NOTE(review): _max_workers is a private executor attribute.
                futures = {
                    executor.submit(del_record, engine, record): record
                    for record in itertools.islice(in_file, executor._max_workers)
                }

                while futures:
                    done, _ = concurrent.futures.wait(
                        futures, return_when=concurrent.futures.FIRST_COMPLETED
                    )
                    for f in done:
                        record = futures.pop(f)
                        try:
                            result = f.result()
                        except (G2BadInputException, json.JSONDecodeError) as err:
                            mock_logger("ERROR", err, record)
                            error_recs += 1
                        except G2RetryableException as err:
                            mock_logger("WARN", err, record)
                            error_recs += 1
                        except (G2UnrecoverableException, G2Exception) as err:
                            mock_logger("CRITICAL", err, record)
                            raise
                        else:
                            out_file.write(f"{result}\n")

                            success_recs += 1
                            if success_recs % 1000 == 0:
                                prev_time = record_stats(
                                    success_recs, error_recs, prev_time
                                )

                            if success_recs % 10000 == 0:
                                engine_stats(engine)

                        # Refill after every non-fatal outcome, not only
                        # after a success; otherwise each failed record
                        # would shrink the in-flight window and the loop
                        # could end with input still unread.
                        next_record = in_file.readline()
                        if next_record:
                            futures[
                                executor.submit(del_record, engine, next_record)
                            ] = next_record

    print(
        f"Successfully deleted {success_recs:,} records, with"
        f" {error_recs:,} errors"
    )
    print(f"With info responses written to {output_file}")


# Script entry: create and initialise the engine, delete the sample records
# while capturing the info output, then shut the engine down.
try:
    g2_engine = G2Engine()
    g2_engine.init("G2Engine", engine_config_json, False)
    futures_del(
        g2_engine,
        "../../../Resources/Data/del-10K.json",
        "../../../Resources/Output/Del_File_WithInfo.json",
    )
    g2_engine.destroy()
except G2Exception as err:
    # NOTE(review): the error is only logged; the process still exits with
    # status 0 and destroy() is skipped on this path — confirm intended.
    mock_logger("CRITICAL", err)
Loading

0 comments on commit d48198f

Please sign in to comment.