
Commit 191ef61

Updated bulk deid method (using old code).
vladd-bit committed Mar 1, 2024
Parent: 673dcca
Showing 1 changed file with 16 additions and 8 deletions.
medcat_service/nlp_processor/medcat_processor.py (24 changes: 16 additions & 8 deletions)
@@ -67,8 +67,8 @@ def __init__(self):

self.bulk_nproc = int(os.getenv("APP_BULK_NPROC", 8))
self.torch_threads = int(os.getenv("APP_TORCH_THREADS", -1))
self.DEID_MODE = os.getenv("DEID_MODE", "False")
self.DEID_REDACT = os.getenv("DEID_REDACT", True)
self.DEID_MODE = eval(os.getenv("DEID_MODE", "False"))
self.DEID_REDACT = eval(os.getenv("DEID_REDACT", "True"))
self.model_card_info = {}

# this is available to constrain torch threads when there
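
Note on the configuration change above: DEID_MODE and DEID_REDACT now arrive as real booleans, because the os.getenv() result is passed through eval(). A minimal sketch of an equivalent, eval-free way to parse such flags; the helper name and the accepted spellings are illustrative, not part of this commit:

    import os

    def env_flag(name, default="False"):
        # Interpret common truthy spellings; anything else counts as False.
        return os.getenv(name, default).strip().lower() in ("true", "1", "yes")

    DEID_MODE = env_flag("DEID_MODE", "False")
    DEID_REDACT = env_flag("DEID_REDACT", "True")
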
@@ -129,7 +129,7 @@ def process_content(self, content):
# when it contains any non-blank characters

start_time_ns = time.time_ns()

if self.DEID_MODE:
entities = self.cat.get_entities(text)["entities"]
text = self.cat.deid_text(text, redact=self.DEID_REDACT)
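
For a single request, the de-identification path above amounts to two calls on the loaded model: collect the entities for the response, then replace the text with its de-identified (optionally redacted) form. A minimal standalone sketch, assuming cat is the DeIdModel loaded in _create_cat; the wrapper function itself is illustrative:

    def deid_single(cat, text, redact=True):
        # Entities are still extracted so callers can see what was detected,
        # while the returned text has those spans de-identified.
        entities = cat.get_entities(text)["entities"]
        deidentified = cat.deid_text(text, redact=redact)
        return deidentified, entities
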
@@ -190,10 +190,12 @@ def process_content_bulk(self, content):
start_time_ns = time.time_ns()

try:
if eval(self.DEID_MODE):
if self.DEID_MODE:
_text_res = []
for text_record in content:
ann_res.append(text_record[0], self.cat.deid_text(text_record[1], redact=eval(self.DEID_REDACT)))
#ann_res = self.cat.deid_multi_texts(content, MedCatProcessor._generate_input_doc(content, invalid_doc_ids), batch_size=batch_size, nproc=nproc)
_text_res.append(self.cat.deid_text(text_record["text"], redact=self.DEID_REDACT))
content = [{"text": txt} for txt in _text_res]
self.log
else:
ann_res = self.cat.multiprocessing_batch_docs_size(
MedCatProcessor._generate_input_doc(content, invalid_doc_ids), batch_size=batch_size, nproc=nproc)
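
The bulk path above now de-identifies record by record with deid_text() and rebuilds content from the results, while a deid_multi_texts() call remains commented out. A minimal standalone sketch of that loop, assuming cat is a loaded DeIdModel and content is the usual list of {"text": ...} payloads; the wrapper function itself is illustrative:

    def deid_bulk(cat, content, redact=True):
        # Run deid_text() on every record, then rebuild the payload list so the
        # de-identified strings replace the original texts.
        deidentified = [cat.deid_text(record["text"], redact=redact) for record in content]
        return [{"text": txt} for txt in deidentified]
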
@@ -253,7 +255,7 @@ def _create_cat(self):
self.log.info("Loading model pack...")
cat = CAT.load_model_pack(model_pack_path)

if eval(self.DEID_MODE):
if self.DEID_MODE:
cat = DeIdModel.load_model_pack(model_pack_path)

# Apply CUI filter if provided
@@ -360,7 +362,7 @@ def _generate_result(self, in_documents, annotations, invalid_doc_idx, additiona

for i in range(len(in_documents)):
in_ct = in_documents[i]
if i in annotations.keys():
if not self.DEID_MODE and i in annotations.keys():
# generate output for valid annotations

entities = self.process_entities(annotations.get(i))
@@ -371,6 +373,12 @@ def _generate_result(self, in_documents, annotations, invalid_doc_idx, additiona
"success": True,
"timestamp": NlpProcessor._get_timestamp()}
out_res.update(additional_info)
elif self.DEID_MODE:
out_res = {"text": in_ct["text"],
"annotations": [],
"success": True,
"timestamp": NlpProcessor._get_timestamp()}
out_res.update(additional_info)
else:
# Don't fetch an annotation set
# as the document was invalid
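
With the new branch above, DEID-mode results carry the already de-identified text together with an empty annotation list. A sketch of what one such result entry might look like; the field values are illustrative, and the timestamp format is an assumption about NlpProcessor._get_timestamp():

    example_deid_result = {
        "text": "Patient [REDACTED] was seen on [REDACTED].",  # de-identified text (placeholder style assumed)
        "annotations": [],                                      # intentionally empty in DEID mode
        "success": True,
        "timestamp": "2024-03-01T12:00:00.000+0000",            # illustrative value
    }
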
