-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathtiming_accuracy_test.py
64 lines (49 loc) · 2.35 KB
/
timing_accuracy_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""Extracts clinical entities from free text using LLMs."""
from entity_extractor import *
def disp(line, ann_line, entities, time_taken):
    """Print one test case with its entities colour-coded against annotations.

    Args:
        line: The text that was analysed; printed with the entity keys
            highlighted via ``colorize_text``.
        ann_line: Annotation text for this case. It is split on newlines and
            each piece is stripped before comparison.
        entities: Mapping of entity key to either ``None`` or a sequence of
            the form ``(match, rating, *method_info)``, where ``match`` is
            indexed with ``'code'`` and ``'display'``.
        time_taken: Elapsed time for this case; shown rounded to one decimal
            place with an ``s`` suffix.

    Colour of each matched entity:
        * blue   - rating below 3
        * green  - the match code occurs in some annotation line
        * yellow - the match display text occurs in some annotation line
        * red    - neither occurs
    """
    ann = [a.strip() for a in ann_line.split('\n')]
    print(colorize_text(line, entities.keys(), COLOR_GREEN))
    print(f'(overlapping matches not highlighted)\n{len(entities)} entities found in {round(time_taken, 1)}s:')
    print(ann)

    for key, value in entities.items():
        if value is None:
            # Nothing to unpack for a missing result; report it as a
            # no-match with empty method info instead of raising TypeError.
            print(f'{key} {COLOR_RED}No match{COLOR_RESET} {[]}')
            continue

        match, rating, *method_info = value
        if value[2] == NO_MATCH:
            print(f'{key} {COLOR_RED}No match{COLOR_RESET} {method_info}')
            continue

        if rating < 3:
            color = COLOR_BLUE
        elif any(match['code'] in b for b in ann):
            color = COLOR_GREEN
        elif any(match['display'] in b for b in ann):
            color = COLOR_YELLOW
        else:
            color = COLOR_RED
        print(f'{color}{key}{COLOR_RESET}: {COLOR_BLUE}{match["display"]} |{match["code"]}| {color}(confidence {rating}){COLOR_RESET} {method_info}')

    # Fail case where entities = {}
    if not entities:
        print(f'{COLOR_RED}Could not identify.{COLOR_RESET}')
    # Separator between consecutive test cases.
    print(COLOR_BLUE, '---', COLOR_RESET)
def main():
    """Run entity identification over every test case and print timings.

    Reads the test cases from ``clinical_text_3.txt`` (blank lines and lines
    starting with ``#`` are skipped) and the annotations from
    ``clinical_text_3_ann.txt`` (split on ``;;``). Each case is paired with
    its annotation in order, passed through ``as_english`` and ``identify``,
    and displayed with ``disp``. The total elapsed time is printed last.
    """
    with open("clinical_text_3.txt", "r") as text_file:
        # Trailing whitespace is removed first so that whitespace-only lines
        # count as blank and are dropped together with comments/titles.
        lines = [
            row
            for row in (raw.rstrip() for raw in text_file.readlines())
            if row and not row.startswith('#')
        ]

    with open("clinical_text_3_ann.txt", "r") as ann_file:
        ann_lines = ann_file.read().split(';;')

    start_time_all_cases = time.time()
    entities_per_line = []
    language = identify_language(lines)

    # zip pairs each annotation with its test case and stops at the shorter
    # of the two sequences.
    for ann_line, line in zip(ann_lines, lines):
        case_started = time.time()
        # Text in other languages is translated to English before extraction.
        text = as_english(line, language)
        entities = identify(text)
        entities_per_line.append(entities)
        elapsed = time.time() - case_started
        disp(text, ann_line, entities, elapsed)

    total_elapsed = time.time() - start_time_all_cases
    print(COLOR_BLUE, total_elapsed, 's', COLOR_RESET, sep='')
# Only run the test cases when executed as a script, not when imported.
if __name__ == "__main__":
    main()