-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgene_regulation_relation.py
43 lines (27 loc) · 1.13 KB
/
gene_regulation_relation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import spacy
import sys
import gpt3
# Hugging Face checkpoint shared by the tokenizer and the token-classification
# weights (the name suggests a BioBERT model for genetic NER -- confirm).
_NER_CHECKPOINT = "alvaroalon2/biobert_genetic_ner"

# Load both halves of the checkpoint and wrap them in a single "ner" pipeline,
# so callers can pass raw text and get back a list of tagged entities.
tokenizer = AutoTokenizer.from_pretrained(_NER_CHECKPOINT)
model = AutoModelForTokenClassification.from_pretrained(_NER_CHECKPOINT)
model_infer = pipeline('ner', tokenizer=tokenizer, model=model)

# spaCy English pipeline; the script below uses it to split text into sentences.
nlp = spacy.load("en_core_web_sm")
# Default regulation-verb stems matched as substrings of a sentence.
# Each stem needs its own comma: adjacent string literals are silently
# concatenated, which previously fused "suppress" and "overexpress" into the
# single, never-matching entry "suppressoverexpress".
verbs = ["up", "promot", "down", "suppress", "overexpress"]
def load_verbs(path):
    """Read verb stems from *path*, one per line.

    Blank and whitespace-only lines are dropped. This matters because an
    empty string is a substring of every sentence, so a trailing newline in
    the file would otherwise make the verb filter accept everything.

    Args:
        path: Path to a text file containing one verb stem per line.

    Returns:
        The non-blank lines in file order, without their line terminator.
        Leading/trailing spaces inside a non-blank line are preserved.
    """
    with open(path) as handle:
        lines = handle.read().split("\n")
    return [line for line in lines if line.strip()]


if __name__ == "__main__":
    # Usage: <script> <input_file> <verb_file>
    #   input_file: free text to scan, split into sentences by spaCy.
    #   verb_file:  one verb stem per line; a sentence must contain at least
    #               one of them (as a substring) to be processed further.
    if len(sys.argv) < 3:
        sys.exit("usage: " + sys.argv[0] + " <input_file> <verb_file>")
    input_file = sys.argv[1]
    verb_file = sys.argv[2]

    # Context managers so the file handles are closed deterministically.
    with open(input_file) as handle:
        sentences = handle.read()
    verbs = load_verbs(verb_file)

    doc = nlp(sentences)
    # Sanity check: sentence boundaries must have been set by the pipeline.
    assert doc.has_annotation("SENT_START")

    for sent in doc.sents:
        # Cheap substring test first, so the NER model only runs on
        # sentences that mention at least one of the verb stems.
        if not any(v in sent.text for v in verbs):
            continue

        # Skip sentences in which the NER pipeline finds no entity.
        entities = model_infer(sent.text)
        if not entities:
            continue

        # Sentence has both an entity and a verb stem: hand it to the
        # relation extractor together with the prompt/training file.
        res = gpt3.extract_relation(
            "gpt3_training_gene_regulation.txt", sent.text.strip() + "\n"
        )
        print(res)