forked from clear-nus/edc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
109 lines (100 loc) · 4.03 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from argparse import ArgumentParser
from edc.edc_framework import EDC
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
if __name__ == "__main__":
parser = ArgumentParser()
# OIE module setting
parser.add_argument(
"--oie_llm", default="mistralai/Mistral-7B-Instruct-v0.2", help="LLM used for open information extraction."
)
parser.add_argument(
"--oie_prompt_template_file_path",
default="./prompt_templates/oie_template.txt",
help="Promp template used for open information extraction.",
)
parser.add_argument(
"--oie_few_shot_example_file_path",
default="./few_shot_examples/default/oie_few_shot_examples.txt",
help="Few shot examples used for open information extraction.",
)
# Schema Definition setting
parser.add_argument(
"--sd_llm", default="mistralai/Mistral-7B-Instruct-v0.2", help="LLM used for schema definition."
)
parser.add_argument(
"--sd_prompt_template_file_path",
default="./prompt_templates/sd_template.txt",
help="Prompt template used for schema definition.",
)
parser.add_argument(
"--sd_few_shot_example_file_path",
default="./few_shot_examples/default/sd_few_shot_examples.txt",
help="Few shot examples used for schema definition.",
)
# Schema Canonicalization setting
parser.add_argument(
"--sc_llm",
default="mistralai/Mistral-7B-Instruct-v0.2",
help="LLM used for schema canonicaliztion verification.",
)
parser.add_argument(
"--sc_prompt_template_file_path",
default="./prompt_templates/sc_template.txt",
help="Prompt template used for schema canonicalization verification.",
)
# Refinement setting
parser.add_argument("--sr_adapter_path", default=None, help="Path to adapter of schema retriever.")
parser.add_argument(
"--oie_refine_prompt_template_file_path",
default="./prompt_templates/oie_r_template.txt",
help="Prompt template used for refined open information extraction.",
)
parser.add_argument(
"--oie_refine_few_shot_example_file_path",
default="./few_shot_examples/default/oie_few_shot_refine_examples.txt",
help="Few shot examples used for refined open information extraction.",
)
parser.add_argument(
"--ee_llm", default="mistralai/Mistral-7B-Instruct-v0.2", help="LLM used for entity extraction."
)
parser.add_argument(
"--ee_prompt_template_file_path",
default="./prompt_templates/ee_template.txt",
help="Prompt templated used for entity extraction.",
)
parser.add_argument(
"--ee_few_shot_example_file_path",
default="./few_shot_examples/default/ee_few_shot_examples.txt",
help="Few shot examples used for entity extraction.",
)
parser.add_argument(
"--em_prompt_template_file_path",
default="./prompt_templates/em_template.txt",
help="Prompt template used for entity merging.",
)
# Input setting
parser.add_argument(
"--input_text_file_path",
default="./datasets/example.txt",
help="File containing input texts to extract KG from, each line contains one piece of text.",
)
parser.add_argument(
"--target_schema_path",
default=None,
help="File containing the target schema to align to.",
)
parser.add_argument("--refinement_iterations", default=0, type=int, help="Number of iteration to run.")
parser.add_argument(
"--enrich_schema",
action="store_true",
help="Whether un-canonicalizable relations should be added to the schema.",
)
# Output setting
parser.add_argument("--output_dir", default="./output/tmp", help="Directory to output to.")
args = parser.parse_args()
args = vars(args)
print(args)
edc = EDC(**args)
input_text_list = open(args["input_text_file_path"], "r").readlines()
output_kg = edc.extract_kg(input_text_list, args["output_dir"], refinement_iterations=args["refinement_iterations"])