-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_knowledge_graph.py
63 lines (53 loc) · 1.83 KB
/
create_knowledge_graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from biocypher import BioCypher
from patient_kg.adapters.clinical_dataset_adapter import (
ClinicalDatasetAdapter,
SnomedCTAdapterEdgeType,
SnomedCTAdapterNodeType,
)
# Set up the BioCypher interface that the rest of this script writes through.
# The framework can be configured via `config/biocypher_config.yaml`, or by
# passing settings as parameters here; this call names both files explicitly.
# Alternative files kept for reference (not used by this call):
#   biocypher_config_path="config/biocypher_docker_config.yaml"
#   schema_config_path="config/old_schema_config.yaml"
bc = BioCypher(
    schema_config_path="config/generated_schema_config_for_data.yaml",
    biocypher_config_path="config/biocypher_config.yaml",
)
# Node types to include in the knowledge graph, looked up by name on
# `SnomedCTAdapterNodeType` (imported from
# `patient_kg.adapters.clinical_dataset_adapter`).
node_types = [
    SnomedCTAdapterNodeType[member_name]
    for member_name in ("PATIENT", "SURVIVAL_TIME_DAYS")
]

# Node fields to include in the knowledge graph; currently empty.
# Placeholder entries (these names are not imported by this script):
#   ExampleAdapterPatientField.ID        # patients
#   ExampleAdapterSurvivalTimeField.ID   # survival time
node_fields = []

# Edge types to include in the knowledge graph, taken from
# `SnomedCTAdapterEdgeType` in the same adapter module.
edge_types = [SnomedCTAdapterEdgeType.PATIENT_TO_SURVIVAL_TIME]
# Directory (relative path) that holds the input data and mapping files.
base_data_path = "./data"

# Build the clinical dataset adapter from the data table and its mapping
# file, passing along the node/edge selections chosen earlier in the script.
adapter = ClinicalDatasetAdapter(
    data_file_path=base_data_path + "/data.csv",
    mapping_file_path=base_data_path + "/mapping.yaml",
    node_types=node_types,
    node_fields=node_fields,
    edge_types=edge_types,
    # TODO: fields and types are not checked for at the moment.
    # Edge fields are not passed; per the original note this is expected to
    # default to all fields in the adapter -- confirm against the adapter.
)
# Create the knowledge graph: write the adapter's nodes first, then its
# edges, through the BioCypher instance.
bc.write_nodes(adapter.get_nodes())
bc.write_edges(adapter.get_edges())
# Write the admin import statement (done after nodes and edges are written).
bc.write_import_call()
# Print a summary of the run. Additional diagnostics, left disabled:
# bc.show_ontology_structure(full=True)
# bc.log_missing_input_labels()
# bc.log_duplicates()
bc.summary()