# Configure AWS credentials, then copy the source CSV files from S3 into the Neo4j import directory
aws configure
sudo aws s3 cp s3://neo4jrepo.sg/HCAI/import/ /var/lib/neo4j/import/ --recursive
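# Optional sanity check (illustrative; the path assumes a default package install of Neo4j): confirm the files landed
ls -lh /var/lib/neo4j/import/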
// Uniqueness constraints and a Lab.patientunitstayid index (uses the older ON ... ASSERT constraint syntax, which requires Neo4j 4.x or earlier)
CREATE CONSTRAINT ON (h:Hospital) ASSERT h.hospitalid IS UNIQUE;
CREATE CONSTRAINT ON (p:Patient) ASSERT p.patient_id IS UNIQUE;
CREATE CONSTRAINT ps_id_cstr ON (ps:PatientUnitStay) ASSERT ps.patientunitstayid IS UNIQUE;
CREATE CONSTRAINT treatment_id_cstr ON (t:Treatment) ASSERT t.treatmentid IS UNIQUE;
CREATE INDEX lab_psid_ind IF NOT EXISTS FOR (l:Lab) ON (l.patientunitstayid);
CREATE CONSTRAINT ON (ds:DiagString) ASSERT ds.dx IS UNIQUE;
CREATE CONSTRAINT ON (dc:DiagCategory) ASSERT dc.category IS UNIQUE;
CREATE CONSTRAINT diag_id_cstr ON (d:Diagnosis) ASSERT d.diagnosisid IS UNIQUE;
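// Optional check that the constraints and index were created. SHOW CONSTRAINTS / SHOW INDEXES assume Neo4j 4.2+;
// on older versions CALL db.constraints() and CALL db.indexes() can be used instead.
SHOW CONSTRAINTS;
SHOW INDEXES;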
// Load ICU stay details: create Patient, PatientUnitStay and Hospital nodes and link them
CALL apoc.periodic.iterate(
"LOAD CSV WITH HEADERS FROM 'file:///icustay_detail.gz' AS row
RETURN row",
"MERGE (p:Patient {patient_id: row.uniquepid})
SET p.ethnicity = row.ethnicity,
    p.gender = row.gender,
    p.age = toInteger(row.age)
MERGE (ps:PatientUnitStay {patientunitstayid: row.patientunitstayid})
SET ps.unittype = row.unittype,
    ps.hospitaladmitoffset = toInteger(row.hospitaladmitoffset),
    ps.hospitaldischargeoffset = toInteger(row.hospitaldischargeoffset),
    ps.unitadmitoffset = toInteger(row.unitadmitoffset),
    ps.unitdischargeoffset = toInteger(row.unitdischargeoffset),
    ps.hospitaldischargeyear = toInteger(row.hospitaldischargeyear),
    ps.hosp_mort = toInteger(row.hosp_mort),
    ps.icu_los_hours = toInteger(row.icu_los_hours)
MERGE (h:Hospital {hospitalid: row.hospitalid})
SET h.region = row.region
MERGE (p)-[r1:HAS_STAY]->(ps)
MERGE (ps)-[r2:WENT_HOSP]->(h)",
{batchSize: 5000, parallel: false}
);
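// Optional sanity check after the load (illustrative): node counts per label
MATCH (p:Patient) RETURN count(p) AS patients;
MATCH (ps:PatientUnitStay) RETURN count(ps) AS stays;
MATCH (h:Hospital) RETURN count(h) AS hospitals;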
// Load treatments and link each Treatment to its PatientUnitStay
CALL apoc.periodic.iterate(
"LOAD CSV WITH HEADERS FROM 'file:///treatment000000000000.csv.gz' AS row
RETURN row",
"MERGE (t:Treatment { treatmentid: row.treatmentid })
SET t.treatmentoffset = toInteger(row.treatmentoffset),
    t.treatmentstring = row.treatmentstring,
    t.activeupondischarge = toBoolean(row.activeupondischarge)
WITH t, row
MATCH (ps:PatientUnitStay { patientunitstayid: row.patientunitstayid })
MERGE (ps)-[r:HAS_TREATMENT]->(t)",
{batchSize: 5000, parallel: false}
);
// Load pivoted lab results: one Lab node per row, linked to its PatientUnitStay; missing values default to 0
CALL apoc.periodic.iterate(
"LOAD CSV WITH HEADERS FROM 'file:///pivoted_lab.gz' AS row
RETURN row",
"CREATE (l:Lab { patientunitstayid: row.patientunitstayid })
SET l.chartoffset = toInteger(coalesce(row.labresultoffset, 0)),
    l.albumin = toFloat(coalesce(row.albumin, 0)),
    l.bilirubin = toFloat(coalesce(row.bilirubin, 0)),
    l.BUN = toInteger(coalesce(row.BUN, 0)),
    l.calcium = toFloat(coalesce(row.calcium, 0)),
    l.creatinine = toFloat(coalesce(row.creatinine, 0)),
    l.glucose = toInteger(coalesce(row.glucose, 0)),
    l.bicarbonate = toInteger(coalesce(row.bicarbonate, 0)),
    l.TotalCO2 = toInteger(coalesce(row.TotalCO2, 0)),
    l.hematocrit = toFloat(coalesce(row.hematocrit, 0)),
    l.hemoglobin = toFloat(coalesce(row.hemoglobin, 0)),
    l.INR = toFloat(coalesce(row.INR, 0)),
    l.lactate = toFloat(coalesce(row.lactate, 0)),
    l.platelets = toInteger(coalesce(row.platelets, 0)),
    l.potassium = toFloat(coalesce(row.potassium, 0)),
    l.ptt = toInteger(coalesce(row.ptt, 0)),
    l.sodium = toInteger(coalesce(row.sodium, 0)),
    l.wbc = toFloat(coalesce(row.wbc, 0)),
    l.bands = toFloat(coalesce(row.bands, 0)),
    l.alt = toInteger(coalesce(row.alt, 0)),
    l.ast = toInteger(coalesce(row.ast, 0)),
    l.alp = toInteger(coalesce(row.alp, 0))
WITH l, row
MATCH (ps:PatientUnitStay { patientunitstayid: row.patientunitstayid })
MERGE (ps)-[r:HAS_LAB]->(l)",
{batchSize: 5000, parallel: false}
);
// Load diagnosis categories; row.dx is a '|'-separated list of diagnosis strings
// (assumes the DiagString nodes matched here already exist)
LOAD CSV WITH HEADERS FROM 'file:///diagnosis_categories.gz' AS row
MERGE (dc:DiagCategory { category: row.category })
WITH dc, row.dx AS dxrow
UNWIND split(dxrow, '|') AS dxrow2
MATCH (ds:DiagString { dx: dxrow2 })
MERGE (ds)-[r:BELONGS]->(dc);
// Load diagnoses and link each Diagnosis to its PatientUnitStay
CALL apoc.periodic.iterate(
"LOAD CSV WITH HEADERS FROM 'file:///diagnosis000000000000.csv.gz' AS row
RETURN row",
"MERGE (d:Diagnosis { diagnosisid: row.diagnosisid })
SET d.activeupondischarge = toBoolean(row.activeupondischarge),
    d.diagnosisoffset = toInteger(row.diagnosisoffset),
    d.diagnosisstring = row.diagnosisstring,
    d.icd9code = row.icd9code,
    d.diagnosispriority = row.diagnosispriority
WITH d, row.patientunitstayid AS psid
MATCH (ps:PatientUnitStay { patientunitstayid: psid })
MERGE (ps)-[r:HAS_DIAG]->(d)",
{batchSize: 5000, parallel: false}
);
// Link Diagnosis to DiagString: each '|'-separated element of diagnosisstring maps to a DiagString node
CALL apoc.periodic.iterate(
"MATCH (d:Diagnosis) RETURN d",
"UNWIND split(d.diagnosisstring, '|') AS dsrow
MATCH (ds:DiagString { dx: dsrow })
MERGE (d)-[r:RELATED_DS]->(ds)",
{batchSize: 5000, parallel: false}
);
// Delete the RELATED_DS relationships created above
CALL apoc.periodic.iterate(
"MATCH (d:Diagnosis)-[r:RELATED_DS]->(ds:DiagString) RETURN r",
"DELETE r",
{batchSize: 5000, parallel: false}
);
// Link Treatment to DiagString: match '|'-separated elements of treatmentstring against DiagString nodes
CALL apoc.periodic.iterate(
"MATCH (t:Treatment) RETURN t",
"UNWIND split(t.treatmentstring, '|') AS dsrow
MATCH (ds:DiagString { dx: dsrow })
MERGE (t)-[r:Tr_RELATED_DS]->(ds)",
{batchSize: 5000, parallel: false}
);
// Import MicroLab culture results and link each one to its PatientUnitStay
LOAD CSV WITH HEADERS FROM 'file:///microLab.csv.gz' AS row
MERGE (ml:MicroLab { microlabid: row.microlabid })
SET ml.culturetakenoffset = toInteger(row.culturetakenoffset),
    ml.organism = row.organism
WITH ml, row
MATCH (ps:PatientUnitStay { patientunitstayid: row.patientunitstayid })
MERGE (ps)-[r:HAS_MICROLAB]->(ml);
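// Example traversal over the finished graph (illustrative only; uses the labels, relationships and properties defined above)
MATCH (p:Patient)-[:HAS_STAY]->(ps:PatientUnitStay)-[:HAS_MICROLAB]->(ml:MicroLab)
RETURN p.patient_id, ps.patientunitstayid, ml.organism
LIMIT 10;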