-
Notifications
You must be signed in to change notification settings - Fork 75
/
Copy pathsnomed_g_template_tools.py
executable file
·146 lines (137 loc) · 7.97 KB
/
snomed_g_template_tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/python
from __future__ import print_function
import sys, os, re, optparse
import snomed_g_lib_rf2
'''
Module: snomed_g_template_tools.py
Concept: translate template file using configuration file information
eg: replace <<<release_date>>> by 20140731, depending on config.
Supported tags:
<<<X>>>, where X is one of the keys in the config file,
such as <<<release_date>>> since "release_date" is configured.
Syntax: cmd <template_file> <output_file>
Version 1.1 Jay Pedersen, July 2016. Version created to match rf2_tools.
Author: Jay Pedersen, University of Nebraska, Jan 8, 2015
'''
if sys.platform not in ['cygwin']: # Ubuntu/Mac
def get_path(relpath, os_pathsep):
return os.path.abspath(os.path.expanduser(relpath)).rstrip(os_pathsep)+os_pathsep
else: # cygwin
def get_path(relpath, os_pathsep): # Need t:/neo/data/sc_20140731/snomed_g_rconcept_... from '/cygdrive/t/neo/data/...
s1 = os.path.realpath(os.path.expanduser(relpath)) # was abspath, realpath expands symlink
m = re.match(r'/cygdrive/(.)/(.*)', s1)
if m: # cygwin full path
s = '%s:/%s' % (m.group(1),m.group(2))
if s[-1] != os_pathsep: s += os_pathsep
else:
print('*** unable to translate path <%s> ***', relpath); sys.exit(1)
return s
def instantiate(arglist):
# instantiate:
# PARSE COMMAND
# syntax: instantiate <template-file> <outfile>
opt = optparse.OptionParser()
opt.add_option('--rf2',action='store',dest='rf2')
opt.add_option('--release_type', action='store', dest='release_type', choices=['delta','snapshot','full'])
opt.add_option('--verbose',action='store_true',dest='verbose')
opt.add_option('--action', action='store', dest='action', default='create', choices=['create','update'])
opts, args = opt.parse_args(arglist)
if not (len(args)==2 and opts.rf2 and opts.release_type):
print('Usage: intantiate <template-file> <output-file> --rf2 <dir> --release_type {Full,Snapshot,Delta}'); sys.exit(1)
template_file, output_file = args
# Connect to RF2 files
rf2_folders = snomed_g_lib_rf2.Rf2_Folders(opts.rf2, opts.release_type)
# Information for creating the CSV files
attributes_by_file = snomed_g_lib_rf2.Rf2_Attributes_per_File()
# CONFIGURATION
config = {}
config['terminology_dir'] = rf2_folders.get_terminology_dir()
config['release_date'] = rf2_folders.get_release_date()
config['release_center'] = rf2_folders.get_release_center()
config['output_dir'] = './'
# Process template file
fout = open(output_file, 'w')
release_date = config['release_date'].strip()
release_center = config.get('release_center', 'INT')
os_pathsep = config.get('os_pathsep', '/') # JGP 2015/10/07, no default previously
output_dir = get_path(config['output_dir'], os_pathsep)
if sys.platform=='win32': output_dir = output_dir.replace('\\','/') # JGP 2016/07/30 -- issue "c:\sno\build\us20160301/defining_rel_edge_rem.csv"
terminology_dir = get_path(config['terminology_dir'], os_pathsep)
config_file_suffix = '%s_%s' % (release_center, release_date)
file_protocol = 'file:///' if sys.platform in ['cygwin','win32','darwin'] else 'file:' # ubuntu is else case
# NOTE: can result in 'file:////Users/<rest>' on Mac, replace by 'file:///Users/<rest>'
# INSTANTIATION PT1 -- PROCESS FILES IN TEMPLATE, REPLACING TEMPLATES WITH INSTANTIATED VALUES
for line in [x.rstrip('\n').rstrip('\r') for x in open(template_file)]:
line = line.replace('<<<release_date>>>', release_date) \
.replace('<<<output_dir>>>', output_dir) \
.replace('<<<terminology_dir>>>', terminology_dir) \
.replace('<<<config_file_suffix>>>', config_file_suffix) \
.replace('<<<file_protocol>>>', file_protocol) \
.replace('file:////','file:///')
print(line, file=fout)
# INSTANTIATION PT2 -- DEFINING RELATIONSHIPS PROCESSING
# Handle NEW defining relationships
# Data source (for new defining relationships):
# <output_dir>/defining_rels_new_sorted.csv file
# id,active,sctid,rolegroup,typeId,rolename,destinationId,effectiveTime,
# moduleId,characteristicTypeId,modifierId,history
# 4661958023,1,471280008,1,FINDING_SITE,589001,20140731,
# 900000000000207008,900000000000011006,900000000000451002,
# Algorithm:
# NOTE: already sorted by rolename, so all FINDING_SITE elements together, etc
# ./snomed_sort_csv.py --fields 'rolename' --string
# defining_rels_new.csv defining_rels_new_sorted.csv
# ==> create separate files for each defining-relationship type that
# is found, eg: DR_<snomedct-code>_new.csv
# ==> add CYPHER code to process the created files and add the
# defining relationships.
with open(output_dir+'used_roles.csv') as f:
for idx,line in enumerate(x.rstrip('\n').rstrip('\r') for x in f):
if idx==0: continue # typeId,rolename
typeId, rolename = line.split(',')
# create CYPHER to load the file and add the relationships to ROLE_GROUP nodes
# JGP 2017-10-31. Use a 2-step procedure for creating the defining relationships,
# to support systems with smaller amounts of memory (use smaller transactions).
# The first step creates any necessary role groups, and the second step creates
# the defining relationship edges from role groups to the specified target concepts.
print('// %s defining relationships' % rolename,file=fout)
print('''RETURN 'NEW Defining relationships of type %s';''' % rolename,file=fout)
print(file=fout)
load_csv_line = ('LOAD CSV with headers from "%s%sDR_%s_new.csv" as line' % (('file:///' if sys.platform in ['cygwin','win32','darwin'] else 'file:'),output_dir,typeId)).replace('file:////','file:///')
print(load_csv_line,file=fout)
print('CALL {',file=fout)
print(' with line ',file=fout)
print(' MERGE (rg:RoleGroup { sctid: line.sctid, rolegroup: line.rolegroup })',file=fout)
print(' } IN TRANSACTIONS OF 200 ROWS;',file=fout)
print(file=fout)
print('// Add defining relationship edge in 2nd step, Java memory issue',file=fout)
load_csv_line = ('LOAD CSV with headers from "%s%sDR_%s_new.csv" as line' % (('file:///' if sys.platform in ['cygwin','win32','darwin'] else 'file:'),output_dir,typeId)).replace('file:////','file:///')
print(load_csv_line,file=fout)
print('CALL {',file=fout)
print(' with line ',file=fout)
print(' MATCH (rg:RoleGroup { sctid: line.sctid, rolegroup: line.rolegroup })',file=fout)
print( 'WITH line,rg ',file=fout)
print(' MATCH (c:ObjectConcept { sctid: line.destinationId })',file=fout)
print(' MERGE (rg)-[:%s { id: line.id, active: line.active, sctid: line.sctid,' % rolename,file=fout)
print(' typeId: line.typeId,',file=fout)
print(' rolegroup: line.rolegroup, effectiveTime: line.effectiveTime,',file=fout)
print(' moduleId: line.moduleId, characteristicTypeId: line.characteristicTypeId,',file=fout)
print(' modifierId: line.modifierId,',file=fout)
print(' history: line.history }]->(c)',file=fout)
print(' } IN TRANSACTIONS OF 200 ROWS;',file=fout)
# close CSV, wrap up
print('// Finito',file=fout)
fout.close()
return
def parse_and_interpret(arglist):
command_interpreters = [('instantiate',instantiate)]
command_names = [x[0] for x in command_interpreters]
if len(arglist) < 1: print('Usage: python <cmd> %s ...' % '[one of %s]' % ','.join(command_names)); sys.exit(1)
# DEMAND that arglist[0] be one of the sub-commands
command_name = arglist[0]
try: command_index = command_names.index(command_name)
except: print('Usage : python <cmd> %s ...' % '[one of %s]' % ','.join(command_names)); sys.exit(1)
command_interpreters[command_index][1](arglist[1:]) # call appropriate interpreter
# MAIN
parse_and_interpret(sys.argv[1:]) # causes sub-command processing to occur as well
sys.exit(0)