Skip to content

Commit c5a360e

Browse files
authored
Merge pull request #158 from Public-Health-Bioinformatics/cancogen_NML_LIMS_v0.2
WIP Cancogen nml lims v0.2 export
2 parents 9534809 + 8e28c1c commit c5a360e

File tree

10 files changed

+3421
-2167
lines changed

10 files changed

+3421
-2167
lines changed

script/export_utils.js

+131-29
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ const getFieldNameMap = (fields) => {
2222
* This code works on exportHeaders as either a Map or an array of
2323
* [['field_name',[fields],...]
2424
* @param {Array} exportHeaders See `export.js`.
25-
* @param {Array<Object>} array of all source fields.
26-
* @param {String} export column prefix
25+
* @param {Array<Object>} fields array of all source fields.
26+
* @param {String} prefix export column prefix
2727
*/
2828
const getHeaderMap = (exportHeaders, fields, prefix) => {
2929
var headerMap = {};
@@ -44,51 +44,153 @@ const getHeaderMap = (exportHeaders, fields, prefix) => {
4444
}
4545
}
4646

47+
let field_message = [];
48+
let field_export_message = [];
49+
4750
for (const [fieldIndex, field] of fields.entries()) {
4851
if (field.exportField && prefix in field.exportField) {
4952
for (const target of field.exportField[prefix]) {
5053
if ('field' in target) {
51-
if (target.field in headerMap) {
52-
var sources;
53-
if (exportHeaders instanceof Map) {
54-
sources = exportHeaders.get(target.field);
55-
// If given field isn't already mapped, add it.
56-
if (sources.indexOf(field.fieldName) == -1) {
57-
sources.push(field.fieldName);
58-
};
59-
exportHeaders.set(target.field, sources);
54+
var sources;
55+
if (exportHeaders instanceof Map) {
56+
if (target.field in headerMap) {
57+
field_export_message.push(target.field);
58+
}
59+
else {
60+
if (!exportHeaders.has(target.field)) {
61+
field_message.push(target.field);
62+
// Issue: all template driven exportHeader fields are showing AFTER export.js mentioned ones.
63+
headerMap[target.field] = exportHeaders.length;
64+
exportHeaders.set(target.field, []);
65+
}
6066
}
61-
else { // Save to array
62-
sources = exportHeaders[headerMap[target.field]][1];
63-
// As above
64-
if (sources.indexOf(field.fieldName) == -1) {
65-
sources.push(field.fieldName);
66-
};
67-
exportHeaders[headerMap[target.field]][1] = sources;
67+
let sources = exportHeaders.get(target.field);
68+
if (!sources)
69+
console.log('Malformed export.js exportHeader field:', target.field)
70+
// If given field isn't already mapped, add it.
71+
if (sources.indexOf(field.fieldName) == -1) {
72+
sources.push(field.fieldName);
6873
};
74+
exportHeaders.set(target.field, sources);
6975
}
70-
else {
71-
const msg = 'The EXPORT_' + prefix + ' column for ' + field.fieldName +' requests a map to a non-existen export template field: ' + target.field;
72-
console.log (msg);
76+
else { // Save to array
77+
if (target.field in headerMap) {
78+
field_export_message.push(target.field);
79+
}
80+
else {
81+
// Add field to exportHeaders
82+
// Issue: can this handle many-to-one mapping?
83+
field_message.push(target.field);
84+
headerMap[target.field] = exportHeaders.length;
85+
exportHeaders.push([target.field, []]);
86+
}
87+
sources = exportHeaders[headerMap[target.field]][1];
88+
// As above
89+
if (sources.indexOf(field.fieldName) == -1) {
90+
sources.push(field.fieldName);
91+
};
92+
exportHeaders[headerMap[target.field]][1] = sources;
7393
};
94+
7495
};
7596
};
7697
};
7798
};
99+
// This will output a list of fields added to exportHeaders by way of template specification which haven't been included in export.js
100+
if (field_message)
101+
console.log('Export fields added by template:', field_message)
102+
if (field_export_message)
103+
console.log('Export fields stated in export.js):', field_export_message)
78104
};
79105

80-
const getMappedField = (sourceRow, sourceFieldNames, fieldNameMap, delimiter) => {
81-
// This provides an export field composed of one or more more input
82-
// fields, separated by a ';' delimiter if not null.
106+
/**
 * Build one export cell from one or more source fields of a row.
 *
 * Each named source field's value is trimmed, optionally translated through
 * nullOptionsMap (e.g. "Missing" -> the export system's equivalent), and, for
 * 'select' / 'multiple' fields, passed through getTransformedField() so that
 * vocabulary terms can be replaced by their export-format value. The
 * resulting pieces are joined with `delimiter`. Empty cells are skipped.
 *
 * @param {Object} sourceRow row values, looked up by fieldNameMap[fieldName].
 * @param {Array<String>} sourceFieldNames names of the source fields that feed this export field.
 * @param {Array<Object>} sourceFields array of all source fields, looked up by the same index as sourceRow.
 * @param {Object} fieldNameMap maps a field name to its index.
 * @param {String} delimiter to separate multi-source field values with.
 * @param {String} prefix of export format.
 * @param {Map} nullOptionsMap conversion of Missing etc. to export db equivalent.
 * @return {String} Concatenated string of values.
 */
const getMappedField = (sourceRow, sourceFieldNames, sourceFields, fieldNameMap, delimiter, prefix, nullOptionsMap = null) => {
  const mappedCell = [];
  for (const fieldName of sourceFieldNames) {
    const sourceIndex = fieldNameMap[fieldName];
    const rawVal = sourceRow[sourceIndex];
    if (!rawVal) continue;

    // Coerce before trimming: a numeric cell has no .trim() method.
    let mappedCellVal = String(rawVal).trim();
    if (mappedCellVal.length === 0) continue;

    if (nullOptionsMap && nullOptionsMap.has(mappedCellVal)) {
      mappedCellVal = nullOptionsMap.get(mappedCellVal);
    }

    // A missing field definition is treated as a plain (untransformed) field.
    const field = sourceFields[sourceIndex];
    const datatype = field ? field.datatype : undefined;

    if (datatype === 'select') {
      mappedCell.push(getTransformedField(mappedCellVal, field, prefix));
    }
    else if (datatype === 'multiple') {
      // ISSUE: relying on semicolon delimiter in input
      for (const part of mappedCellVal.split(';')) {
        const cellVal = part.trim();
        // A stray or trailing ';' must not produce an empty export entry.
        if (cellVal.length === 0) continue;
        mappedCell.push(getTransformedField(cellVal, field, prefix));
      }
    }
    else {
      mappedCell.push(mappedCellVal);
    }
  }
  return mappedCell.join(delimiter);
};
91146

147+
/**
 * Some vocabulary fields get mapped over to export format values.
 *
 * Looks `value` up in the field's 'schema:ItemList' vocabulary (via
 * findById). If the matching term carries an exportField mapping for
 * `prefix`, the first mapping's `value` is returned in place of the input.
 * In every other case (no vocabulary, no matching term, no mapping for this
 * prefix, or a mapping without a `value` key) the input value is returned
 * unchanged.
 *
 * @param {String} value to be exported.
 * @param {Object} field source field whose 'schema:ItemList' is examined for mappings.
 * @param {String} prefix of export format to examine.
 * @return {String} export-format value, or the original value if no substitution applies.
 */
const getTransformedField = (value, field, prefix) => {
  const itemList = field['schema:ItemList'];
  if (!itemList) {
    return value;
  }

  const term = findById(itemList, value);
  // The `in` operator throws on primitives, so check the type first.
  if (!term || typeof term !== 'object') {
    return value;
  }

  // Looking for term.exportField['GRDI'] for example:
  const exportField = term.exportField;
  if (!exportField || typeof exportField !== 'object' || !(prefix in exportField)) {
    return value;
  }

  // Here mapping involves a value substitution
  // Note possible [target field]:[value] twist
  // Only the first mapping is consulted.
  const [mapping] = exportField[prefix];
  if (mapping && typeof mapping === 'object' && 'value' in mapping) {
    return mapping.value;
  }
  // A mapping that only names a target field leaves the value itself as-is.
  return value;
};
171+
172+
/**
 * Find key in nested object (nested dictionaries), depth-first.
 * Adapted from: https://codereview.stackexchange.com/questions/73714/find-a-nested-property-in-an-object
 *
 * Only an object's own properties are considered, so inherited members such
 * as `toString` or `constructor` never count as a match. Null values and
 * non-object values are not descended into.
 *
 * @param {Object} o nested dictionaries to search.
 * @param {String} key to find in dictionaries.
 * @return {*} the value stored under the first matching key (a direct match
 *   on `o` is returned as-is; a nested match is returned only if truthy), or
 *   undefined when nothing is found.
 */
function findById(o, key) {
  // typeof null === 'object', so null must be excluded explicitly.
  if (o === null || typeof o !== 'object') {
    return undefined;
  }
  if (Object.prototype.hasOwnProperty.call(o, key)) {
    return o[key];
  }
  for (const p of Object.keys(o)) {
    const child = o[p];
    if (child !== null && typeof child === 'object') {
      const result = findById(child, key);
      if (result) {
        return result;
      }
    }
  }
  return undefined;
}
192+
193+
92194
/**
93195
* Get a dictionary of empty arrays for each ExportHeader field
94196
* FUTURE: enable it to work with hierarchic vocabulary lists
@@ -112,9 +214,9 @@ const getRowMap = (sourceRow, sourceFields, RuleDB, fields, fieldNameMap, prefix
112214
// has a mapping for export to a GRDI target field above, then set target
113215
// to value.
114216
if (value && value.length > 0) {
115-
const vocabulary = fields[sourceIndex].vocabulary;
116-
if (value in vocabulary) {
117-
const term = vocabulary[value];
217+
const vocab_list = fields[sourceIndex]['schema:ItemList'];
218+
if (value in vocab_list) {
219+
const term = vocab_list[value];
118220
// Looking for term.exportField['GRDI'] for example:
119221
if ('exportField' in term && prefix in term.exportField) {
120222
for (let mapping of term.exportField[prefix]) {

script/main.js

+14-9
Original file line numberDiff line numberDiff line change
@@ -76,15 +76,17 @@ const processData = (data) => {
7676
const flatVocabularies = {};
7777
const fields = getFields(data);
7878
for (const field of fields) {
79-
if (field.vocabulary) {
79+
if ('schema:ItemList' in field) {
8080
flatVocabularies[field.fieldName] =
81-
stringifyNestedVocabulary(field.vocabulary);
81+
stringifyNestedVocabulary(field['schema:ItemList']);
8282
}
8383
}
8484

85+
// parent is each data section
8586
for (const parent of data) {
87+
// parent.children is list of fields
8688
for (const child of parent.children) {
87-
if (child.vocabulary) {
89+
if ('schema:ItemList' in child) {
8890
child.flatVocabulary = flatVocabularies[child.fieldName];
8991

9092
if (child.source) {
@@ -257,7 +259,9 @@ const getColumns = (data) => {
257259
let ret = [];
258260
for (const field of getFields(data)) {
259261
const col = {};
260-
if (field.requirement) col.requirement = field.requirement;
262+
if (field.requirement) {
263+
col.requirement = field.requirement;
264+
}
261265
switch (field.datatype) {
262266
case 'xs:date':
263267
col.type = 'date';
@@ -303,13 +307,14 @@ const getColumns = (data) => {
303307
* processing.
304308
* @return {Array<String>} Flattened vocabulary.
305309
*/
306-
const stringifyNestedVocabulary = (vocabulary, level=0) => {
310+
const stringifyNestedVocabulary = (vocab_list, level=0) => {
307311

308312
let ret = [];
309-
for (const val of Object.keys(vocabulary)) {
310-
if (val != 'exportField') { // Ignore field map values used for export.
311-
ret.push(' '.repeat(level) + val);
312-
ret = ret.concat(stringifyNestedVocabulary(vocabulary[val], level+1));
313+
for (const val of Object.keys(vocab_list)) {
314+
//if (val != 'exportField') { // Ignore field map values used for export.
315+
ret.push(' '.repeat(level) + val);
316+
if ('schema:ItemList' in vocab_list[val]) {
317+
ret = ret.concat(stringifyNestedVocabulary(vocab_list[val]['schema:ItemList'], level+1));
313318
}
314319
}
315320
return ret;

script/make_data.py

+60-16
Original file line numberDiff line numberDiff line change
@@ -22,28 +22,68 @@
2222
reference_html = ''; # Content of a report that details section fields
2323
search_root = '/';
2424

25-
# Consolidates all EXPORT_XYZ terms into one data structure
26-
# exportField: {PREFIX:[[field name],[value rename],...]}
27-
def export_fields (EXPORT_FORMAT, field, row):
25+
# For a column in input spreadsheet named EXPORT_[EXPORT_FORMAT], add to
26+
# dictionary structure (field) a field.exportField datastructure containing
27+
# transforms to each EXPORT_FORMAT value, or column and value combination.
28+
# e.g.
29+
# "Confusion": {
30+
# "exportField": {
31+
# "NML_LIMS": [
32+
# {
33+
# "field": "HC_SYMPTOMS",
34+
# "value": "CONFUSION"
35+
# }
36+
# ],
37+
# },
38+
# ... other child terms
39+
#
40+
# exportField: {[PREFIX]:[{"field":[field name],"value":[value transform]},...]}
41+
# input spreadsheet EXPORT_[EXPORT_FORMAT] is coded as:
42+
# [column1]:[value];[column2]:[value]; // multiple column targets
43+
# or [value];[value]; // default column target
44+
#
45+
# @param Array<String> EXPORT_FORMAT list of export formats to search for
46+
# @param Dict field Dictionary of vocabulary field details
47+
# @param Dict row containing all field data
48+
# @return Dict field modified
49+
50+
def export_fields (EXPORT_FORMAT, field, row, as_field = False):
2851
if len(EXPORT_FORMAT) > 0:
2952
formats = {};
3053
for export_field in EXPORT_FORMAT:
3154
prefix = export_field[7:]; # Get rid of "EXPORT_" part.
3255
if row[export_field] == None:
3356
print ('Error: ', export_field, 'not found in row with label [',row['label'], ']. Malformed text in row?');
3457
continue;
58+
59+
# An export field may have one or more [field name]:[field value] transforms, separated by ";"
3560
for item in row[export_field].split(";"):
36-
# an export field may have one or more [field name]:[new field value] mapping.
3761
item = item.strip();
38-
if len(item.strip()) > 0:
39-
binding = item.strip().split(":",1);
40-
conversion = {}
41-
if binding[0].strip() > '':
42-
conversion['field'] = binding[0].strip();
43-
if len (binding) > 1 and binding[1].strip() > '':
44-
conversion['value'] = binding[1].strip();
62+
if len(item) > 0:
63+
conversion = {};
64+
# We have a transform of some kind
4565
if not prefix in formats:
4666
formats[prefix] = [];
67+
68+
# A colon indicates a different target field is in play
69+
if ":" in item:
70+
binding = item.split(":",1);
71+
binding[0] = binding[0].strip();
72+
binding[1] = binding[1].strip();
73+
if binding[0] > '':
74+
conversion['field'] = binding[0];
75+
if binding[1] > '':
76+
conversion['value'] = binding[1];
77+
else:
78+
# A single ":" value enables clearing out of a value.
79+
conversion['value'] = '';
80+
81+
# No colon
82+
elif as_field == True:
83+
conversion['field'] = item;
84+
else:
85+
conversion['value'] = item;
86+
4787
formats[prefix].append(conversion);
4888

4989
if formats: # Only if some keys have been added.
@@ -109,7 +149,7 @@ def export_fields (EXPORT_FORMAT, field, row):
109149
'examples': row['examples']
110150
}
111151

112-
export_fields (EXPORT_FORMAT, field, row);
152+
export_fields (EXPORT_FORMAT, field, row, True);
113153

114154
reference_html += '''
115155
<tr>
@@ -126,7 +166,7 @@ def export_fields (EXPORT_FORMAT, field, row):
126166
choice = collections.OrderedDict();
127167
# Top level case-sensitive field index, curators must be exact
128168
CHOICE_INDEX[label] = choice;
129-
field['vocabulary'] = choice;
169+
field['schema:ItemList'] = choice;
130170

131171
section['children'].append(field)
132172
FIELD_INDEX[label.lower()] = field;
@@ -144,12 +184,12 @@ def export_fields (EXPORT_FORMAT, field, row):
144184
search_root = parent_label;
145185
print ('vocabulary field:', parent_label);
146186

147-
if not 'vocabulary' in FIELD_INDEX[parent_label_lc]:
187+
if not 'schema:ItemList' in FIELD_INDEX[parent_label_lc]:
148188
print ("error: field ",parent_label, "not marked as select or multiple but it has child term", label);
149189
else:
150190
# Basically top-level entries in field_map:
151191
choice = collections.OrderedDict();
152-
FIELD_INDEX[parent_label_lc]['vocabulary'][label] = choice;
192+
FIELD_INDEX[parent_label_lc]['schema:ItemList'][label] = choice;
153193

154194
# Parent_label is top level field name:
155195
CHOICE_INDEX[parent_label][label] = choice;
@@ -163,7 +203,11 @@ def export_fields (EXPORT_FORMAT, field, row):
163203
# in parent label switches that to a wildcard.
164204
try:
165205
result = dpath.util.get(CHOICE_INDEX, '/' + search_root +'/**/' + parent_label.replace('/','?'), separator='/');
166-
result[label] = collections.OrderedDict(); # new child {}
206+
choice = collections.OrderedDict(); # new child {}
207+
if not 'schema:ItemList' in result:
208+
result['schema:ItemList'] = {};
209+
result['schema:ItemList'][label] = choice;
210+
export_fields(EXPORT_FORMAT, choice, row);
167211
except:
168212
print ("Error: parent class ", parent_label, "doesn't exist as section or field for term. Make sure parent term is trimmed of whitespace.", label);
169213
pass

0 commit comments

Comments
 (0)