Skip to content

Commit 7a9fb6b

Browse files
committed
Merge branch 'master' into dh2-i18n-rc1
2 parents 0d53b40 + 9273db6 commit 7a9fb6b

14 files changed

+636
-498
lines changed

script/dh-validate.py

+7-10
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@
5353
from linkml_runtime.dumpers.yaml_dumper import YAMLDumper
5454
import subprocess
5555

56+
BOOLEANS = {'y':1, 'yes':1, 't':1, 'true':1, 'on':1, '1':1,
57+
'n':0, 'no':0, 'f':0, 'false':0, 'off':0, '0':0}
58+
5659
def init_parser():
5760

5861
parser = argparse.ArgumentParser(
@@ -74,12 +77,6 @@ def init_parser():
7477
required=False,
7578
help="A schema class to test given data file against.");
7679

77-
parser.add_argument("-S", "--index-slot",
78-
dest="index_slot",
79-
metavar="TEXT",
80-
required=False,
81-
help="A slot within target class which acts as an identifier or unique key. This is required for CSV dumping/loading. It is optional when this slot can be identified automatically as the only identifier or unique_key in the slot.");
82-
8380
parser.add_argument('data_sources',
8481
metavar='DATA_SOURCES',
8582
#nargs='+',
@@ -142,10 +139,7 @@ def getTargetClass(SCHEMA, target_class, slot_key = None):
142139

143140
break;
144141

145-
if not found:
146-
exit("Target class [" + target_class + "] does not have an identifier or key slot!");
147-
148-
print ("index:", slot_key);
142+
print ("Identifier slot:", slot_key);
149143
return (target_class, slot_key);
150144

151145

@@ -392,6 +386,9 @@ def getLinkMLTransform(SCHEMA, template, row_data):
392386
# datatype
393387
for slot_range in ranges:
394388
match slot_range:
389+
case 'boolean':
390+
if val.lower() in BOOLEANS:
391+
output_val = bool(BOOLEANS[val.lower()]);
395392
case 'integer':
396393
if isInteger(val):
397394
output_val = int(val);

web/templates/ambr/schema.json

+18-2
Original file line numberDiff line numberDiff line change
@@ -2539,6 +2539,10 @@
25392539
"name": "sample_received_date",
25402540
"description": "The date on which the sample was received.",
25412541
"title": "sample received date",
2542+
"todos": [
2543+
">=2019-10-01",
2544+
"<={today}"
2545+
],
25422546
"comments": [
25432547
"The date should be provided in ISO 8601 standard format \"YYYY-MM-DD\"."
25442548
],
@@ -2923,7 +2927,7 @@
29232927
},
29242928
"collection_method": {
29252929
"name": "collection_method",
2926-
"description": "The process used to collect the sample e.g. phlebotamy, necropsy.",
2930+
"description": "The process used to collect the sample e.g. phlebotomy, necropsy.",
29272931
"title": "collection method",
29282932
"comments": [
29292933
"Provide a descriptor if a collection method was used for sampling. Use the picklist provided in the template. If a desired term is missing from the picklist, contact [email protected]. If not applicable, do not leave blank. Choose a null value. Information for populating this field may be available in the \"Source of Isolation\" field in the Alberta Microbiota Repository (AMBR) Master file."
@@ -3442,6 +3446,10 @@
34423446
"name": "sequencing_date",
34433447
"description": "The date the sample was sequenced.",
34443448
"title": "sequencing date",
3449+
"todos": [
3450+
">=2019-10-01",
3451+
"<={today}"
3452+
],
34453453
"comments": [
34463454
"The date should be provided in ISO 8601 standard format \"YYYY-MM-DD\"."
34473455
],
@@ -4443,6 +4451,10 @@
44434451
"name": "sample_received_date",
44444452
"description": "The date on which the sample was received.",
44454453
"title": "sample received date",
4454+
"todos": [
4455+
">=2019-10-01",
4456+
"<={today}"
4457+
],
44464458
"comments": [
44474459
"The date should be provided in ISO 8601 standard format \"YYYY-MM-DD\"."
44484460
],
@@ -4887,7 +4899,7 @@
48874899
},
48884900
"collection_method": {
48894901
"name": "collection_method",
4890-
"description": "The process used to collect the sample e.g. phlebotamy, necropsy.",
4902+
"description": "The process used to collect the sample e.g. phlebotomy, necropsy.",
48914903
"title": "collection method",
48924904
"comments": [
48934905
"Provide a descriptor if a collection method was used for sampling. Use the picklist provided in the template. If a desired term is missing from the picklist, contact [email protected]. If not applicable, do not leave blank. Choose a null value. Information for populating this field may be available in the \"Source of Isolation\" field in the Alberta Microbiota Repository (AMBR) Master file."
@@ -5490,6 +5502,10 @@
54905502
"name": "sequencing_date",
54915503
"description": "The date the sample was sequenced.",
54925504
"title": "sequencing date",
5505+
"todos": [
5506+
">=2019-10-01",
5507+
"<={today}"
5508+
],
54935509
"comments": [
54945510
"The date should be provided in ISO 8601 standard format \"YYYY-MM-DD\"."
54955511
],

web/templates/ambr/schema.yaml

+7-1
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,9 @@ slots:
388388
any_of:
389389
- range: date
390390
- range: NullValueMenu
391+
todos:
392+
- '>=2019-10-01'
393+
- <={today}
391394
examples:
392395
- value: '2020-03-20'
393396
geo_loc_name_country:
@@ -608,7 +611,7 @@ slots:
608611
collection_method:
609612
name: collection_method
610613
title: collection method
611-
description: The process used to collect the sample e.g. phlebotamy, necropsy.
614+
description: The process used to collect the sample e.g. phlebotomy, necropsy.
612615
comments: Provide a descriptor if a collection method was used for sampling. Use
613616
the picklist provided in the template. If a desired term is missing from the
614617
picklist, contact [email protected]. If not applicable, do not leave blank.
@@ -941,6 +944,9 @@ slots:
941944
any_of:
942945
- range: date
943946
- range: NullValueMenu
947+
todos:
948+
- '>=2019-10-01'
949+
- <={today}
944950
examples:
945951
- value: '2020-06-22'
946952
library_id:

web/templates/ambr/schema_slots.tsv

+3-3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ AMBR GENEPIO:0001122 Database Identifiers database_identifiers
99
Sample collection and processing GENEPIO:0001156 sample collector contact email sample_collector_contact_email WhitespaceMinimizedString ^\S+@\S+\.\S+$ The email address of the contact responsible for follow-up regarding the sample. The email address can represent a specific individual or lab e.g. [email protected], or [email protected] [email protected]
1010
Sample collection and processing GENEPIO:0001158 sample collector contact address sample_collector_contact_address WhitespaceMinimizedString The mailing address of the agency submitting the sample. The mailing address should be in the format: Street number and name, City, Province/Territory, Postal Code, Country 655 Lab St, Vancouver, British Columbia, V5N 2A2, Canada
1111
Sample collection and processing GENEPIO:0001174 sample collection date sample_collection_date date NullValueMenu TRUE 2019-10-01 {today} The date on which the sample was collected. The date should be provided in ISO 8601 standard format "YYYY-MM-DD". 2020-03-16
12-
Sample collection and processing GENEPIO:0001179 sample received date sample_received_date date NullValueMenu The date on which the sample was received. The date should be provided in ISO 8601 standard format "YYYY-MM-DD". 2020-03-20
12+
Sample collection and processing GENEPIO:0001179 sample received date sample_received_date date NullValueMenu 2019-10-01 {today} The date on which the sample was received. The date should be provided in ISO 8601 standard format "YYYY-MM-DD". 2020-03-20
1313
Sample collection and processing GENEPIO:0001181 geo_loc_name (country) geo_loc_name_country GeoLocNameCountryMenu NullValueMenu TRUE The country where the sample was collected. Provide the country name from the controlled vocabulary provided. Canada
1414
Sample collection and processing GENEPIO:0001185 geo_loc_name (state/province/territory) geo_loc_name_state_province_territory WhitespaceMinimizedString NullValueMenu TRUE The province/territory where the sample was collected. Provide the province/territory name from the controlled vocabulary provided. Saskatchewan
1515
Sample collection and processing GENEPIO:0001189 geo_loc_name (city) geo_loc_name_city WhitespaceMinimizedString The city where the sample was collected. Provide the city name. Use this look-up service to identify the standardized term: https://www.ebi.ac.uk/ols/ontologies/gaz Medicine Hat
@@ -24,7 +24,7 @@ AMBR GENEPIO:0001122 Database Identifiers database_identifiers
2424
Sample collection and processing GENEPIO:0001223 environmental material environmental_material EnvironmentalMaterialMenu NullValueMenu TRUE TRUE A substance obtained from the natural or man-made environment e.g. soil, water, sewage. Provide a descriptor if an environmental material was sampled. Use the picklist provided in the template. If a desired term is missing from the picklist, contact [email protected]. If not applicable, do not leave blank. Choose a null value. Information for populating this field may be available in the "Source of Isolation" field in the Alberta Microbiota Repository (AMBR) Master file. Bandage
2525
Sample collection and processing GENEPIO:0001232 environmental site environmental_site EnvironmentalSiteMenu NullValueMenu TRUE TRUE An environmental location may describe a site in the natural or built environment e.g. contact surface, metal can, hospital, wet market, bat cave. Provide a descriptor if an environmental site was sampled. Use the picklist provided in the template. If a desired term is missing from the picklist, contact [email protected]. If not applicable, do not leave blank. Choose a null value. Information for populating this field may be available in the "Source of Isolation" field in the Alberta Microbiota Repository (AMBR) Master file. Hospital
2626
Sample collection and processing GENEPIO:0001234 collection device collection_device CollectionDeviceMenu NullValueMenu TRUE TRUE The instrument or container used to collect the sample e.g. swab. Provide a descriptor if a device was used for sampling. Use the picklist provided in the template. If a desired term is missing from the picklist, contact [email protected]. If not applicable, do not leave blank. Choose a null value. Information for populating this field may be available in the "Source of Isolation" field in the Alberta Microbiota Repository (AMBR) Master file. Swab
27-
Sample collection and processing GENEPIO:0001241 collection method collection_method CollectionMethodMenu NullValueMenu TRUE TRUE The process used to collect the sample e.g. phlebotamy, necropsy. Provide a descriptor if a collection method was used for sampling. Use the picklist provided in the template. If a desired term is missing from the picklist, contact [email protected]. If not applicable, do not leave blank. Choose a null value. Information for populating this field may be available in the "Source of Isolation" field in the Alberta Microbiota Repository (AMBR) Master file. Biopsy
27+
Sample collection and processing GENEPIO:0001241 collection method collection_method CollectionMethodMenu NullValueMenu TRUE TRUE The process used to collect the sample e.g. phlebotomy, necropsy. Provide a descriptor if a collection method was used for sampling. Use the picklist provided in the template. If a desired term is missing from the picklist, contact [email protected]. If not applicable, do not leave blank. Choose a null value. Information for populating this field may be available in the "Source of Isolation" field in the Alberta Microbiota Repository (AMBR) Master file. Biopsy
2828
Sample collection and processing GENEPIO:0001243 collection protocol collection_protocol WhitespaceMinimizedString The name and version of a particular protocol used for sampling. Free text. Information for populating this field may be available in the "Source of Isolation" field in the Alberta Microbiota Repository (AMBR) Master file. Collection_protocol_Children's Hospital biofilm study (A3-701-01)
2929
Sample collection and processing GENEPIO:0001253 specimen processing specimen_processing SpecimenProcessingMenu NullValueMenu TRUE TRUE Any processing applied to the sample during or after receiving the sample. If multiple PCR products were generated from the isolate using different primer sets, indicate that the sequence records represent the same isolate by selecting "Biological replicate" in the "specimen processing" field. Every different sequence experiment should have its own record (i.e. if different amplicons have the same sequence but were generated using different primer sets, these should be stored as separate entries/lines in the spreadsheet). Information about replicates may be available in the "Top-hit taxon (taxa)" or "Trimmed Ribosomal Sequence" fields if there are multiple values for the same "Strain ID#" in the Alberta Microbiota Repository (AMBR) Master file. Biological replicate
3030
Sample collection and processing GENEPIO:0100311 specimen processing details specimen_processing_details WhitespaceMinimizedString Detailed information regarding the processing applied to a sample during or after receiving the sample. Provide a free text description of any processing details applied to a sample. Information about replicates may be available in the "Top-hit taxon (taxa)" or "Trimmed Ribosomal Sequence" fields if there are multiple values for the same "Strain ID#" in the Alberta Microbiota Repository (AMBR) Master file. Multiple amplicons generated for isolate SA32 using different primer sets
@@ -48,7 +48,7 @@ AMBR GENEPIO:0001122 Database Identifiers database_identifiers
4848
Sequencing GENEPIO:0100422 sequenced by contact email sequenced_by_contact_email WhitespaceMinimizedString The email address of the contact responsible for follow-up regarding the sequence. Provide the email associated with the listed contact. As personnel turnover may render an individual's email obsolete, it is preferable to provide an address for a position or lab, to ensure accuracy of information and institutional memory. If the information is unknown or cannot be provided, leave blank or provide a null value. [email protected]
4949
Sequencing GENEPIO:0001445 purpose of sequencing purpose_of_sequencing PurposeOfSequencingMenu NullValueMenu The reason that the sample was sequenced. The reason why a sample was originally collected may differ from the reason why it was selected for sequencing. The reason a sample was sequenced may provide information about potential biases in sequencing strategy. Provide the purpose of sequencing from the picklist in the template. The reason for sample collection should be indicated in the "purpose of sampling" field. Research
5050
Sequencing GENEPIO:0001446 purpose of sequencing details purpose_of_sequencing_details WhitespaceMinimizedString NullValueMenu The description of why the sample was sequenced providing specific details. Provide an expanded description of why the sample was sequenced using free text. This information can provide details about why the sample source might contain antibiotic potentiators. Screening for antibiotic potentiators in Cystic fibrosis disease contexts.
51-
Sequencing GENEPIO:0001447 sequencing date sequencing_date date NullValueMenu The date the sample was sequenced. The date should be provided in ISO 8601 standard format "YYYY-MM-DD". 2020-06-22
51+
Sequencing GENEPIO:0001447 sequencing date sequencing_date date NullValueMenu 2019-10-01 {today} The date the sample was sequenced. The date should be provided in ISO 8601 standard format "YYYY-MM-DD". 2020-06-22
5252
Sequencing GENEPIO:0001448 library ID library_id WhitespaceMinimizedString The user-specified identifier for the library prepared for sequencing. Provide the name of the run. This value maps to information in the "Sequencing Batch #" field in the Alberta Microbiota Repository (AMBR) Master file. 1876515_SA01_Plate 02 library ID
5353
Sequencing GENEPIO:0001452 sequencing instrument sequencing_instrument SequencingInstrumentMenu NullValueMenu TRUE TRUE The model of the sequencing instrument used. Select a sequencing instrument from the picklist provided in the template. Oxford Nanopore MinION
5454
Sequencing GENEPIO:0001453 sequencing protocol name sequencing_protocol_name WhitespaceMinimizedString TRUE The name and version number of the sequencing protocol used. Provide the name and version of the sequencing protocol e.g. 1D_DNA_MinION https://www.protocols.io/view/covid-19-artic-v3-illumina-library-construction-an-bibtkann

web/templates/canada_covid19/locales/fr/schema.json

+8
Original file line numberDiff line numberDiff line change
@@ -2998,6 +2998,10 @@
29982998
"text": "Ontario Institute for Cancer Research (OICR)",
29992999
"title": "Institut ontarien de recherche sur le cancer (IORC)"
30003000
},
3001+
"Ontario COVID-19 Genomic Network": {
3002+
"text": "Ontario COVID-19 Genomic Network",
3003+
"title": "Réseau génomique ontarien COVID-19"
3004+
},
30013005
"Prince Edward Island - Health PEI": {
30023006
"text": "Prince Edward Island - Health PEI",
30033007
"title": "Île-du-Prince-Édouard – Santé Î.-P.-É."
@@ -3033,6 +3037,10 @@
30333037
"text": "Eastern Ontario Regional Laboratory Association",
30343038
"title": "Association des laboratoires régionaux de l’Est de l’Ontario"
30353039
},
3040+
"Kingston Health Sciences Centre": {
3041+
"text": "Kingston Health Sciences Centre",
3042+
"title": "Centre des sciences de la santé de Kingston"
3043+
},
30363044
"Lake of the Woods District Hospital - Ontario": {
30373045
"text": "Lake of the Woods District Hospital - Ontario",
30383046
"title": "Lake of the Woods District Hospital – Ontario"

web/templates/canada_covid19/locales/fr/schema.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -3717,6 +3717,8 @@ enums:
37173717
title: "Autorit\xE9 sanitaire de la Nouvelle-\xC9cosse"
37183718
Ontario Institute for Cancer Research (OICR):
37193719
title: Institut ontarien de recherche sur le cancer (IORC)
3720+
Ontario COVID-19 Genomic Network:
3721+
title: "R\xE9seau g\xE9nomique ontarien COVID-19"
37203722
Prince Edward Island - Health PEI:
37213723
title: "\xCEle-du-Prince-\xC9douard\_\u2013 Sant\xE9 \xCE.-P.-\xC9."
37223724
Public Health Ontario (PHO):
@@ -3737,6 +3739,8 @@ enums:
37373739
Eastern Ontario Regional Laboratory Association:
37383740
title: "Association des laboratoires r\xE9gionaux de l\u2019Est de l\u2019\
37393741
Ontario"
3742+
Kingston Health Sciences Centre:
3743+
title: "Centre des sciences de la sant\xE9 de Kingston"
37403744
Lake of the Woods District Hospital - Ontario:
37413745
title: "Lake of the Woods District Hospital\_\u2013 Ontario"
37423746
Manitoba Cadham Provincial Laboratory:

0 commit comments

Comments
 (0)