39
39
# integer or decimal number, may begin with + or - /^[-+]?\d*\.?\d+$
40
40
# integer /^-?[0-9]+$
41
41
#
42
- # Textual:
43
- # Capital or lower case letters only, at least 1 character, and 50 characters max ^[A-Za-z]{1,50}$
44
- # Capital or lower case letters only, 50 characters max ^[A-Za-z]{0,50}$
45
- # Short text, 50 characters max ^.{0,50}$
46
- # Short text, 250 characters max ^.{0,250}$
47
- # long text, 800 characters max ^.{0,800}$
48
- # long text, 4000 characters max ^.{0,4000}$
49
- # Canadian postal codes (A1A 1A1) ^[A-Z][0-9][A-Z]\s[0-9][A-Z][0-9]$
50
- # Zip code ^\d{5,6}(?:[-\s]\d{4})?$
51
- # Email address [a-zA-Z0-9_\.\+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-\.]+
52
- # URL https?\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,}
53
- # Phone number \+?\(?\d{2,4}\)?[\d\s-]{3,}
54
- # Latitude in formats S30°15'45.678" or N12°30.999" ^[NS]-?(?:[0-8]?\d|90)°(?:\d+(?:\.\d+)?)(?:'(\d+(?:\.\d+)?)")?$
55
- # Longitude in formats E30°15'45.678" or W90°00.000" ^[WE]-?(?:[0-8]?\d|90)°(?:\d+(?:\.\d+)?)(?:'(\d+(?:\.\d+)?)")?$
56
- #
42
+ # See also: https://github.com/agrifooddatacanada/OCA_package_standard
57
43
58
44
59
45
import json
270
256
"range_2" ,
271
257
"identifier" ,
272
258
"multivalued" ,
259
+ "minimum_cardinality" ,
260
+ "maximum_cardinality" ,
273
261
"required" ,
274
262
"recommended" ,
275
263
"minimum_value" ,
278
266
"structured_pattern" ,
279
267
"description" ,
280
268
"comments" ,
281
- "examples"
269
+ "examples" ,
270
+ "annotations"
282
271
];
283
272
284
273
SCHEMA_ENUMS = [
@@ -385,9 +374,12 @@ def writeSchemaCore():
385
374
'name' : SCHEMA ["name" ],
386
375
'title' : SCHEMA ["title" ] or SCHEMA ["name" ],
387
376
'description' : SCHEMA ["description" ],
388
- # 'is_a': 'dh_interface'
389
377
}
390
378
379
+ # Associate classification keywords with this class (Rather than LinkML schema as a whole)
380
+ if len (oca_classification ):
381
+ SCHEMA ["classes" ][SCHEMA ["name" ]]["keywords" ] = oca_classification ;
382
+
391
383
# Set up Container class to hold given schema class's data
392
384
SCHEMA ["classes" ]['Container' ]['attributes' ] = {
393
385
'name' : SCHEMA ["name" ] + 'Data' ,
@@ -409,7 +401,7 @@ def writeSlots():
409
401
# Ensure SCHEMA_SLOTS has language variation
410
402
addLocaleHeaders (SCHEMA_SLOTS , ["slot_group" ,"title" ,"description" ,"comments" ,"examples" ]);
411
403
412
- # start slots as an ordered dictionary {slot name:,...} of DH slot attributes.
404
+ # Start slots as an ordered dictionary {slot name:,...} of DH slot attributes.
413
405
slots = OrderedDict ([i , OrderedDict ([i ,"" ] for i in SCHEMA_SLOTS ) ] for i in oca_attributes )
414
406
415
407
for slot_name in oca_attributes :
@@ -418,45 +410,110 @@ def writeSlots():
418
410
slot ['class_name' ] = SCHEMA ["name" ];
419
411
slot ['name' ] = slot_name ;
420
412
slot ['title' ] = oca_labels [slot_name ];
421
- slot ['range' ] = oca_attributes [slot_name ]; # ISSUE: Numeric
413
+ slot ['range' ] = oca_attributes [slot_name ]; # Yields Type
422
414
slot ['pattern' ] = oca_formats [slot_name ];
423
415
slot ['description' ] = oca_informations [slot_name ];
424
416
417
+ # Minimum and maximum number of values in array of a multivalued field.
418
+ # See https://oca.colossi.network/specification/#cardinality-overlay
419
+ if slot_name in oca_cardinality : # Format: n, n-, n-m, -m
420
+ card = oca_cardinality [slot_name ];
421
+ if '-' in card :
422
+ if '-' == card [0 ]:
423
+ slot ['maximum_cardinality' ] = int (card [1 :]);
424
+ if (slot ['maximum_cardinality' ] > 1 ):
425
+ slot ['multivalued' ] = True ;
426
+ elif '-' == card [- 1 ]:
427
+ slot ['minimum_cardinality' ] = int (card [0 :- 1 ]);
428
+ slot ['multivalued' ] = True ;
429
+ else :
430
+ (min , max ) = card .split ('-' );
431
+ slot ['minimum_cardinality' ] = int (min );
432
+ slot ['maximum_cardinality' ] = int (max );
433
+ if (int (max ) < int (min )):
434
+ warnings .append ("Field " + slot_name + " has maximum_cardinality less than the minimum_cardinality." )
435
+ if int (max ) > 1 :
436
+ slot ['multivalued' ] = True ;
437
+ else : # A single value so both min and max
438
+ slot ['minimum_cardinality' ] = slot ['maximum_cardinality' ] = int (card );
439
+ if int (card ) > 1 :
440
+ slot ['multivalued' ] = True ;
441
+
442
+ # If slot range is "Array[some datatype]", mark the slot as multivalued and use the inner datatype as its range.
443
+ if slot ['range' ][0 :5 ] == "Array" :
444
+ slot ['multivalued' ] = True ;
445
+ slot ['range' ] = re .search ('\[(.+)\]' , slot ['range' ]).group (1 );
446
+
425
447
# Range 2 gets any picklist for now.
426
448
if slot_name in oca_entry_codes :
427
449
slots [slot_name ]['range_2' ] = slot_name ;
428
450
451
+ if slot_name in oca_conformance :
452
+ match oca_conformance [slot_name ]:
453
+ case "M" : # Mandatory
454
+ slot ['required' ] = True ;
455
+ case "O" : # Optional -> Recommended?!
456
+ slot ['recommended' ] = True ;
457
+
458
+ # Flag that this field may have confidentiality compromising content.
459
+ # Field confidentiality https://kantarainitiative.org/download/blinding-identity-taxonomy-pdf/
460
+ # https://lf-toip.atlassian.net/wiki/spaces/HOME/pages/22974595/Blinding+Identity+Taxonomy
461
+ # Currently the only use of slot.annotations:
462
+ if slot_name in oca_identifying_factors :
463
+ slot ['annotations' ] = 'identifying_factor:True' ;
464
+
429
465
# Conversion of range field from OCA to LinkML data types.
430
466
# See https://github.com/ClimateSmartAgCollab/JSON-Form-Generator/blob/main/src/JsonFormGenerator.js
467
+ # See also: https://oca.colossi.network/specification/#attribute-type
468
+ # There's also a list of file types: https://github.com/agrifooddatacanada/format_options/blob/main/format/binary.md
469
+ # Data types: Text | Numeric | Reference (crypto hash) | Boolean | Binary | DateTime | Array[data type]
431
470
match slot ['range' ]: # case sensitive?
471
+
432
472
case "Text" :
473
+ # https://github.com/agrifooddatacanada/format_options/blob/main/format/text.md
433
474
slot ['range' ] = "WhitespaceMinimizedString" # or "string"
475
+
434
476
case "Numeric" :
477
+ # https://github.com/agrifooddatacanada/format_options/blob/main/format/numeric.md
435
478
# ISSUE: if field is marked as an integer or decimal, then even
436
479
# if regular expression validates, a test against integer or
437
480
# decimal format will INVALIDATE this slot.
438
- # Sniff whether it is integer or decimal. FUTURE: allow negatives?
481
+ # Sniff whether it is integer or decimal.
439
482
if re .search ("^-?\[0-9\]\{\d+\}$" , slot ['pattern' ]):
440
483
slot ['range' ] = "integer" ;
441
484
else :
442
485
slot ['range' ] = "decimal" ;
443
- case "DateTime" :
444
- case "Boolean" :
486
+
487
+ case "DateTime" :
488
+ # There are many datatypes that might be matched via the OCA regex expression used to define them.
489
+ pass
490
+ case "Boolean" :
491
+ pass
492
+
493
+ # Now convert any slot datatypes where pattern matches OCA-specific data type
494
+ for type_name in SCHEMA ["types" ]:
495
+ if "pattern" in SCHEMA ["types" ][type_name ]:
496
+ if SCHEMA ["types" ][type_name ]["pattern" ] == slot ['pattern' ]:
497
+ #print("PATTERN", type_name, )
498
+ slot ['range' ] = type_name ;
499
+ slot ['pattern' ] = '' ; # Redundant
445
500
446
501
447
- case ""
448
502
# Need access to original oca language parameter, e.g. "eng"
449
503
if len (locale_mapping ) > 1 :
450
504
for locale in list (locale_mapping )[1 :]:
451
505
oca_locale = locale_mapping [locale ];
452
506
slot ['slot_group_' + locale ] = "Generic" ;
453
507
slot ['title_' + locale ] = getLookup ("label" , oca_locale , slot_name )
454
508
slot ['description_' + locale ] = getLookup ("information" , oca_locale , slot_name )
455
- #slot['comments_'+locale]
456
- #slot['examples_'+locale]
509
+ #slot['comments_'+locale] # No OCA equivalent
510
+ #slot['examples_'+locale] # No OCA equivalent
457
511
512
+
513
+
458
514
save_tsv ("schema_slots.tsv" , SCHEMA_SLOTS , slots );
459
515
516
+
460
517
def writeEnums ():
461
518
addLocaleHeaders (SCHEMA_ENUMS , ["title" , "menu_1" ]);
462
519
enums = [];
@@ -500,26 +557,39 @@ def writeEnums():
500
557
# ALSO, it is assumed that language variant objects all have the "default"
501
558
# and consistent primary language as first entry.
502
559
560
+ # oca_attributes contains slot.name and slot.Type (datatype, e.g. Numeric, ...)
561
+ oca_attributes = oca_obj ["bundle" ]["capture_base" ]["attributes" ];
562
+
563
+ # Keywords about this schema (class's) subject categorization.
564
+ oca_classification = oca_obj ["bundle" ]["capture_base" ]["classification" ];
565
+
566
+ # Fields which likely have personal or institutional confidentiality content:
567
+ oca_identifying_factors = oca_obj ["bundle" ]["capture_base" ]["flagged_attributes" ];
568
+
569
+ ############################# Overlays #################################
503
570
oca_overlays = oca_obj ["bundle" ]["overlays" ];
504
571
505
- # Contains {schema.name,.description,.language} in array
506
- # Optional?
572
+ # Contains {schema.name,.description,.language} in array. Optional?
507
573
oca_metas = oca_overlays ["meta" ][0 ];
508
574
509
- # oca_attributes contains slot.name and slot.datatype
510
- oca_attributes = oca_obj ["bundle" ]["capture_base" ]["attributes" ];
511
-
512
575
# Contains slot.name and slot.pattern
513
576
oca_formats = oca_overlays ["format" ]["attribute_formats" ];
514
577
578
+ # Minimum and maximum number of values in array of a multivalued field.
579
+ if "cardinality" in oca_overlays :
580
+ oca_cardinality = oca_overlays ["cardinality" ]["attr_cardinality" ];
581
+ else :
582
+ oca_cardinality = {};
583
+
515
584
# Contains {slot.title,.language} in array
516
585
oca_labels = oca_overlays ["label" ][0 ]["attribute_labels" ];
517
586
518
587
# Contains {slot.name,.description,.language} in array
519
588
# Optional?
520
589
oca_informations = oca_overlays ["information" ][0 ]["attribute_information" ];
521
590
522
- # Contains {"d": "M", "i": "M", "passed": "M"} # "M" ?
591
+ # A dictionary for each field indicating required/recommended status:
592
+ # M is mandatory and O is optional.
523
593
oca_conformance = oca_overlays ["conformance" ]["attribute_conformance" ];
524
594
525
595
# Contains [enumeration name]:[code,...]
0 commit comments