diff --git a/nmdc_schema/nmdc-pydantic.py b/nmdc_schema/nmdc-pydantic.py index 61e658fdc1..27a0668ebb 100644 --- a/nmdc_schema/nmdc-pydantic.py +++ b/nmdc_schema/nmdc-pydantic.py @@ -92,7 +92,9 @@ def __contains__(self, key:str) -> bool: 'name': 'NMDC', 'notes': ['not importing any MIxS terms where the relationship between the ' "name (SCN) and the id isn't 1:1"], - 'prefixes': {'CATH': {'prefix_prefix': 'CATH', + 'prefixes': {'BFO': {'prefix_prefix': 'BFO', + 'prefix_reference': 'http://purl.obolibrary.org/obo/BFO_'}, + 'CATH': {'prefix_prefix': 'CATH', 'prefix_reference': 'https://bioregistry.io/cath:'}, 'CHEBI': {'prefix_prefix': 'CHEBI', 'prefix_reference': 'http://purl.obolibrary.org/obo/CHEBI_'}, @@ -248,6 +250,8 @@ def __contains__(self, key:str) -> bool: 'prefix_reference': 'http://example.org/neon/schema/'}, 'nmdc': {'prefix_prefix': 'nmdc', 'prefix_reference': 'https://w3id.org/nmdc/'}, + 'owl': {'prefix_prefix': 'owl', + 'prefix_reference': 'http://www.w3.org/2002/07/owl#'}, 'prov': {'prefix_prefix': 'prov', 'prefix_reference': 'http://www.w3.org/ns/prov#'}, 'qud': {'prefix_prefix': 'qud', @@ -279,69 +283,6 @@ def __contains__(self, key:str) -> bool: 'id_version': {'setting_key': 'id_version', 'setting_value': '(\\.[0-9]{1,})'}}, 'source_file': 'nmdc_schema/nmdc_materialized_patterns.yaml', - 'subsets': {'checklist': {'description': 'A MIxS checklist. 
These can be ' - 'combined with packages', - 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'checklist'}, - 'checklist_package_combination': {'description': 'A combination ' - 'of a checklist ' - 'and a package', - 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'checklist_package_combination'}, - 'data object subset': {'description': 'Subset consisting of the ' - 'data objects that either ' - 'inputs or outputs of ' - 'processes or workflows.', - 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'data object subset'}, - 'data_portal_subset': {'comments': ['Schema authors are ' - 'responsible for alerting and ' - 'supporting Kitware and ' - 'nmdc-server authors about ' - 'changes they will have to ' - 'make if entities labeled ' - 'with data_portal_subset are ' - 'modified.', - 'Assignment of the ' - 'data_portal_subset is ' - 'incomplete in the schema.'], - 'description': 'Subset consisting of ' - 'entities that ' - 'Kitware/nmdc-server use to ' - 'populate the data portal.', - 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'data_portal_subset'}, - 'environment': {'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'environment'}, - 'investigation': {'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'investigation'}, - 'mixs extension': {'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'mixs extension'}, - 'mixs_environmental_triad': {'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'mixs_environmental_triad', - 'title': 'MIxS Environmental Triad'}, - 'nucleic acid sequence source': {'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'nucleic acid sequence ' - 'source'}, - 'package': {'description': 'A MIxS package. 
These can be combined ' - 'with checklists', - 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'package'}, - 'proteases': {'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'proteases'}, - 'sample subset': {'description': 'Subset consisting of entities ' - 'linked to the processing of ' - 'samples. Currently, this ' - 'subset consists of study, omics ' - 'process, and biosample.', - 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'sample subset'}, - 'sequencing': {'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'sequencing'}, - 'workflow subset': {'description': 'Subset consisting of just the ' - 'workflow execution activities', - 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'workflow subset'}}, 'title': 'NMDC Schema', 'types': {'boolean': {'base': 'Bool', 'description': 'A binary (true or false) value', @@ -418,12 +359,12 @@ def __contains__(self, key:str) -> bool: 'the type is referenced with the lower case ' '"decimal".'], 'uri': 'xsd:decimal'}, - 'decimal degree': {'base': 'float', + 'decimal_degree': {'base': 'float', 'description': 'A decimal degree expresses ' 'latitude or longitude as decimal ' 'fractions.', 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'decimal degree', + 'name': 'decimal_degree', 'see_also': ['https://en.wikipedia.org/wiki/Decimal_degrees'], 'uri': 'xsd:decimal'}, 'double': {'base': 'float', @@ -493,11 +434,11 @@ def __contains__(self, key:str) -> bool: 'lower case "jsonpointer".'], 'repr': 'str', 'uri': 'xsd:string'}, - 'language code': {'base': 'str', + 'language_code': {'base': 'str', 'description': 'A language code conforming to ' 'ISO_639-1', 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'name': 'language code', + 'name': 'language_code', 'see_also': ['https://en.wikipedia.org/wiki/ISO_639-1'], 'uri': 'xsd:language'}, 'ncname': {'base': 'NCName', @@ -1292,8 +1233,12 @@ class HeatDelivLocEnum(str, Enum): class HostSexEnum(str, Enum): female = "female" hermaphrodite = "hermaphrodite" + 
non_binary = "non-binary" male = "male" - neuter = "neuter" + transgender = "transgender" + transgender_LEFT_PARENTHESISfemale_to_maleRIGHT_PARENTHESIS = "transgender (female to male)" + transgender_LEFT_PARENTHESISmale_to_femaleRIGHT_PARENTHESIS = "transgender (male to female)" + undeclared = "undeclared" class IndoorSpaceEnum(str, Enum): @@ -1886,6 +1831,17 @@ class WindowVertPosEnum(str, Enum): high = "high" +class TargetGeneEnum(str, Enum): + # the small subunit of the bacterial/archean ribosome + number_16S_rRNA = "16S_rRNA" + # the large subunit of the bacterial/archean ribosome + number_23S_rRNA = "23S_rRNA" + # the small subunit of the eukaryotic ribosome + number_18S_rRNA = "18S_rRNA" + # the large subunit of the eukaryotic ribosome + number_28S_rRNA = "28S_rRNA" + + class CreditEnum(str, Enum): # Conceptualization Conceptualization = "Conceptualization" @@ -2277,6 +2233,7 @@ class AnalyteCategoryEnum(str, Enum): metabolome = "metabolome" lipidome = "lipidome" nom = "nom" + amplicon_sequencing_assay = "amplicon_sequencing_assay" class ExtractionTargetEnum(str, Enum): @@ -2367,6 +2324,17 @@ class AnalysisTypeEnum(str, Enum): metatranscriptomics = "metatranscriptomics" natural_organic_matter = "natural organic matter" bulk_chemistry = "bulk chemistry" + Amplicon_sequencing_assay = "amplicon sequencing assay" + + +class MetaproteomicsAnalysisCategoryEnum(str, Enum): + """ + The category of metaproteomics analysis being performed. + """ + # A metaproteomics analysis that is matched to a metagenome derived from the same biosample. + matched_metagenome = "matched_metagenome" + # A metaproteomics analysis that is matched to an in silico generated metagenome. 
+ in_silico_metagenome = "in_silico_metagenome" @@ -2387,12 +2355,11 @@ class EukEval(ConfiguredBaseModel): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -2463,12 +2430,11 @@ class FunctionalAnnotationAggMember(ConfiguredBaseModel): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -2588,12 +2554,11 @@ class MobilePhaseSegment(ConfiguredBaseModel): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -2644,12 +2609,11 @@ class PortionOfSubstance(ConfiguredBaseModel): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -2730,12 +2694,11 @@ class MagBin(ConfiguredBaseModel): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -2772,12 +2735,11 @@ class MetaboliteIdentification(ConfiguredBaseModel): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 
'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -2793,7 +2755,7 @@ class MetaboliteIdentification(ConfiguredBaseModel): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -2804,125 +2766,6 @@ def pattern_alternative_identifiers(cls, v): return v -class PeptideQuantification(ConfiguredBaseModel): - """ - This is used to link a metaproteomics analysis workflow to a specific peptide sequence and related information - """ - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:PeptideQuantification', - 'deprecated': 'not used. 2024-11 ' - 'https://github.com/microbiomedata/nmdc-schema/issues/2250', - 'from_schema': 'https://w3id.org/nmdc/nmdc'}) - - type: Literal["https://w3id.org/nmdc/PeptideQuantification","nmdc:PeptideQuantification"] = Field("nmdc:PeptideQuantification", description="""the class_uri of the class that has been instantiated""", json_schema_extra = { "linkml_meta": {'alias': 'type', - 'designates_type': True, - 'domain_of': ['EukEval', - 'FunctionalAnnotationAggMember', - 'MobilePhaseSegment', - 'PortionOfSubstance', - 'MagBin', - 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', - 'GenomeFeature', - 'FunctionalAnnotation', - 'AttributeValue', - 'NamedThing', - 'FailureCategorization', - 'Protocol', - 'CreditAssociation', - 'Doi'], - 'examples': [{'value': 'nmdc:Biosample'}, {'value': 'nmdc:Study'}], - 'notes': ['replaces legacy nmdc:type slot', - 'makes it easier to read example data files', - 'required for polymorphic MongoDB collections'], - 'see_also': ['https://github.com/microbiomedata/nmdc-schema/issues/1048', - 
'https://github.com/microbiomedata/nmdc-schema/issues/1233', - 'https://github.com/microbiomedata/nmdc-schema/issues/248'], - 'slot_uri': 'rdf:type'} }) - all_proteins: Optional[List[str]] = Field(None, description="""the list of protein identifiers that are associated with the peptide sequence""", json_schema_extra = { "linkml_meta": {'alias': 'all_proteins', - 'deprecated': 'not used. 2024-11 ' - 'https://github.com/microbiomedata/nmdc-schema/issues/2250', - 'domain_of': ['PeptideQuantification', 'ProteinQuantification']} }) - best_protein: Optional[str] = Field(None, description="""the specific protein identifier most correctly associated with the peptide sequence""", json_schema_extra = { "linkml_meta": {'alias': 'best_protein', - 'deprecated': 'not used. 2024-11 ' - 'https://github.com/microbiomedata/nmdc-schema/issues/2250', - 'domain_of': ['PeptideQuantification', 'ProteinQuantification']} }) - min_q_value: Optional[float] = Field(None, description="""smallest Q-Value associated with the peptide sequence as provided by MSGFPlus tool""", json_schema_extra = { "linkml_meta": {'alias': 'min_q_value', - 'deprecated': 'not used. 2024-11 ' - 'https://github.com/microbiomedata/nmdc-schema/issues/2250', - 'domain_of': ['PeptideQuantification'], - 'see_also': ['OBI:0001442']} }) - peptide_sequence: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'peptide_sequence', - 'deprecated': 'not used. 2024-11 ' - 'https://github.com/microbiomedata/nmdc-schema/issues/2250', - 'domain_of': ['PeptideQuantification']} }) - peptide_spectral_count: Optional[int] = Field(None, description="""sum of filter passing MS2 spectra associated with the peptide sequence within a given LC-MS/MS data file""", json_schema_extra = { "linkml_meta": {'alias': 'peptide_spectral_count', - 'deprecated': 'not used. 
2024-11 ' - 'https://github.com/microbiomedata/nmdc-schema/issues/2250', - 'domain_of': ['PeptideQuantification']} }) - peptide_sum_masic_abundance: Optional[int] = Field(None, description="""combined MS1 extracted ion chromatograms derived from MS2 spectra associated with the peptide sequence from a given LC-MS/MS data file using the MASIC tool""", json_schema_extra = { "linkml_meta": {'alias': 'peptide_sum_masic_abundance', - 'deprecated': 'not used. 2024-11 ' - 'https://github.com/microbiomedata/nmdc-schema/issues/2250', - 'domain_of': ['PeptideQuantification']} }) - - -class ProteinQuantification(ConfiguredBaseModel): - """ - This is used to link a metaproteomics analysis workflow to a specific protein - """ - linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:ProteinQuantification', - 'deprecated': 'not used. 2024-11 ' - 'https://github.com/microbiomedata/nmdc-schema/issues/2250', - 'from_schema': 'https://w3id.org/nmdc/nmdc'}) - - all_proteins: Optional[List[str]] = Field(None, description="""the list of protein identifiers that are associated with the peptide sequence""", json_schema_extra = { "linkml_meta": {'alias': 'all_proteins', - 'deprecated': 'not used. 2024-11 ' - 'https://github.com/microbiomedata/nmdc-schema/issues/2250', - 'domain_of': ['PeptideQuantification', 'ProteinQuantification']} }) - best_protein: Optional[str] = Field(None, description="""the specific protein identifier most correctly associated with the peptide sequence""", json_schema_extra = { "linkml_meta": {'alias': 'best_protein', - 'deprecated': 'not used. 2024-11 ' - 'https://github.com/microbiomedata/nmdc-schema/issues/2250', - 'domain_of': ['PeptideQuantification', 'ProteinQuantification']} }) - peptide_sequence_count: Optional[int] = Field(None, description="""count of peptide sequences grouped to the best_protein""", json_schema_extra = { "linkml_meta": {'alias': 'peptide_sequence_count', - 'deprecated': 'not used. 
2024-11 ' - 'https://github.com/microbiomedata/nmdc-schema/issues/2250', - 'domain_of': ['ProteinQuantification']} }) - protein_spectral_count: Optional[int] = Field(None, description="""sum of filter passing MS2 spectra associated with the best protein within a given LC-MS/MS data file""", json_schema_extra = { "linkml_meta": {'alias': 'protein_spectral_count', - 'deprecated': 'not used. 2024-11 ' - 'https://github.com/microbiomedata/nmdc-schema/issues/2250', - 'domain_of': ['ProteinQuantification']} }) - protein_sum_masic_abundance: Optional[int] = Field(None, description="""combined MS1 extracted ion chromatograms derived from MS2 spectra associated with the best protein from a given LC-MS/MS data file using the MASIC tool""", json_schema_extra = { "linkml_meta": {'alias': 'protein_sum_masic_abundance', - 'deprecated': 'not used. 2024-11 ' - 'https://github.com/microbiomedata/nmdc-schema/issues/2250', - 'domain_of': ['ProteinQuantification']} }) - type: Literal["https://w3id.org/nmdc/ProteinQuantification","nmdc:ProteinQuantification"] = Field("nmdc:ProteinQuantification", description="""the class_uri of the class that has been instantiated""", json_schema_extra = { "linkml_meta": {'alias': 'type', - 'designates_type': True, - 'domain_of': ['EukEval', - 'FunctionalAnnotationAggMember', - 'MobilePhaseSegment', - 'PortionOfSubstance', - 'MagBin', - 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', - 'GenomeFeature', - 'FunctionalAnnotation', - 'AttributeValue', - 'NamedThing', - 'FailureCategorization', - 'Protocol', - 'CreditAssociation', - 'Doi'], - 'examples': [{'value': 'nmdc:Biosample'}, {'value': 'nmdc:Study'}], - 'notes': ['replaces legacy nmdc:type slot', - 'makes it easier to read example data files', - 'required for polymorphic MongoDB collections'], - 'see_also': ['https://github.com/microbiomedata/nmdc-schema/issues/1048', - 'https://github.com/microbiomedata/nmdc-schema/issues/1233', - 
'https://github.com/microbiomedata/nmdc-schema/issues/248'], - 'slot_uri': 'rdf:type'} }) - - class GenomeFeature(ConfiguredBaseModel): """ A feature localized to an interval along a genome @@ -2969,12 +2812,11 @@ class GenomeFeature(ConfiguredBaseModel): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3028,7 +2870,7 @@ class FunctionalAnnotation(ConfiguredBaseModel): 'discussion about that possibility had been started, including the ' 'question of whether these lists are intended to be open examples ' 'or closed']} }) - subject: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'subject', 'domain_of': ['FunctionalAnnotation']} }) + subject: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'subject', 'domain_of': ['FunctionalAnnotation', 'OntologyRelation']} }) was_generated_by: Optional[str] = Field(None, description="""provenance for the annotation.""", json_schema_extra = { "linkml_meta": {'alias': 'was_generated_by', 'any_of': [{'range': 'WorkflowExecution'}, {'range': 'DataGeneration'}], 'domain_of': ['FunctionalAnnotationAggMember', @@ -3047,12 +2889,11 @@ class FunctionalAnnotation(ConfiguredBaseModel): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3110,12 +2951,11 @@ class AttributeValue(ConfiguredBaseModel): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 
'CreditAssociation', @@ -3169,12 +3009,11 @@ class QuantityValue(AttributeValue): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3211,12 +3050,11 @@ class ImageValue(AttributeValue): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3310,12 +3148,11 @@ class PersonValue(AttributeValue): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3358,12 +3195,11 @@ class TextValue(AttributeValue): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3395,12 +3231,11 @@ class TimestampValue(AttributeValue): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3435,12 +3270,11 @@ class ControlledTermValue(AttributeValue): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3476,12 
+3310,11 @@ class ControlledIdentifiedTermValue(ControlledTermValue): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3530,12 +3363,11 @@ class GeolocationValue(AttributeValue): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3584,12 +3416,11 @@ class NamedThing(ConfiguredBaseModel): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3617,7 +3448,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -3665,12 +3496,11 @@ class GeneProduct(NamedThing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3698,7 +3528,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + 
pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -3710,25 +3540,29 @@ def pattern_alternative_identifiers(cls, v): class OntologyClass(NamedThing): + """ + A representation of class defined in an external ontology. + """ linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:OntologyClass', + 'exact_mappings': ['biolink:OntologyClass', 'owl:Class', 'schema:Class'], 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'notes': ["The identifiers for terms from external ontologies can't have " - 'their ids constrained to the nmdc namespace'], 'slot_usage': {'id': {'name': 'id', + 'notes': ['The identifiers for terms from external ' + "ontologies can't have their ids constrained " + 'to the nmdc namespace'], 'pattern': '^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$'}}}) + alternative_names: Optional[List[str]] = Field(None, description="""A list of alternative names used to refer to the entity. The distinction between name and alternative names is application-specific.""", json_schema_extra = { "linkml_meta": {'alias': 'alternative_names', + 'domain_of': ['OntologyClass', 'Study'], + 'exact_mappings': ['dcterms:alternative', 'skos:altLabel']} }) + definition: Optional[str] = Field(None, description="""The definition of the ontology term as provided by the ontology.""", json_schema_extra = { "linkml_meta": {'alias': 'definition', 'domain_of': ['OntologyClass']} }) + relations: Optional[List[OntologyRelation]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'relations', 'domain_of': ['OntologyClass']} }) id: str = Field(..., description="""A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing'], 'examples': [{'description': 'https://github.com/microbiomedata/nmdc-schema/pull/499#discussion_r1018499248', 'value': 'nmdc:mgmag-00-x012.1_7_c1'}], - 'notes': ['abstracted pattern: ' - 'prefix:typecode-authshoulder-blade(.version)?(_seqsuffix)?', - 'a minimum length of 3 characters is suggested for typecodes, but 1 ' - 'or 2 characters will be accepted', - 'typecodes must correspond 1:1 to a class in the NMDC schema. this ' - 'will be checked via per-class id slot usage assertions', - 'minting authority shoulders should probably be enumerated and ' - 'checked in the pattern']} }) + 'notes': ["The identifiers for terms from external ontologies can't have " + 'their ids constrained to the nmdc namespace']} }) name: Optional[str] = Field(None, description="""A human readable label for an entity""", json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['PersonValue', 'NamedThing', 'Protocol']} }) description: Optional[str] = Field(None, description="""a human-readable description of a thing""", json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['ImageValue', 'NamedThing'], @@ -3743,12 +3577,11 @@ class OntologyClass(NamedThing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3776,7 +3609,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not 
pattern.match(element): @@ -3793,18 +3626,17 @@ class EnvironmentalMaterialTerm(OntologyClass): 'https://github.com/microbiomedata/nmdc-schema/issues/1881', 'from_schema': 'https://w3id.org/nmdc/nmdc'}) + alternative_names: Optional[List[str]] = Field(None, description="""A list of alternative names used to refer to the entity. The distinction between name and alternative names is application-specific.""", json_schema_extra = { "linkml_meta": {'alias': 'alternative_names', + 'domain_of': ['OntologyClass', 'Study'], + 'exact_mappings': ['dcterms:alternative', 'skos:altLabel']} }) + definition: Optional[str] = Field(None, description="""The definition of the ontology term as provided by the ontology.""", json_schema_extra = { "linkml_meta": {'alias': 'definition', 'domain_of': ['OntologyClass']} }) + relations: Optional[List[OntologyRelation]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'relations', 'domain_of': ['OntologyClass']} }) id: str = Field(..., description="""A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing'], 'examples': [{'description': 'https://github.com/microbiomedata/nmdc-schema/pull/499#discussion_r1018499248', 'value': 'nmdc:mgmag-00-x012.1_7_c1'}], - 'notes': ['abstracted pattern: ' - 'prefix:typecode-authshoulder-blade(.version)?(_seqsuffix)?', - 'a minimum length of 3 characters is suggested for typecodes, but 1 ' - 'or 2 characters will be accepted', - 'typecodes must correspond 1:1 to a class in the NMDC schema. 
this ' - 'will be checked via per-class id slot usage assertions', - 'minting authority shoulders should probably be enumerated and ' - 'checked in the pattern']} }) + 'notes': ["The identifiers for terms from external ontologies can't have " + 'their ids constrained to the nmdc namespace']} }) name: Optional[str] = Field(None, description="""A human readable label for an entity""", json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['PersonValue', 'NamedThing', 'Protocol']} }) description: Optional[str] = Field(None, description="""a human-readable description of a thing""", json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['ImageValue', 'NamedThing'], @@ -3819,12 +3651,11 @@ class EnvironmentalMaterialTerm(OntologyClass): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3852,7 +3683,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -3886,9 +3717,6 @@ class ChemicalEntity(OntologyClass): 'PUBCHEM.COMPOUND'], 'see_also': ['https://bioconductor.org/packages/devel/data/annotation/vignettes/metaboliteIDmapping/inst/doc/metaboliteIDmapping.html']}) - alternative_names: Optional[List[str]] = Field(None, description="""A list of alternative names used to refer to the entity. 
The distinction between name and alternative names is application-specific.""", json_schema_extra = { "linkml_meta": {'alias': 'alternative_names', - 'domain_of': ['ChemicalEntity', 'Study'], - 'exact_mappings': ['dcterms:alternative', 'skos:altLabel']} }) chemical_formula: Optional[str] = Field(None, description="""A generic grouping for molecular formulae and empirical formulae""", json_schema_extra = { "linkml_meta": {'alias': 'chemical_formula', 'domain_of': ['ChemicalEntity']} }) inchi: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'inchi', 'domain_of': ['ChemicalEntity']} }) inchi_key: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'inchi_key', @@ -3897,18 +3725,17 @@ class ChemicalEntity(OntologyClass): 'McNaught A, Blinov K, Tchekhovskoi D, Heller S (2012) InChIKey ' 'collision resistance: an experimental testing. J Cheminform. 4:12']} }) smiles: Optional[List[str]] = Field(None, description="""A string encoding of a molecular graph, no chiral or isotopic information. There are usually a large number of valid SMILES which represent a given structure. For example, CCO, OCC and C(O)C all specify the structure of ethanol.""", json_schema_extra = { "linkml_meta": {'alias': 'smiles', 'domain_of': ['ChemicalEntity']} }) + alternative_names: Optional[List[str]] = Field(None, description="""A list of alternative names used to refer to the entity. 
The distinction between name and alternative names is application-specific.""", json_schema_extra = { "linkml_meta": {'alias': 'alternative_names', + 'domain_of': ['OntologyClass', 'Study'], + 'exact_mappings': ['dcterms:alternative', 'skos:altLabel']} }) + definition: Optional[str] = Field(None, description="""The definition of the ontology term as provided by the ontology.""", json_schema_extra = { "linkml_meta": {'alias': 'definition', 'domain_of': ['OntologyClass']} }) + relations: Optional[List[OntologyRelation]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'relations', 'domain_of': ['OntologyClass']} }) id: str = Field(..., description="""A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing'], 'examples': [{'description': 'https://github.com/microbiomedata/nmdc-schema/pull/499#discussion_r1018499248', 'value': 'nmdc:mgmag-00-x012.1_7_c1'}], - 'notes': ['abstracted pattern: ' - 'prefix:typecode-authshoulder-blade(.version)?(_seqsuffix)?', - 'a minimum length of 3 characters is suggested for typecodes, but 1 ' - 'or 2 characters will be accepted', - 'typecodes must correspond 1:1 to a class in the NMDC schema. 
this ' - 'will be checked via per-class id slot usage assertions', - 'minting authority shoulders should probably be enumerated and ' - 'checked in the pattern']} }) + 'notes': ["The identifiers for terms from external ontologies can't have " + 'their ids constrained to the nmdc namespace']} }) name: Optional[str] = Field(None, description="""A human readable label for an entity""", json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['PersonValue', 'NamedThing', 'Protocol']} }) description: Optional[str] = Field(None, description="""a human-readable description of a thing""", json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['ImageValue', 'NamedThing'], @@ -3923,12 +3750,11 @@ class ChemicalEntity(OntologyClass): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -3956,7 +3782,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -3980,18 +3806,17 @@ class FunctionalAnnotationTerm(OntologyClass): 'Retaining this even after removing Reaction. See todos on the ' 'Pathway and OrthologyGroup subclasses.']}) + alternative_names: Optional[List[str]] = Field(None, description="""A list of alternative names used to refer to the entity. 
The distinction between name and alternative names is application-specific.""", json_schema_extra = { "linkml_meta": {'alias': 'alternative_names', + 'domain_of': ['OntologyClass', 'Study'], + 'exact_mappings': ['dcterms:alternative', 'skos:altLabel']} }) + definition: Optional[str] = Field(None, description="""The definition of the ontology term as provided by the ontology.""", json_schema_extra = { "linkml_meta": {'alias': 'definition', 'domain_of': ['OntologyClass']} }) + relations: Optional[List[OntologyRelation]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'relations', 'domain_of': ['OntologyClass']} }) id: str = Field(..., description="""A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing'], 'examples': [{'description': 'https://github.com/microbiomedata/nmdc-schema/pull/499#discussion_r1018499248', 'value': 'nmdc:mgmag-00-x012.1_7_c1'}], - 'notes': ['abstracted pattern: ' - 'prefix:typecode-authshoulder-blade(.version)?(_seqsuffix)?', - 'a minimum length of 3 characters is suggested for typecodes, but 1 ' - 'or 2 characters will be accepted', - 'typecodes must correspond 1:1 to a class in the NMDC schema. 
this ' - 'will be checked via per-class id slot usage assertions', - 'minting authority shoulders should probably be enumerated and ' - 'checked in the pattern']} }) + 'notes': ["The identifiers for terms from external ontologies can't have " + 'their ids constrained to the nmdc namespace']} }) name: Optional[str] = Field(None, description="""A human readable label for an entity""", json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['PersonValue', 'NamedThing', 'Protocol']} }) description: Optional[str] = Field(None, description="""a human-readable description of a thing""", json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['ImageValue', 'NamedThing'], @@ -4006,12 +3831,11 @@ class FunctionalAnnotationTerm(OntologyClass): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -4039,7 +3863,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -4066,18 +3890,17 @@ class Pathway(FunctionalAnnotationTerm): "is Pathway instantiated in an MongoDB collection? Aren't Pathways " 'searchable in the Data Portal?']}) + alternative_names: Optional[List[str]] = Field(None, description="""A list of alternative names used to refer to the entity. 
The distinction between name and alternative names is application-specific.""", json_schema_extra = { "linkml_meta": {'alias': 'alternative_names', + 'domain_of': ['OntologyClass', 'Study'], + 'exact_mappings': ['dcterms:alternative', 'skos:altLabel']} }) + definition: Optional[str] = Field(None, description="""The definition of the ontology term as provided by the ontology.""", json_schema_extra = { "linkml_meta": {'alias': 'definition', 'domain_of': ['OntologyClass']} }) + relations: Optional[List[OntologyRelation]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'relations', 'domain_of': ['OntologyClass']} }) id: str = Field(..., description="""A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing'], 'examples': [{'description': 'https://github.com/microbiomedata/nmdc-schema/pull/499#discussion_r1018499248', 'value': 'nmdc:mgmag-00-x012.1_7_c1'}], - 'notes': ['abstracted pattern: ' - 'prefix:typecode-authshoulder-blade(.version)?(_seqsuffix)?', - 'a minimum length of 3 characters is suggested for typecodes, but 1 ' - 'or 2 characters will be accepted', - 'typecodes must correspond 1:1 to a class in the NMDC schema. 
this ' - 'will be checked via per-class id slot usage assertions', - 'minting authority shoulders should probably be enumerated and ' - 'checked in the pattern']} }) + 'notes': ["The identifiers for terms from external ontologies can't have " + 'their ids constrained to the nmdc namespace']} }) name: Optional[str] = Field(None, description="""A human readable label for an entity""", json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['PersonValue', 'NamedThing', 'Protocol']} }) description: Optional[str] = Field(None, description="""a human-readable description of a thing""", json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['ImageValue', 'NamedThing'], @@ -4092,12 +3915,11 @@ class Pathway(FunctionalAnnotationTerm): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -4125,7 +3947,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -4154,18 +3976,17 @@ class OrthologyGroup(FunctionalAnnotationTerm): 'todos': ["is OrthologyGroup instantiated in an MongoDB collection? Aren't " 'Pathways searchable in the Data Portal?']}) + alternative_names: Optional[List[str]] = Field(None, description="""A list of alternative names used to refer to the entity. 
The distinction between name and alternative names is application-specific.""", json_schema_extra = { "linkml_meta": {'alias': 'alternative_names', + 'domain_of': ['OntologyClass', 'Study'], + 'exact_mappings': ['dcterms:alternative', 'skos:altLabel']} }) + definition: Optional[str] = Field(None, description="""The definition of the ontology term as provided by the ontology.""", json_schema_extra = { "linkml_meta": {'alias': 'definition', 'domain_of': ['OntologyClass']} }) + relations: Optional[List[OntologyRelation]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'relations', 'domain_of': ['OntologyClass']} }) id: str = Field(..., description="""A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing'], 'examples': [{'description': 'https://github.com/microbiomedata/nmdc-schema/pull/499#discussion_r1018499248', 'value': 'nmdc:mgmag-00-x012.1_7_c1'}], - 'notes': ['abstracted pattern: ' - 'prefix:typecode-authshoulder-blade(.version)?(_seqsuffix)?', - 'a minimum length of 3 characters is suggested for typecodes, but 1 ' - 'or 2 characters will be accepted', - 'typecodes must correspond 1:1 to a class in the NMDC schema. 
this ' - 'will be checked via per-class id slot usage assertions', - 'minting authority shoulders should probably be enumerated and ' - 'checked in the pattern']} }) + 'notes': ["The identifiers for terms from external ontologies can't have " + 'their ids constrained to the nmdc namespace']} }) name: Optional[str] = Field(None, description="""A human readable label for an entity""", json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['PersonValue', 'NamedThing', 'Protocol']} }) description: Optional[str] = Field(None, description="""a human-readable description of a thing""", json_schema_extra = { "linkml_meta": {'alias': 'description', 'domain_of': ['ImageValue', 'NamedThing'], @@ -4180,12 +4001,11 @@ class OrthologyGroup(FunctionalAnnotationTerm): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -4213,7 +4033,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -4224,6 +4044,63 @@ def pattern_alternative_identifiers(cls, v): return v +class OntologyRelation(ConfiguredBaseModel): + """ + A relationship between two ontology classes as specified either directly in the ontology in the form of axioms (statements or assertions that defines rules or constraints in an ontology) or inferred via reasoning. The association object is defined by two terms (the subject and the object) and the relationship between them (the predicate). 
Because ontologies often have a plethora of relationships/axiom types and can have additional metadata on the relationship itself, these kinds of relationships are structured as a class instead of a simple set of slots on OntologyClass itself. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'aliases': ['OntologyAssociation'], + 'class_uri': 'nmdc:OntologyRelation', + 'comments': ["For example, the relationship between 'soil' and 'enriched " + "soil' in the Environmental Ontology (ENVO) is defined by the " + "following axioms: 'enriched soil' subclass_of 'soil', and " + "'enriched soil' has_increased_levels_of (some) 'material " + "entity.' Converting these statements to OntologyAssociations " + 'so they can be used in the NMDC data stores, the subject of the ' + "first axiom or statement, would be 'soil', the predicate would " + "be 'subclass_of', and the object would be 'enriched soil'. For " + "the second axiom, the subject would be 'enriched soil', the " + "predicate would be 'has_increased_levels_of', and the object " + "would be 'material entity.' (Note that text labels are used in " + 'this example for ease of understanding and the literal values ' + 'of subject, predicate, and object in this class will be the id ' + '(curie) of the `OntologyClass` as defined below). Not all ' + 'ontology axioms (associations) between terms need to be ' + 'ingested into the NMDC data stores. 
In general, subclass_of ' + 'and part_of relationships/axioms are often good default ' + 'relations/associations to support ontology browsing in user ' + 'interfaces.'], + 'from_schema': 'https://w3id.org/nmdc/nmdc'}) + + type: Literal["https://w3id.org/nmdc/OntologyRelation","nmdc:OntologyRelation"] = Field("nmdc:OntologyRelation", description="""the class_uri of the class that has been instantiated""", json_schema_extra = { "linkml_meta": {'alias': 'type', + 'designates_type': True, + 'domain_of': ['EukEval', + 'FunctionalAnnotationAggMember', + 'MobilePhaseSegment', + 'PortionOfSubstance', + 'MagBin', + 'MetaboliteIdentification', + 'GenomeFeature', + 'FunctionalAnnotation', + 'AttributeValue', + 'NamedThing', + 'OntologyRelation', + 'FailureCategorization', + 'Protocol', + 'CreditAssociation', + 'Doi'], + 'examples': [{'value': 'nmdc:Biosample'}, {'value': 'nmdc:Study'}], + 'notes': ['replaces legacy nmdc:type slot', + 'makes it easier to read example data files', + 'required for polymorphic MongoDB collections'], + 'see_also': ['https://github.com/microbiomedata/nmdc-schema/issues/1048', + 'https://github.com/microbiomedata/nmdc-schema/issues/1233', + 'https://github.com/microbiomedata/nmdc-schema/issues/248'], + 'slot_uri': 'rdf:type'} }) + subject: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'subject', 'domain_of': ['FunctionalAnnotation', 'OntologyRelation']} }) + predicate: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'predicate', 'domain_of': ['OntologyRelation']} }) + object: str = Field(..., json_schema_extra = { "linkml_meta": {'alias': 'object', 'domain_of': ['OntologyRelation']} }) + + class FailureCategorization(ConfiguredBaseModel): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:FailureCategorization', 'from_schema': 'https://w3id.org/nmdc/nmdc'}) @@ -4244,12 +4121,11 @@ class FailureCategorization(ConfiguredBaseModel): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 
'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -4297,12 +4173,11 @@ class MaterialEntity(NamedThing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -4330,7 +4205,84 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") + if isinstance(v,list): + for element in v: + if isinstance(v, str) and not pattern.match(element): + raise ValueError(f"Invalid alternative_identifiers format: {element}") + elif isinstance(v,str): + if not pattern.match(v): + raise ValueError(f"Invalid alternative_identifiers format: {v}") + return v + + +class Sample(MaterialEntity): + """ + A sample is a material entity that can be characterized by an experiment. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'abstract': True, + 'class_uri': 'nmdc:Sample', + 'from_schema': 'https://w3id.org/nmdc/nmdc'}) + + id: str = Field(..., description="""A unique identifier for a thing. Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', + 'domain_of': ['NamedThing'], + 'examples': [{'description': 'https://github.com/microbiomedata/nmdc-schema/pull/499#discussion_r1018499248', + 'value': 'nmdc:mgmag-00-x012.1_7_c1'}], + 'notes': ['abstracted pattern: ' + 'prefix:typecode-authshoulder-blade(.version)?(_seqsuffix)?', + 'a minimum length of 3 characters is suggested for typecodes, but 1 ' + 'or 2 characters will be accepted', + 'typecodes must correspond 1:1 to a class in the NMDC schema. 
this ' + 'will be checked via per-class id slot usage assertions', + 'minting authority shoulders should probably be enumerated and ' + 'checked in the pattern']} }) + name: Optional[str] = Field(None, description="""A human readable label for an entity""", json_schema_extra = { "linkml_meta": {'alias': 'name', 'domain_of': ['PersonValue', 'NamedThing', 'Protocol']} }) + description: Optional[str] = Field(None, description="""a human-readable description of a thing""", json_schema_extra = { "linkml_meta": {'alias': 'description', + 'domain_of': ['ImageValue', 'NamedThing'], + 'slot_uri': 'dcterms:description'} }) + alternative_identifiers: Optional[List[str]] = Field(None, description="""A list of alternative identifiers for the entity.""", json_schema_extra = { "linkml_meta": {'alias': 'alternative_identifiers', + 'domain_of': ['MetaboliteIdentification', 'NamedThing']} }) + type: Literal["https://w3id.org/nmdc/Sample","nmdc:Sample"] = Field("nmdc:Sample", description="""the class_uri of the class that has been instantiated""", json_schema_extra = { "linkml_meta": {'alias': 'type', + 'designates_type': True, + 'domain_of': ['EukEval', + 'FunctionalAnnotationAggMember', + 'MobilePhaseSegment', + 'PortionOfSubstance', + 'MagBin', + 'MetaboliteIdentification', + 'GenomeFeature', + 'FunctionalAnnotation', + 'AttributeValue', + 'NamedThing', + 'OntologyRelation', + 'FailureCategorization', + 'Protocol', + 'CreditAssociation', + 'Doi'], + 'examples': [{'value': 'nmdc:Biosample'}, {'value': 'nmdc:Study'}], + 'notes': ['replaces legacy nmdc:type slot', + 'makes it easier to read example data files', + 'required for polymorphic MongoDB collections'], + 'see_also': ['https://github.com/microbiomedata/nmdc-schema/issues/1048', + 'https://github.com/microbiomedata/nmdc-schema/issues/1233', + 'https://github.com/microbiomedata/nmdc-schema/issues/248'], + 'slot_uri': 'rdf:type'} }) + + @field_validator('id') + def pattern_id(cls, v): 
pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + if isinstance(v,list): + for element in v: + if isinstance(v, str) and not pattern.match(element): + raise ValueError(f"Invalid id format: {element}") + elif isinstance(v,str): + if not pattern.match(v): + raise ValueError(f"Invalid id format: {v}") + return v + + @field_validator('alternative_identifiers') + def pattern_alternative_identifiers(cls, v): + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -4341,7 +4293,7 @@ def pattern_alternative_identifiers(cls, v): return v -class Biosample(MaterialEntity): +class Biosample(Sample): """ Biological source material which can be characterized by an experiment. """ @@ -4361,7 +4313,6 @@ class Biosample(MaterialEntity): 'NCIT:C43412', 'http://purl.obolibrary.org/obo/FBcv_0003024'], 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['sample subset'], 'notes': ["could add GOLD and EBI's biosample definitions to the " 'alt_descriptions?'], 'rules': [{'description': 'DNA samples shipped to JGI for metagenomic ' @@ -5074,31 +5025,7 @@ class Biosample(MaterialEntity): 'aliases': ['altitude'], 'annotations': {'expected_value': {'tag': 'expected_value', 'value': 'measurement value'}}, - 'domain_of': ['agriculture', - 'air', - 'built environment', - 'core', - 'food-animal and animal feed', - 'food-farm environment', - 'food-food production facility', - 'food-human foods', - 'host-associated', - 'human-associated', - 'human-gut', - 'human-oral', - 'human-skin', - 'human-vaginal', - 'hydrocarbon resources-cores', - 'hydrocarbon resources-fluids_swabs', - 'microbial mat_biofilm', - 'miscellaneous natural or artificial environment', - 'plant-associated', - 'sediment', - 'soil', - 'symbiont-associated', - 'wastewater_sludge', - 'water', - 'Biosample'], + 'domain_of': ['Biosample'], 'examples': 
[{'value': '100 meter'}], 'is_a': 'environment field', 'slot_uri': 'MIXS:0000094'} }) @@ -6282,9 +6209,7 @@ class Biosample(MaterialEntity): 'describe the broad anatomical or ' 'morphological context'}}, 'domain_of': ['Biosample'], - 'examples': [{'value': 'oceanic epipelagic zone biome [ENVO:01000033] for ' - 'annotating a water sample from the photic zone in ' - 'middle of the Atlantic Ocean'}], + 'examples': [{'value': 'oceanic epipelagic zone biome [ENVO:01000035]'}], 'is_a': 'environment field', 'slot_uri': 'MIXS:0000012', 'string_serialization': '{termLabel} {[termID]}'} }) @@ -6311,12 +6236,7 @@ class Biosample(MaterialEntity): 'Plant Ontology to describe specific ' 'anatomical structures or plant parts.'}}, 'domain_of': ['Biosample'], - 'examples': [{'value': 'litter layer [ENVO:01000338]; Annotating a pooled ' - 'sample taken from various vegetation layers in a ' - 'forest consider: canopy [ENVO:00000047]|herb and fern ' - 'layer [ENVO:01000337]|litter layer ' - '[ENVO:01000338]|understory [01000335]|shrub layer ' - '[ENVO:01000336].'}], + 'examples': [{'value': 'litter layer [ENVO:01000338]'}], 'is_a': 'environment field', 'slot_uri': 'MIXS:0000013', 'string_serialization': '{termLabel} {[termID]}'} }) @@ -6344,18 +6264,13 @@ class Biosample(MaterialEntity): 'Plant Ontology to indicate a tissue, ' 'organ, or plant structure'}}, 'domain_of': ['Biosample'], - 'examples': [{'value': 'soil [ENVO:00001998]; Annotating a fish swimming in ' - 'the upper 100 m of the Atlantic Ocean, consider: ' - 'ocean water [ENVO:00002151]. 
Example: Annotating a ' - 'duck on a pond consider: pond water ' - '[ENVO:00002228]|air [ENVO_00002005]'}], + 'examples': [{'value': 'soil [ENVO:00001998]'}], 'is_a': 'environment field', 'slot_uri': 'MIXS:0000014', 'string_serialization': '{termLabel} {[termID]}'} }) env_package: Optional[TextValue] = Field(None, description="""MIxS extension for reporting of measurements and observations obtained from one or more of the environments where the sample was obtained. All environmental packages listed here are further defined in separate subtables. By giving the name of the environmental package, a selection of fields can be made from the subtables and can be reported""", json_schema_extra = { "linkml_meta": {'alias': 'env_package', 'aliases': ['environmental package'], 'domain_of': ['Biosample'], - 'in_subset': ['mixs extension'], 'notes': ['no longer in MIxS as of 6.0?']} }) environment_field: Optional[str] = Field(None, description="""field describing environmental aspect of a sample""", json_schema_extra = { "linkml_meta": {'abstract': True, 'alias': 'environment_field', 'domain_of': ['Biosample']} }) escalator: Optional[TextValue] = Field(None, title="escalator count", description="""The number of escalators within the built structure""", json_schema_extra = { "linkml_meta": {'alias': 'escalator', @@ -7001,7 +6916,7 @@ class Biosample(MaterialEntity): 'value': 'FMA or UBERON'}, 'occurrence': {'tag': 'occurrence', 'value': '1'}}, 'domain_of': ['Biosample'], - 'examples': [{'value': 'Portion of mucus [fma66938]'}], + 'examples': [{'value': 'mucus [UBERON:0000912]'}], 'is_a': 'core field', 'slot_uri': 'MIXS:0000888', 'string_serialization': '{termLabel} {[termID]}'} }) @@ -10605,7 +10520,7 @@ class Biosample(MaterialEntity): 'comments': ['Date should be formatted as YYYY(-MM(-DD)). 
Ie, 2021-04-15, ' '2021-04 and 2021 are all acceptable.'], 'domain_of': ['Biosample'], - 'examples': [{'value': '2021-04-15, 2021-04 and 2021 are all acceptable.'}], + 'examples': [{'value': '2021-04-15'}, {'value': '2021-04'}, {'value': '2021'}], 'notes': ['MIxS collection_date accepts (truncated) ISO8601. DH taking ' 'arbitrary precision date only'], 'rank': 2, @@ -10617,7 +10532,7 @@ class Biosample(MaterialEntity): 'comments': ['Time should be entered as HH:MM(:SS) in GMT. See here for a ' 'converter: https://www.worldtimebuddy.com/pst-to-gmt-converter'], 'domain_of': ['Biosample'], - 'examples': [{'value': '13:33 or 13:33:55'}], + 'examples': [{'value': '813'}, {'value': '48835'}], 'notes': ['MIxS collection_date accepts (truncated) ISO8601. DH taking ' 'seconds optional time only'], 'rank': 1, @@ -10629,7 +10544,7 @@ class Biosample(MaterialEntity): 'comments': ['Time should be entered as HH:MM(:SS) in GMT. See here for a ' 'converter: https://www.worldtimebuddy.com/pst-to-gmt-converter'], 'domain_of': ['Biosample'], - 'examples': [{'value': '13:33 or 13:33:55'}], + 'examples': [{'value': '813'}, {'value': '48835'}], 'notes': ['MIxS collection_date accepts (truncated) ISO8601. 
DH taking ' 'seconds optional time only'], 'rank': 3, @@ -10663,12 +10578,11 @@ class Biosample(MaterialEntity): 'comments': ['This is required when your experimental design includes the use ' 'of isotopically labeled compounds'], 'domain_of': ['Biosample'], - 'examples': [{'value': '13C glucose'}, {'value': 'H218O'}], + 'examples': [{'value': '13C glucose'}, {'value': '18O water'}], 'rank': 16, 'recommended': True, 'see_also': ['MIXS:0000751'], 'slot_group': 'MIxS Inspired', - 'string_serialization': '{termLabel} {[termID]}; {timestamp}', 'todos': ['Can we make the H218O correctly super and subscripted?']} }) micro_biomass_c_meth: Optional[str] = Field(None, title="microbial biomass carbon method", description="""Reference or method used in determining microbial biomass carbon""", json_schema_extra = { "linkml_meta": {'alias': 'micro_biomass_c_meth', 'comments': ['required if "microbial_biomass_c" is provided'], @@ -10748,7 +10662,7 @@ class Biosample(MaterialEntity): 'comments': ['Date should be formatted as YYYY(-MM(-DD)). Ie, 2021-04-15, ' '2021-04 and 2021 are all acceptable.'], 'domain_of': ['Biosample'], - 'examples': [{'value': '2021-04-15, 2021-04 and 2021 are all acceptable.'}], + 'examples': [{'value': '2021-04-15'}, {'value': '2021-04'}, {'value': '2021'}], 'notes': ['MIxS collection_date accepts (truncated) ISO8601. DH taking ' 'arbitrary precision date only'], 'rank': 4, @@ -10760,7 +10674,7 @@ class Biosample(MaterialEntity): 'comments': ['Time should be entered as HH:MM(:SS) in GMT. See here for a ' 'converter: https://www.worldtimebuddy.com/pst-to-gmt-converter'], 'domain_of': ['Biosample'], - 'examples': [{'value': '13:33 or 13:33:55'}], + 'examples': [{'value': '813'}, {'value': '48835'}], 'notes': ['MIxS collection_date accepts (truncated) ISO8601. 
DH taking ' 'seconds optional time only'], 'rank': 5, @@ -10810,7 +10724,7 @@ class Biosample(MaterialEntity): 'string_serialization': '{integer}'} }) analysis_type: Optional[List[AnalysisTypeEnum]] = Field(None, title="analysis/data type", description="""Select all the data types associated or available for this biosample""", json_schema_extra = { "linkml_meta": {'alias': 'analysis_type', 'domain_of': ['Biosample'], - 'examples': [{'value': 'metagenomics; metabolomics; proteomics'}], + 'examples': [{'value': 'metagenomics; metabolomics; metaproteomics'}], 'rank': 3, 'recommended': True, 'see_also': ['MIxS:investigation_type'], @@ -10868,12 +10782,11 @@ class Biosample(MaterialEntity): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -11057,7 +10970,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -11068,7 +10981,7 @@ def pattern_alternative_identifiers(cls, v): return v -class ProcessedSample(MaterialEntity): +class ProcessedSample(Sample): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:ProcessedSample', 'from_schema': 'https://w3id.org/nmdc/nmdc', 'slot_usage': {'id': {'name': 'id', @@ -11132,12 +11045,11 @@ class ProcessedSample(MaterialEntity): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 
'Protocol', 'CreditAssociation', @@ -11177,7 +11089,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -11221,12 +11133,11 @@ class Site(MaterialEntity): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -11254,7 +11165,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -11386,12 +11297,11 @@ class FieldResearchSite(Site): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -11431,7 +11341,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -11485,12 +11395,11 @@ class Instrument(MaterialEntity): 
'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -11518,7 +11427,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -11580,12 +11489,11 @@ class PlannedProcess(NamedThing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -11613,7 +11521,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -11704,12 +11612,11 @@ class CollectingBiosamplesFromSite(PlannedProcess): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -11761,7 +11668,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + 
pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -11862,12 +11769,11 @@ class ProtocolExecution(PlannedProcess): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -11931,7 +11837,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -12042,12 +11948,11 @@ class StorageProcess(PlannedProcess): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -12099,7 +12004,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -12119,10 +12024,9 @@ class MaterialProcessing(PlannedProcess): 'class_uri': 'nmdc:MaterialProcessing', 'from_schema': 'https://w3id.org/nmdc/nmdc', 'notes': ['This class is a replacement for BiosampleProcessing.'], - 'slot_usage': {'has_input': {'any_of': [{'range': 'Biosample'}, - {'range': 'ProcessedSample'}], 
- 'name': 'has_input', + 'slot_usage': {'has_input': {'name': 'has_input', 'pattern': '^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$', + 'range': 'Sample', 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}}, 'has_output': {'name': 'has_output', @@ -12135,7 +12039,6 @@ class MaterialProcessing(PlannedProcess): 'domain_of': ['MaterialProcessing', 'DataGeneration']} }) has_input: Optional[List[str]] = Field(None, description="""An input to a process.""", json_schema_extra = { "linkml_meta": {'alias': 'has_input', 'aliases': ['input'], - 'any_of': [{'range': 'Biosample'}, {'range': 'ProcessedSample'}], 'domain_of': ['PlannedProcess'], 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}} }) @@ -12187,12 +12090,11 @@ class MaterialProcessing(PlannedProcess): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -12244,7 +12146,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -12285,7 +12187,6 @@ class Pooling(MaterialProcessing): 'domain_of': ['MaterialProcessing', 'DataGeneration']} }) has_input: List[str] = Field(..., description="""An input to a process.""", min_length=2, json_schema_extra = { "linkml_meta": {'alias': 'has_input', 'aliases': ['input'], - 'any_of': [{'range': 'Biosample'}, {'range': 'ProcessedSample'}], 'domain_of': ['PlannedProcess'], 'structured_pattern': 
{'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}} }) @@ -12339,12 +12240,11 @@ class Pooling(MaterialProcessing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -12396,7 +12296,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -12414,10 +12314,9 @@ class Extraction(MaterialProcessing): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:Extraction', 'exact_mappings': ['OBI:0302884'], 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'slot_usage': {'has_input': {'any_of': [{'range': 'Biosample'}, - {'range': 'ProcessedSample'}], - 'name': 'has_input', + 'slot_usage': {'has_input': {'name': 'has_input', 'pattern': '^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$', + 'range': 'Sample', 'required': True, 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}}, @@ -12462,7 +12361,6 @@ class Extraction(MaterialProcessing): 'domain_of': ['MaterialProcessing', 'DataGeneration']} }) has_input: List[str] = Field(..., description="""An input to a process.""", json_schema_extra = { "linkml_meta": {'alias': 'has_input', 'aliases': ['input'], - 'any_of': [{'range': 'Biosample'}, {'range': 'ProcessedSample'}], 'domain_of': ['PlannedProcess'], 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}} }) @@ -12516,12 +12414,11 @@ class 
Extraction(MaterialProcessing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -12573,7 +12470,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -12664,7 +12561,6 @@ class LibraryPreparation(MaterialProcessing): 'domain_of': ['MaterialProcessing', 'DataGeneration']} }) has_input: List[str] = Field(..., description="""An input to a process.""", json_schema_extra = { "linkml_meta": {'alias': 'has_input', 'aliases': ['input'], - 'any_of': [{'range': 'Biosample'}, {'range': 'ProcessedSample'}], 'domain_of': ['PlannedProcess'], 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}} }) @@ -12718,12 +12614,11 @@ class LibraryPreparation(MaterialProcessing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -12775,7 +12670,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -12805,10 +12700,9 @@ 
class SubSamplingProcess(MaterialProcessing): 'usually apparent from the context or is defined.', 'TODO - Montana to visit slot descriptions'], 'related_mappings': ['OBI:0000744'], - 'slot_usage': {'has_input': {'any_of': [{'range': 'Biosample'}, - {'range': 'ProcessedSample'}], - 'name': 'has_input', + 'slot_usage': {'has_input': {'name': 'has_input', 'pattern': '^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$', + 'range': 'Sample', 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}}, 'has_output': {'description': 'The subsample.', @@ -12861,7 +12755,6 @@ class SubSamplingProcess(MaterialProcessing): 'domain_of': ['MaterialProcessing', 'DataGeneration']} }) has_input: Optional[List[str]] = Field(None, description="""An input to a process.""", json_schema_extra = { "linkml_meta": {'alias': 'has_input', 'aliases': ['input'], - 'any_of': [{'range': 'Biosample'}, {'range': 'ProcessedSample'}], 'domain_of': ['PlannedProcess'], 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}} }) @@ -12915,12 +12808,11 @@ class SubSamplingProcess(MaterialProcessing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -12972,7 +12864,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -13000,10 +12892,9 @@ class MixingProcess(MaterialProcessing): 'measurement system.'], 'contributors': 
['ORCID:0009-0001-1555-1601', 'ORCID:0000-0002-8683-0050'], 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'slot_usage': {'has_input': {'any_of': [{'range': 'Biosample'}, - {'range': 'ProcessedSample'}], - 'name': 'has_input', + 'slot_usage': {'has_input': {'name': 'has_input', 'pattern': '^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$', + 'range': 'Sample', 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}}, 'has_output': {'description': 'The mixed sample.', @@ -13027,7 +12918,6 @@ class MixingProcess(MaterialProcessing): 'domain_of': ['MaterialProcessing', 'DataGeneration']} }) has_input: Optional[List[str]] = Field(None, description="""An input to a process.""", json_schema_extra = { "linkml_meta": {'alias': 'has_input', 'aliases': ['input'], - 'any_of': [{'range': 'Biosample'}, {'range': 'ProcessedSample'}], 'domain_of': ['PlannedProcess'], 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}} }) @@ -13080,12 +12970,11 @@ class MixingProcess(MaterialProcessing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -13137,7 +13026,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -13159,10 +13048,9 @@ class FiltrationProcess(MaterialProcessing): 'ORCID:0009-0008-4013-7737'], 'from_schema': 'https://w3id.org/nmdc/nmdc', 'related_mappings': ['CHMO:0001640'], - 
'slot_usage': {'has_input': {'any_of': [{'range': 'Biosample'}, - {'range': 'ProcessedSample'}], - 'name': 'has_input', + 'slot_usage': {'has_input': {'name': 'has_input', 'pattern': '^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$', + 'range': 'Sample', 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}}, 'has_output': {'name': 'has_output', @@ -13211,7 +13099,6 @@ class FiltrationProcess(MaterialProcessing): 'domain_of': ['MaterialProcessing', 'DataGeneration']} }) has_input: Optional[List[str]] = Field(None, description="""An input to a process.""", json_schema_extra = { "linkml_meta": {'alias': 'has_input', 'aliases': ['input'], - 'any_of': [{'range': 'Biosample'}, {'range': 'ProcessedSample'}], 'domain_of': ['PlannedProcess'], 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}} }) @@ -13265,12 +13152,11 @@ class FiltrationProcess(MaterialProcessing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -13322,7 +13208,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -13340,10 +13226,9 @@ class ChromatographicSeparationProcess(MaterialProcessing): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:ChromatographicSeparationProcess', 'contributors': ['ORCID:0009-0001-1555-1601', 'ORCID:0000-0002-1368-8217'], 'from_schema': 'https://w3id.org/nmdc/nmdc', - 
'slot_usage': {'has_input': {'any_of': [{'range': 'Biosample'}, - {'range': 'ProcessedSample'}], - 'name': 'has_input', + 'slot_usage': {'has_input': {'name': 'has_input', 'pattern': '^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$', + 'range': 'Sample', 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}}, 'has_output': {'name': 'has_output', @@ -13379,7 +13264,6 @@ class ChromatographicSeparationProcess(MaterialProcessing): 'domain_of': ['MaterialProcessing', 'DataGeneration']} }) has_input: Optional[List[str]] = Field(None, description="""An input to a process.""", json_schema_extra = { "linkml_meta": {'alias': 'has_input', 'aliases': ['input'], - 'any_of': [{'range': 'Biosample'}, {'range': 'ProcessedSample'}], 'domain_of': ['PlannedProcess'], 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}} }) @@ -13432,12 +13316,11 @@ class ChromatographicSeparationProcess(MaterialProcessing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -13489,7 +13372,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -13541,7 +13424,6 @@ class DissolvingProcess(MaterialProcessing): 'domain_of': ['MaterialProcessing', 'DataGeneration']} }) has_input: Optional[List[str]] = Field(None, description="""An input to a process.""", json_schema_extra = { "linkml_meta": {'alias': 'has_input', 
'aliases': ['input'], - 'any_of': [{'range': 'Biosample'}, {'range': 'ProcessedSample'}], 'domain_of': ['PlannedProcess'], 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}} }) @@ -13595,12 +13477,11 @@ class DissolvingProcess(MaterialProcessing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -13652,7 +13533,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -13709,7 +13590,6 @@ class ChemicalConversionProcess(MaterialProcessing): 'domain_of': ['MaterialProcessing', 'DataGeneration']} }) has_input: Optional[List[str]] = Field(None, description="""An input to a process.""", json_schema_extra = { "linkml_meta": {'alias': 'has_input', 'aliases': ['input'], - 'any_of': [{'range': 'Biosample'}, {'range': 'ProcessedSample'}], 'domain_of': ['PlannedProcess'], 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}} }) @@ -13763,12 +13643,11 @@ class ChemicalConversionProcess(MaterialProcessing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -13820,7 +13699,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): 
- pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -13846,12 +13725,11 @@ class Protocol(ConfiguredBaseModel): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -13892,12 +13770,11 @@ class CreditAssociation(ConfiguredBaseModel): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -13936,21 +13813,18 @@ class Doi(ConfiguredBaseModel): 'exact_mappings': ['OBI:0002110'], 'examples': [{'description': 'The DOI links to an electronic document.', 'value': 'doi:10.46936/10.25585/60000880'}], - 'in_subset': ['data_portal_subset'], 'narrow_mappings': ['edam.data:1188']} }) doi_provider: Optional[DoiProviderEnum] = Field(None, description="""The authority, or organization, the DOI is associated with.""", json_schema_extra = { "linkml_meta": {'alias': 'doi_provider', 'close_mappings': ['NCIT:C74932'], 'domain_of': ['Doi'], 'examples': [{'description': 'The corresponding DOI is associated with ' 'ESS-DIVE.', - 'value': 'ess_dive'}], - 'in_subset': ['data_portal_subset']} }) + 'value': 'ess_dive'}]} }) doi_category: DoiCategoryEnum = Field(..., description="""The resource type the corresponding doi resolves to.""", json_schema_extra = { "linkml_meta": {'alias': 'doi_category', 'domain_of': ['Doi'], 'examples': [{'description': 'The corresponding DOI is a dataset resource ' 'type.', - 'value': 'dataset_doi'}], - 'in_subset': 
['data_portal_subset']} }) + 'value': 'dataset_doi'}]} }) type: Literal["https://w3id.org/nmdc/Doi","nmdc:Doi"] = Field("nmdc:Doi", description="""the class_uri of the class that has been instantiated""", json_schema_extra = { "linkml_meta": {'alias': 'type', 'designates_type': True, 'domain_of': ['EukEval', @@ -13959,12 +13833,11 @@ class Doi(ConfiguredBaseModel): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -14019,7 +13892,6 @@ class Study(NamedThing): 'NCIT:C41198', 'ISA:Investigation'], 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['sample subset'], 'notes': ['sample GOLD link https://bioregistry.io/gold:Gs0110115', 'sample insdc.srs link ' 'https://www.ebi.ac.uk/ena/browser/view/PRJEB45055 ?', @@ -14243,7 +14115,7 @@ class Study(NamedThing): 'are also associated with this ' 'submission or other names / identifiers ' 'for this study.'}}, - 'domain_of': ['ChemicalEntity', 'Study'], + 'domain_of': ['OntologyClass', 'Study'], 'exact_mappings': ['dcterms:alternative', 'skos:altLabel']} }) alternative_titles: Optional[List[str]] = Field(None, description="""A list of alternative titles for the entity. 
The distinction between title and alternative titles is application-specific.""", json_schema_extra = { "linkml_meta": {'alias': 'alternative_titles', 'domain_of': ['Study'], @@ -14305,8 +14177,7 @@ class Study(NamedThing): "{'doi': 'doi:10.1101/2022.12.12.520098', " "'doi_category': 'publication_doi'}, {'doi': " "'doi:10.48321/D1Z60Q', 'doi_category': " - "'data_management_plan_doi', 'doi_provider': 'gsc'}]"}], - 'in_subset': ['data_portal_subset']} }) + "'data_management_plan_doi', 'doi_provider': 'gsc'}]"}]} }) funding_sources: Optional[List[str]] = Field(None, description="""A list of organizations, along with the award numbers, that underwrite financial support for projects of a particular type. Typically, they process applications and award funds to the chosen qualified applicants.""", json_schema_extra = { "linkml_meta": {'alias': 'funding_sources', 'close_mappings': ['NCIT:C39409'], 'comments': ['Include only the name of the funding organization and the award ' @@ -14404,12 +14275,11 @@ class Study(NamedThing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -14557,7 +14427,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -14617,12 +14487,11 @@ class InformationObject(NamedThing): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 
'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -14650,7 +14519,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -14697,12 +14566,11 @@ class Configuration(InformationObject): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -14730,7 +14598,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -14799,12 +14667,11 @@ class MassSpectrometryConfiguration(Configuration): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -14832,7 +14699,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -14905,12 
+14772,11 @@ class ChromatographyConfiguration(Configuration): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -14938,7 +14804,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -14991,12 +14857,11 @@ class Manifest(InformationObject): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -15024,7 +14889,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -15064,7 +14929,10 @@ class CalibrationInformation(InformationObject): 'internal_calibration': {'name': 'internal_calibration', 'required': True}}}) - calibration_object: Optional[str] = Field(None, description="""the file containing calibration data object""", json_schema_extra = { "linkml_meta": {'alias': 'calibration_object', 'domain_of': ['CalibrationInformation']} }) + calibration_object: Optional[str] = Field(None, description="""the file containing calibration data object""", json_schema_extra = { 
"linkml_meta": {'alias': 'calibration_object', + 'domain_of': ['CalibrationInformation'], + 'structured_pattern': {'interpolated': True, + 'syntax': '{id_nmdc_prefix}:dobj-{id_shoulder}-{id_blade}$'}} }) internal_calibration: bool = Field(..., description="""whether internal calibration was used, if false, external calibration was used""", json_schema_extra = { "linkml_meta": {'alias': 'internal_calibration', 'domain_of': ['CalibrationInformation']} }) calibration_target: CalibrationTargetEnum = Field(..., description="""the target measurement of the calibration""", json_schema_extra = { "linkml_meta": {'alias': 'calibration_target', 'domain_of': ['CalibrationInformation']} }) calibration_standard: Optional[CalibrationStandardEnum] = Field(None, description="""the reference standard(s) used for calibration""", json_schema_extra = { "linkml_meta": {'alias': 'calibration_standard', 'domain_of': ['CalibrationInformation']} }) @@ -15096,12 +14964,11 @@ class CalibrationInformation(InformationObject): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -15115,6 +14982,18 @@ class CalibrationInformation(InformationObject): 'https://github.com/microbiomedata/nmdc-schema/issues/248'], 'slot_uri': 'rdf:type'} }) + @field_validator('calibration_object') + def pattern_calibration_object(cls, v): + pattern=re.compile(r"^(nmdc):dobj-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$") + if isinstance(v,list): + for element in v: + if isinstance(v, str) and not pattern.match(element): + raise ValueError(f"Invalid calibration_object format: {element}") + elif isinstance(v,str): + if not pattern.match(v): + raise ValueError(f"Invalid calibration_object format: {v}") + return v + @field_validator('id') def pattern_id(cls, v): 
pattern=re.compile(r"^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$") @@ -15129,7 +15008,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -15146,7 +15025,6 @@ class DataObject(InformationObject): """ linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:DataObject', 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['data object subset'], 'slot_usage': {'description': {'name': 'description', 'required': True}, 'id': {'name': 'id', 'pattern': '^(nmdc):dobj-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$', @@ -15190,7 +15068,9 @@ class DataObject(InformationObject): 'DataObject could be part of a manifest for a single run of an ' 'instrument and a manifest for technical replicates of a single ' 'sample.'], - 'domain_of': ['DataObject']} }) + 'domain_of': ['DataObject'], + 'structured_pattern': {'interpolated': True, + 'syntax': '^{id_nmdc_prefix}:manif-{id_shoulder}-{id_blade}$'}} }) id: str = Field(..., description="""A unique identifier for a thing. 
Must be either a CURIE shorthand for a URI or a complete URI""", json_schema_extra = { "linkml_meta": {'alias': 'id', 'domain_of': ['NamedThing'], 'examples': [{'description': 'https://github.com/microbiomedata/nmdc-schema/pull/499#discussion_r1018499248', @@ -15219,12 +15099,11 @@ class DataObject(InformationObject): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -15262,6 +15141,18 @@ def pattern_was_generated_by(cls, v): raise ValueError(f"Invalid was_generated_by format: {v}") return v + @field_validator('in_manifest') + def pattern_in_manifest(cls, v): + pattern=re.compile(r"^^(nmdc):manif-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$") + if isinstance(v,list): + for element in v: + if isinstance(v, str) and not pattern.match(element): + raise ValueError(f"Invalid in_manifest format: {element}") + elif isinstance(v,str): + if not pattern.match(v): + raise ValueError(f"Invalid in_manifest format: {v}") + return v + @field_validator('id') def pattern_id(cls, v): pattern=re.compile(r"^(nmdc):dobj-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$") @@ -15276,7 +15167,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -15306,16 +15197,14 @@ class DataGeneration(PlannedProcess): 'broad_mappings': ['OBI:0000070', 'ISA:Assay'], 'class_uri': 'nmdc:DataGeneration', 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['sample subset'], 'slot_usage': {'associated_studies': {'name': 'associated_studies', 'pattern': 
'^(nmdc):(sty)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$', 'range': 'Study', 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(sty)-{id_shoulder}-{id_blade}$'}}, - 'has_input': {'any_of': [{'range': 'Biosample'}, - {'range': 'ProcessedSample'}], - 'name': 'has_input', + 'has_input': {'name': 'has_input', 'pattern': '^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$', + 'range': 'Sample', 'required': True, 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}}, @@ -15341,7 +15230,6 @@ class DataGeneration(PlannedProcess): 'domain_of': ['Study', 'DataGeneration']} }) has_input: List[str] = Field(..., description="""An input to a process.""", json_schema_extra = { "linkml_meta": {'alias': 'has_input', 'aliases': ['input'], - 'any_of': [{'range': 'Biosample'}, {'range': 'ProcessedSample'}], 'domain_of': ['PlannedProcess'], 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}} }) @@ -15393,12 +15281,11 @@ class DataGeneration(PlannedProcess): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -15462,7 +15349,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -15508,7 +15395,7 @@ class NucleotideSequencing(DataGeneration): 'is_a': 'external_database_identifiers', 'mixins': ['insdc_identifiers']} }) ncbi_project_name: Optional[str] = Field(None, 
json_schema_extra = { "linkml_meta": {'alias': 'ncbi_project_name', 'domain_of': ['NucleotideSequencing']} }) - target_gene: Optional[TextValue] = Field(None, title="target gene", description="""Targeted gene or locus name for marker gene studies""", json_schema_extra = { "linkml_meta": {'alias': 'target_gene', + target_gene: Optional[TargetGeneEnum] = Field(None, title="target gene", description="""Targeted gene or locus name for marker gene studies""", json_schema_extra = { "linkml_meta": {'alias': 'target_gene', 'aliases': ['target gene'], 'annotations': {'expected_value': {'tag': 'expected_value', 'value': 'gene name'}}, @@ -15542,7 +15429,6 @@ class NucleotideSequencing(DataGeneration): 'domain_of': ['Study', 'DataGeneration']} }) has_input: List[str] = Field(..., description="""An input to a process.""", json_schema_extra = { "linkml_meta": {'alias': 'has_input', 'aliases': ['input'], - 'any_of': [{'range': 'Biosample'}, {'range': 'ProcessedSample'}], 'domain_of': ['PlannedProcess'], 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}} }) @@ -15596,12 +15482,11 @@ class NucleotideSequencing(DataGeneration): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -15701,7 +15586,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -15720,13 +15605,13 @@ class MassSpectrometry(DataGeneration): 'exact_mappings': ['CHMO:0000470'], 'from_schema': 
'https://w3id.org/nmdc/nmdc', 'rules': [{'description': 'If eluent_introduction_category is ' - 'gas_chromatography, then has_calibration is ' + 'gas_chromatography, then generates_calibration is ' 'required.', - 'postconditions': {'slot_conditions': {'has_calibration': {'name': 'has_calibration', - 'required': True}}}, + 'postconditions': {'slot_conditions': {'generates_calibration': {'name': 'generates_calibration', + 'required': True}}}, 'preconditions': {'slot_conditions': {'eluent_introduction_category': {'equals_string': 'gas_chromatography', 'name': 'eluent_introduction_category'}}}, - 'title': 'has_calibration_required_if_gc'}, + 'title': 'generates_calibration_required_if_gc'}, {'description': 'If eluent_introduction_category is ' 'liquid_chromatography or gas_chromatography, then ' 'has_chromatography_configuration is required.', @@ -15736,11 +15621,7 @@ class MassSpectrometry(DataGeneration): {'equals_string': 'gas_chromatography'}], 'name': 'eluent_introduction_category'}}}, 'title': 'has_chromatography_configuration_required_if_lc_or_gc'}], - 'slot_usage': {'has_calibration': {'name': 'has_calibration', - 'pattern': '^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$', - 'structured_pattern': {'interpolated': True, - 'syntax': '{id_nmdc_prefix}:calib-{id_shoulder}-{id_blade}$'}}, - 'has_chromatography_configuration': {'name': 'has_chromatography_configuration', + 'slot_usage': {'has_chromatography_configuration': {'name': 'has_chromatography_configuration', 'pattern': '^(nmdc):chrcon-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$', 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:chrcon-{id_shoulder}-{id_blade}$'}}, @@ -15757,7 +15638,9 @@ class MassSpectrometry(DataGeneration): 'domain_of': ['MassSpectrometry'], 'examples': [{'value': 'liquid_chromatography'}, {'value': 'direct_infusion_syringe'}]} }) - has_calibration: Optional[str] = Field(None, description="""a calibration instance associated with a process""", 
json_schema_extra = { "linkml_meta": {'alias': 'has_calibration', + generates_calibration: Optional[str] = Field(None, description="""calibration information is generated a process""", json_schema_extra = { "linkml_meta": {'alias': 'generates_calibration', + 'comments': ['A gas chromatography mass spectromery run generates data to ' + 'calibrate the retention index'], 'domain_of': ['MassSpectrometry'], 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:calib-{id_shoulder}-{id_blade}$'}} }) @@ -15785,7 +15668,6 @@ class MassSpectrometry(DataGeneration): 'domain_of': ['Study', 'DataGeneration']} }) has_input: List[str] = Field(..., description="""An input to a process.""", json_schema_extra = { "linkml_meta": {'alias': 'has_input', 'aliases': ['input'], - 'any_of': [{'range': 'Biosample'}, {'range': 'ProcessedSample'}], 'domain_of': ['PlannedProcess'], 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$'}} }) @@ -15839,12 +15721,11 @@ class MassSpectrometry(DataGeneration): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -15858,16 +15739,16 @@ class MassSpectrometry(DataGeneration): 'https://github.com/microbiomedata/nmdc-schema/issues/248'], 'slot_uri': 'rdf:type'} }) - @field_validator('has_calibration') - def pattern_has_calibration(cls, v): + @field_validator('generates_calibration') + def pattern_generates_calibration(cls, v): pattern=re.compile(r"^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): - raise ValueError(f"Invalid has_calibration format: {element}") + raise ValueError(f"Invalid generates_calibration format: {element}") elif isinstance(v,str): if not 
pattern.match(v): - raise ValueError(f"Invalid has_calibration format: {v}") + raise ValueError(f"Invalid generates_calibration format: {v}") return v @field_validator('has_chromatography_configuration') @@ -15944,7 +15825,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -15971,7 +15852,6 @@ class WorkflowExecution(PlannedProcess): 'WorkflowExecution is a distinct run with start and stop times, ' 'potentially with different inputs and outputs'], 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['workflow subset'], 'rules': [{'description': 'If qc_status has a value of pass, then the ' 'has_output slot is required.', 'postconditions': {'slot_conditions': {'has_output': {'name': 'has_output', @@ -16079,12 +15959,11 @@ class WorkflowExecution(PlannedProcess): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -16160,7 +16039,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -16177,7 +16056,6 @@ class MetagenomeAnnotation(WorkflowExecution): """ linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:MetagenomeAnnotation', 'from_schema': 
'https://w3id.org/nmdc/nmdc', - 'in_subset': ['workflow subset'], 'slot_usage': {'gold_analysis_project_identifiers': {'name': 'gold_analysis_project_identifiers', 'pattern': '^gold:Ga[0-9]+$', 'structured_pattern': {'interpolated': True, @@ -16291,12 +16169,11 @@ class MetagenomeAnnotation(WorkflowExecution): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -16408,7 +16285,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -16427,7 +16304,6 @@ class MetagenomeAssembly(WorkflowExecution): 'comments': ['instances of this class may use a de novo assembly strategy in ' 'most or all cases relevant to NMDC'], 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['workflow subset'], 'slot_usage': {'id': {'name': 'id', 'pattern': '^(nmdc):wfmgas-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$', 'required': True, @@ -16603,12 +16479,11 @@ class MetagenomeAssembly(WorkflowExecution): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -16708,7 +16583,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + 
pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -16722,7 +16597,6 @@ def pattern_alternative_identifiers(cls, v): class MetatranscriptomeAssembly(WorkflowExecution): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:MetatranscriptomeAssembly', 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['workflow subset'], 'slot_usage': {'id': {'name': 'id', 'pattern': '^(nmdc):wfmtas-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$', 'required': True, @@ -16898,12 +16772,11 @@ class MetatranscriptomeAssembly(WorkflowExecution): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -17003,7 +16876,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -17017,7 +16890,6 @@ def pattern_alternative_identifiers(cls, v): class MetatranscriptomeAnnotation(WorkflowExecution): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:MetatranscriptomeAnnotation', 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['workflow subset'], 'slot_usage': {'gold_analysis_project_identifiers': {'name': 'gold_analysis_project_identifiers', 'pattern': '^gold:Ga[0-9]+$', 'structured_pattern': {'interpolated': True, @@ -17139,12 +17011,11 @@ class MetatranscriptomeAnnotation(WorkflowExecution): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 
'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -17256,7 +17127,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -17273,7 +17144,6 @@ class MetatranscriptomeExpressionAnalysis(WorkflowExecution): """ linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:MetatranscriptomeExpressionAnalysis', 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['workflow subset'], 'slot_usage': {'id': {'name': 'id', 'pattern': '^(nmdc):wfmtex-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$', 'required': True, @@ -17377,12 +17247,11 @@ class MetatranscriptomeExpressionAnalysis(WorkflowExecution): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -17482,7 +17351,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -17499,7 +17368,6 @@ class MagsAnalysis(WorkflowExecution): """ linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:MagsAnalysis', 'from_schema': 'https://w3id.org/nmdc/nmdc', - 
'in_subset': ['workflow subset'], 'slot_usage': {'id': {'name': 'id', 'pattern': '^(nmdc):wfmag-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$', 'required': True, @@ -17610,12 +17478,11 @@ class MagsAnalysis(WorkflowExecution): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -17715,7 +17582,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -17732,7 +17599,6 @@ class MetagenomeSequencing(WorkflowExecution): """ linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:MetagenomeSequencing', 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['workflow subset'], 'slot_usage': {'has_input': {'name': 'has_input', 'pattern': '^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$', 'structured_pattern': {'interpolated': True, @@ -17828,12 +17694,11 @@ class MetagenomeSequencing(WorkflowExecution): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -17921,7 +17786,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if 
isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -17938,7 +17803,6 @@ class ReadQcAnalysis(WorkflowExecution): """ linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:ReadQcAnalysis', 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['workflow subset'], 'slot_usage': {'id': {'name': 'id', 'pattern': '^(nmdc):wfrqc-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$', 'required': True, @@ -18044,12 +17908,11 @@ class ReadQcAnalysis(WorkflowExecution): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -18137,7 +18000,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -18154,7 +18017,6 @@ class ReadBasedTaxonomyAnalysis(WorkflowExecution): """ linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:ReadBasedTaxonomyAnalysis', 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['workflow subset'], 'slot_usage': {'id': {'name': 'id', 'pattern': '^(nmdc):wfrbt-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$', 'required': True, @@ -18246,12 +18108,11 @@ class ReadBasedTaxonomyAnalysis(WorkflowExecution): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -18339,7 +18200,7 @@ def pattern_id(cls, v): 
@field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -18353,7 +18214,6 @@ def pattern_alternative_identifiers(cls, v): class MetabolomicsAnalysis(WorkflowExecution): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:MetabolomicsAnalysis', 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['workflow subset'], 'slot_usage': {'id': {'name': 'id', 'pattern': '^(nmdc):wfmb-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$', 'required': True, @@ -18366,6 +18226,13 @@ class MetabolomicsAnalysis(WorkflowExecution): has_metabolite_identifications: Optional[List[MetaboliteIdentification]] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'has_metabolite_identifications', 'domain_of': ['MetabolomicsAnalysis']} }) + uses_calibration: Optional[str] = Field(None, description="""calibration information is used by a process""", json_schema_extra = { "linkml_meta": {'alias': 'uses_calibration', + 'comments': ['Retenion index calibration data generated by a gas ' + 'chromatography mass spectromery run is used when analyzing ' + 'metabolomics data'], + 'domain_of': ['MetabolomicsAnalysis', 'NomAnalysis'], + 'structured_pattern': {'interpolated': True, + 'syntax': '{id_nmdc_prefix}:calib-{id_shoulder}-{id_blade}$'}} }) ended_at_time: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'ended_at_time', 'domain_of': ['WorkflowExecution'], 'mappings': ['prov:endedAtTime'], @@ -18446,12 +18313,11 @@ class MetabolomicsAnalysis(WorkflowExecution): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 
'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -18465,6 +18331,18 @@ class MetabolomicsAnalysis(WorkflowExecution): 'https://github.com/microbiomedata/nmdc-schema/issues/248'], 'slot_uri': 'rdf:type'} }) + @field_validator('uses_calibration') + def pattern_uses_calibration(cls, v): + pattern=re.compile(r"^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$") + if isinstance(v,list): + for element in v: + if isinstance(v, str) and not pattern.match(element): + raise ValueError(f"Invalid uses_calibration format: {element}") + elif isinstance(v,str): + if not pattern.match(v): + raise ValueError(f"Invalid uses_calibration format: {v}") + return v + @field_validator('ended_at_time') def pattern_ended_at_time(cls, v): pattern=re.compile(r"^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$") @@ -18539,7 +18417,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -18553,7 +18431,6 @@ def pattern_alternative_identifiers(cls, v): class MetaproteomicsAnalysis(WorkflowExecution): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:MetaproteomicsAnalysis', 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['workflow subset'], 'slot_usage': {'id': {'name': 'id', 'pattern': '^(nmdc):wfmp-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$', 'required': True, @@ -18564,6 +18441,8 @@ class 
MetaproteomicsAnalysis(WorkflowExecution): 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(omprc|dgms)-{id_shoulder}-{id_blade}$'}}}}) + metaproteomics_analysis_category: MetaproteomicsAnalysisCategoryEnum = Field(..., description="""The category of metaproteomics analysis being performed.""", json_schema_extra = { "linkml_meta": {'alias': 'metaproteomics_analysis_category', + 'domain_of': ['MetaproteomicsAnalysis']} }) ended_at_time: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'ended_at_time', 'domain_of': ['WorkflowExecution'], 'mappings': ['prov:endedAtTime'], @@ -18644,12 +18523,11 @@ class MetaproteomicsAnalysis(WorkflowExecution): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -18737,7 +18615,7 @@ def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -18751,7 +18629,6 @@ def pattern_alternative_identifiers(cls, v): class NomAnalysis(WorkflowExecution): linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'class_uri': 'nmdc:NomAnalysis', 'from_schema': 'https://w3id.org/nmdc/nmdc', - 'in_subset': ['workflow subset'], 'slot_usage': {'id': {'name': 'id', 'pattern': '^(nmdc):wfnom-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$', 'required': True, @@ -18762,6 +18639,13 @@ class NomAnalysis(WorkflowExecution): 'structured_pattern': {'interpolated': True, 'syntax': '{id_nmdc_prefix}:(omprc|dgms)-{id_shoulder}-{id_blade}$'}}}}) + uses_calibration: 
Optional[str] = Field(None, description="""calibration information is used by a process""", json_schema_extra = { "linkml_meta": {'alias': 'uses_calibration', + 'comments': ['Retenion index calibration data generated by a gas ' + 'chromatography mass spectromery run is used when analyzing ' + 'metabolomics data'], + 'domain_of': ['MetabolomicsAnalysis', 'NomAnalysis'], + 'structured_pattern': {'interpolated': True, + 'syntax': '{id_nmdc_prefix}:calib-{id_shoulder}-{id_blade}$'}} }) ended_at_time: Optional[str] = Field(None, json_schema_extra = { "linkml_meta": {'alias': 'ended_at_time', 'domain_of': ['WorkflowExecution'], 'mappings': ['prov:endedAtTime'], @@ -18842,12 +18726,11 @@ class NomAnalysis(WorkflowExecution): 'PortionOfSubstance', 'MagBin', 'MetaboliteIdentification', - 'PeptideQuantification', - 'ProteinQuantification', 'GenomeFeature', 'FunctionalAnnotation', 'AttributeValue', 'NamedThing', + 'OntologyRelation', 'FailureCategorization', 'Protocol', 'CreditAssociation', @@ -18861,6 +18744,18 @@ class NomAnalysis(WorkflowExecution): 'https://github.com/microbiomedata/nmdc-schema/issues/248'], 'slot_uri': 'rdf:type'} }) + @field_validator('uses_calibration') + def pattern_uses_calibration(cls, v): + pattern=re.compile(r"^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$") + if isinstance(v,list): + for element in v: + if isinstance(v, str) and not pattern.match(element): + raise ValueError(f"Invalid uses_calibration format: {element}") + elif isinstance(v,str): + if not pattern.match(v): + raise ValueError(f"Invalid uses_calibration format: {v}") + return v + @field_validator('ended_at_time') def pattern_ended_at_time(cls, v): pattern=re.compile(r"^([\+-]?\d{4}(?!\d{2}\b))((-?)((0[1-9]|1[0-2])(\3([12]\d|0[1-9]|3[01]))?|W([0-4]\d|5[0-2])(-?[1-7])?|(00[1-9]|0[1-9]\d|[12]\d{2}|3([0-5]\d|6[1-6])))([T\s]((([01]\d|2[0-3])((:?)[0-5]\d)?|24\:?00)([\.,]\d+(?!:))?)?(\17[0-5]\d([\.,]\d+)?)?([zZ]|([\+-])([01]\d|2[0-3]):?([0-5]\d)?)?)?)?$") @@ -18935,7 +18830,7 @@ 
def pattern_id(cls, v): @field_validator('alternative_identifiers') def pattern_alternative_identifiers(cls, v): - pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$") + pattern=re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,\(\)\=\#]*$") if isinstance(v,list): for element in v: if isinstance(v, str) and not pattern.match(element): @@ -18955,8 +18850,6 @@ def pattern_alternative_identifiers(cls, v): PortionOfSubstance.model_rebuild() MagBin.model_rebuild() MetaboliteIdentification.model_rebuild() -PeptideQuantification.model_rebuild() -ProteinQuantification.model_rebuild() GenomeFeature.model_rebuild() FunctionalAnnotation.model_rebuild() AttributeValue.model_rebuild() @@ -18976,8 +18869,10 @@ def pattern_alternative_identifiers(cls, v): FunctionalAnnotationTerm.model_rebuild() Pathway.model_rebuild() OrthologyGroup.model_rebuild() +OntologyRelation.model_rebuild() FailureCategorization.model_rebuild() MaterialEntity.model_rebuild() +Sample.model_rebuild() Biosample.model_rebuild() ProcessedSample.model_rebuild() Site.model_rebuild() diff --git a/nmdc_schema/nmdc.py b/nmdc_schema/nmdc.py index dfe2b7e27f..821c0ed751 100644 --- a/nmdc_schema/nmdc.py +++ b/nmdc_schema/nmdc.py @@ -1,5 +1,5 @@ # Auto generated from nmdc.yaml by pythongen.py version: 0.0.1 -# Generation date: 2024-11-27T12:35:41 +# Generation date: 2024-12-07T20:12:39 # Schema: NMDC # # id: https://w3id.org/nmdc/nmdc @@ -14,20 +14,55 @@ import dataclasses import re -from jsonasobj2 import JsonObj, as_dict -from typing import Optional, List, Union, Dict, ClassVar, Any from dataclasses import dataclass -from datetime import date, datetime, time -from linkml_runtime.linkml_model.meta import EnumDefinition, PermissibleValue, PvFormulaOptions - -from linkml_runtime.utils.slot import Slot -from linkml_runtime.utils.metamodelcore import empty_list, empty_dict, bnode -from linkml_runtime.utils.yamlutils import YAMLRoot, extended_str, extended_float, 
extended_int +from datetime import ( + date, + datetime, + time +) +from typing import ( + Any, + ClassVar, + Dict, + List, + Optional, + Union +) + +from jsonasobj2 import ( + JsonObj, + as_dict +) +from linkml_runtime.linkml_model.meta import ( + EnumDefinition, + PermissibleValue, + PvFormulaOptions +) +from linkml_runtime.utils.curienamespace import CurieNamespace from linkml_runtime.utils.dataclass_extensions_376 import dataclasses_init_fn_with_kwargs -from linkml_runtime.utils.formatutils import camelcase, underscore, sfx from linkml_runtime.utils.enumerations import EnumDefinitionImpl -from rdflib import Namespace, URIRef -from linkml_runtime.utils.curienamespace import CurieNamespace +from linkml_runtime.utils.formatutils import ( + camelcase, + sfx, + underscore +) +from linkml_runtime.utils.metamodelcore import ( + bnode, + empty_dict, + empty_list +) +from linkml_runtime.utils.slot import Slot +from linkml_runtime.utils.yamlutils import ( + YAMLRoot, + extended_float, + extended_int, + extended_str +) +from rdflib import ( + Namespace, + URIRef +) + from linkml_runtime.linkml_model.types import Boolean, Decimal, Double, Float, Integer, String, Uriorcurie from linkml_runtime.utils.metamodelcore import Bool, Decimal, URIorCURIE @@ -139,7 +174,7 @@ class DecimalDegree(float): """ A decimal degree expresses latitude or longitude as decimal fractions. 
""" type_class_uri = XSD["decimal"] type_class_curie = "xsd:decimal" - type_name = "decimal degree" + type_name = "decimal_degree" type_model_uri = NMDC.DecimalDegree @@ -147,7 +182,7 @@ class LanguageCode(str): """ A language code conforming to ISO_639-1 """ type_class_uri = XSD["language"] type_class_curie = "xsd:language" - type_name = "language code" + type_name = "language_code" type_model_uri = NMDC.LanguageCode @@ -203,11 +238,15 @@ class MaterialEntityId(NamedThingId): pass -class BiosampleId(MaterialEntityId): +class SampleId(MaterialEntityId): + pass + + +class BiosampleId(SampleId): pass -class ProcessedSampleId(MaterialEntityId): +class ProcessedSampleId(SampleId): pass @@ -857,96 +896,6 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): super().__post_init__(**kwargs) -@dataclass(repr=False) -class PeptideQuantification(YAMLRoot): - """ - This is used to link a metaproteomics analysis workflow to a specific peptide sequence and related information - """ - _inherited_slots: ClassVar[List[str]] = [] - - class_class_uri: ClassVar[URIRef] = NMDC["PeptideQuantification"] - class_class_curie: ClassVar[str] = "nmdc:PeptideQuantification" - class_name: ClassVar[str] = "PeptideQuantification" - class_model_uri: ClassVar[URIRef] = NMDC.PeptideQuantification - - type: Union[str, URIorCURIE] = None - all_proteins: Optional[Union[Union[str, GeneProductId], List[Union[str, GeneProductId]]]] = empty_list() - best_protein: Optional[Union[str, GeneProductId]] = None - min_q_value: Optional[float] = None - peptide_sequence: Optional[str] = None - peptide_spectral_count: Optional[int] = None - peptide_sum_masic_abundance: Optional[int] = None - - def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): - if self._is_empty(self.type): - self.MissingRequiredField("type") - self.type = str(self.class_class_curie) - - if not isinstance(self.all_proteins, list): - self.all_proteins = [self.all_proteins] if self.all_proteins is not None else [] - 
self.all_proteins = [v if isinstance(v, GeneProductId) else GeneProductId(v) for v in self.all_proteins] - - if self.best_protein is not None and not isinstance(self.best_protein, GeneProductId): - self.best_protein = GeneProductId(self.best_protein) - - if self.min_q_value is not None and not isinstance(self.min_q_value, float): - self.min_q_value = float(self.min_q_value) - - if self.peptide_sequence is not None and not isinstance(self.peptide_sequence, str): - self.peptide_sequence = str(self.peptide_sequence) - - if self.peptide_spectral_count is not None and not isinstance(self.peptide_spectral_count, int): - self.peptide_spectral_count = int(self.peptide_spectral_count) - - if self.peptide_sum_masic_abundance is not None and not isinstance(self.peptide_sum_masic_abundance, int): - self.peptide_sum_masic_abundance = int(self.peptide_sum_masic_abundance) - - super().__post_init__(**kwargs) - - -@dataclass(repr=False) -class ProteinQuantification(YAMLRoot): - """ - This is used to link a metaproteomics analysis workflow to a specific protein - """ - _inherited_slots: ClassVar[List[str]] = [] - - class_class_uri: ClassVar[URIRef] = NMDC["ProteinQuantification"] - class_class_curie: ClassVar[str] = "nmdc:ProteinQuantification" - class_name: ClassVar[str] = "ProteinQuantification" - class_model_uri: ClassVar[URIRef] = NMDC.ProteinQuantification - - type: Union[str, URIorCURIE] = None - all_proteins: Optional[Union[Union[str, GeneProductId], List[Union[str, GeneProductId]]]] = empty_list() - best_protein: Optional[Union[str, GeneProductId]] = None - peptide_sequence_count: Optional[int] = None - protein_spectral_count: Optional[int] = None - protein_sum_masic_abundance: Optional[int] = None - - def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): - if self._is_empty(self.type): - self.MissingRequiredField("type") - self.type = str(self.class_class_curie) - - if not isinstance(self.all_proteins, list): - self.all_proteins = [self.all_proteins] if 
self.all_proteins is not None else [] - self.all_proteins = [v if isinstance(v, GeneProductId) else GeneProductId(v) for v in self.all_proteins] - - if self.best_protein is not None and not isinstance(self.best_protein, GeneProductId): - self.best_protein = GeneProductId(self.best_protein) - - if self.peptide_sequence_count is not None and not isinstance(self.peptide_sequence_count, int): - self.peptide_sequence_count = int(self.peptide_sequence_count) - - if self.protein_spectral_count is not None and not isinstance(self.protein_spectral_count, int): - self.protein_spectral_count = int(self.protein_spectral_count) - - if self.protein_sum_masic_abundance is not None and not isinstance(self.protein_sum_masic_abundance, int): - self.protein_sum_masic_abundance = int(self.protein_sum_masic_abundance) - - super().__post_init__(**kwargs) - - @dataclass(repr=False) class AttributeValue(YAMLRoot): """ @@ -1629,7 +1578,30 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): @dataclass(repr=False) -class Biosample(MaterialEntity): +class Sample(MaterialEntity): + """ + A sample is a material entity that can be characterized by an experiment. + """ + _inherited_slots: ClassVar[List[str]] = [] + + class_class_uri: ClassVar[URIRef] = NMDC["Sample"] + class_class_curie: ClassVar[str] = "nmdc:Sample" + class_name: ClassVar[str] = "Sample" + class_model_uri: ClassVar[URIRef] = NMDC.Sample + + id: Union[str, SampleId] = None + type: Union[str, URIorCURIE] = None + + def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): + + super().__post_init__(**kwargs) + if self._is_empty(self.type): + self.MissingRequiredField("type") + self.type = str(self.class_class_curie) + + +@dataclass(repr=False) +class Biosample(Sample): """ Biological source material which can be characterized by an experiment. 
""" @@ -4157,7 +4129,7 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): @dataclass(repr=False) -class ProcessedSample(MaterialEntity): +class ProcessedSample(Sample): _inherited_slots: ClassVar[List[str]] = [] class_class_uri: ClassVar[URIRef] = NMDC["ProcessedSample"] @@ -4524,7 +4496,7 @@ class MaterialProcessing(PlannedProcess): id: Union[str, MaterialProcessingId] = None type: Union[str, URIorCURIE] = None instrument_used: Optional[Union[Union[str, InstrumentId], List[Union[str, InstrumentId]]]] = empty_list() - has_input: Optional[Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]]] = empty_list() + has_input: Optional[Union[Union[str, SampleId], List[Union[str, SampleId]]]] = empty_list() has_output: Optional[Union[Union[str, ProcessedSampleId], List[Union[str, ProcessedSampleId]]]] = empty_list() def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): @@ -4534,7 +4506,7 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): if not isinstance(self.has_input, list): self.has_input = [self.has_input] if self.has_input is not None else [] - self.has_input = [v if isinstance(v, NamedThingId) else NamedThingId(v) for v in self.has_input] + self.has_input = [v if isinstance(v, SampleId) else SampleId(v) for v in self.has_input] if not isinstance(self.has_output, list): self.has_output = [self.has_output] if self.has_output is not None else [] @@ -4601,7 +4573,7 @@ class Extraction(MaterialProcessing): id: Union[str, ExtractionId] = None type: Union[str, URIorCURIE] = None - has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None + has_input: Union[Union[str, SampleId], List[Union[str, SampleId]]] = None has_output: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None substances_used: Optional[Union[Union[dict, PortionOfSubstance], List[Union[dict, PortionOfSubstance]]]] = empty_list() extraction_targets: Optional[Union[Union[str, "ExtractionTargetEnum"], 
List[Union[str, "ExtractionTargetEnum"]]]] = empty_list() @@ -4618,7 +4590,7 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): self.MissingRequiredField("has_input") if not isinstance(self.has_input, list): self.has_input = [self.has_input] if self.has_input is not None else [] - self.has_input = [v if isinstance(v, NamedThingId) else NamedThingId(v) for v in self.has_input] + self.has_input = [v if isinstance(v, SampleId) else SampleId(v) for v in self.has_input] if self._is_empty(self.has_output): self.MissingRequiredField("has_output") @@ -4736,7 +4708,7 @@ class SubSamplingProcess(MaterialProcessing): volume: Optional[Union[dict, QuantityValue]] = None mass: Optional[Union[dict, QuantityValue]] = None sampled_portion: Optional[Union[Union[str, "SamplePortionEnum"], List[Union[str, "SamplePortionEnum"]]]] = empty_list() - has_input: Optional[Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]]] = empty_list() + has_input: Optional[Union[Union[str, SampleId], List[Union[str, SampleId]]]] = empty_list() has_output: Optional[Union[Union[str, ProcessedSampleId], List[Union[str, ProcessedSampleId]]]] = empty_list() def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): @@ -4766,7 +4738,7 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): if not isinstance(self.has_input, list): self.has_input = [self.has_input] if self.has_input is not None else [] - self.has_input = [v if isinstance(v, NamedThingId) else NamedThingId(v) for v in self.has_input] + self.has_input = [v if isinstance(v, SampleId) else SampleId(v) for v in self.has_input] if not isinstance(self.has_output, list): self.has_output = [self.has_output] if self.has_output is not None else [] @@ -4793,7 +4765,7 @@ class MixingProcess(MaterialProcessing): id: Union[str, MixingProcessId] = None type: Union[str, URIorCURIE] = None duration: Optional[Union[dict, QuantityValue]] = None - has_input: Optional[Union[Union[str, NamedThingId], List[Union[str, 
NamedThingId]]]] = empty_list() + has_input: Optional[Union[Union[str, SampleId], List[Union[str, SampleId]]]] = empty_list() has_output: Optional[Union[Union[str, ProcessedSampleId], List[Union[str, ProcessedSampleId]]]] = empty_list() def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): @@ -4807,7 +4779,7 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): if not isinstance(self.has_input, list): self.has_input = [self.has_input] if self.has_input is not None else [] - self.has_input = [v if isinstance(v, NamedThingId) else NamedThingId(v) for v in self.has_input] + self.has_input = [v if isinstance(v, SampleId) else SampleId(v) for v in self.has_input] if not isinstance(self.has_output, list): self.has_output = [self.has_output] if self.has_output is not None else [] @@ -4842,7 +4814,7 @@ class FiltrationProcess(MaterialProcessing): is_pressurized: Optional[Union[bool, Bool]] = None separation_method: Optional[Union[str, "SeparationMethodEnum"]] = None volume: Optional[Union[dict, QuantityValue]] = None - has_input: Optional[Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]]] = empty_list() + has_input: Optional[Union[Union[str, SampleId], List[Union[str, SampleId]]]] = empty_list() has_output: Optional[Union[Union[str, ProcessedSampleId], List[Union[str, ProcessedSampleId]]]] = empty_list() def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): @@ -4878,7 +4850,7 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): if not isinstance(self.has_input, list): self.has_input = [self.has_input] if self.has_input is not None else [] - self.has_input = [v if isinstance(v, NamedThingId) else NamedThingId(v) for v in self.has_input] + self.has_input = [v if isinstance(v, SampleId) else SampleId(v) for v in self.has_input] if not isinstance(self.has_output, list): self.has_output = [self.has_output] if self.has_output is not None else [] @@ -4908,7 +4880,7 @@ class 
ChromatographicSeparationProcess(MaterialProcessing): ordered_mobile_phases: Optional[Union[Union[dict, MobilePhaseSegment], List[Union[dict, MobilePhaseSegment]]]] = empty_list() stationary_phase: Optional[Union[str, "StationaryPhaseEnum"]] = None temperature: Optional[Union[dict, QuantityValue]] = None - has_input: Optional[Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]]] = empty_list() + has_input: Optional[Union[Union[str, SampleId], List[Union[str, SampleId]]]] = empty_list() has_output: Optional[Union[Union[str, ProcessedSampleId], List[Union[str, ProcessedSampleId]]]] = empty_list() def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): @@ -4932,7 +4904,7 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): if not isinstance(self.has_input, list): self.has_input = [self.has_input] if self.has_input is not None else [] - self.has_input = [v if isinstance(v, NamedThingId) else NamedThingId(v) for v in self.has_input] + self.has_input = [v if isinstance(v, SampleId) else SampleId(v) for v in self.has_input] if not isinstance(self.has_output, list): self.has_output = [self.has_output] if self.has_output is not None else [] @@ -5656,7 +5628,7 @@ class DataGeneration(PlannedProcess): type: Union[str, URIorCURIE] = None analyte_category: Union[str, "AnalyteCategoryEnum"] = None associated_studies: Union[Union[str, StudyId], List[Union[str, StudyId]]] = None - has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None + has_input: Union[Union[str, SampleId], List[Union[str, SampleId]]] = None add_date: Optional[str] = None instrument_used: Optional[Union[Union[str, InstrumentId], List[Union[str, InstrumentId]]]] = empty_list() mod_date: Optional[str] = None @@ -5679,7 +5651,7 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): self.MissingRequiredField("has_input") if not isinstance(self.has_input, list): self.has_input = [self.has_input] if self.has_input is not None else [] - 
self.has_input = [v if isinstance(v, NamedThingId) else NamedThingId(v) for v in self.has_input] + self.has_input = [v if isinstance(v, SampleId) else SampleId(v) for v in self.has_input] if self.add_date is not None and not isinstance(self.add_date, str): self.add_date = str(self.add_date) @@ -5720,7 +5692,7 @@ class NucleotideSequencing(DataGeneration): type: Union[str, URIorCURIE] = None analyte_category: Union[str, "AnalyteCategoryEnum"] = None associated_studies: Union[Union[str, StudyId], List[Union[str, StudyId]]] = None - has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None + has_input: Union[Union[str, SampleId], List[Union[str, SampleId]]] = None gold_sequencing_project_identifiers: Optional[Union[Union[str, ExternalIdentifier], List[Union[str, ExternalIdentifier]]]] = empty_list() insdc_bioproject_identifiers: Optional[Union[Union[str, ExternalIdentifier], List[Union[str, ExternalIdentifier]]]] = empty_list() insdc_experiment_identifiers: Optional[Union[Union[str, ExternalIdentifier], List[Union[str, ExternalIdentifier]]]] = empty_list() @@ -5778,9 +5750,9 @@ class MassSpectrometry(DataGeneration): type: Union[str, URIorCURIE] = None analyte_category: Union[str, "AnalyteCategoryEnum"] = None associated_studies: Union[Union[str, StudyId], List[Union[str, StudyId]]] = None - has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None + has_input: Union[Union[str, SampleId], List[Union[str, SampleId]]] = None eluent_introduction_category: Optional[Union[str, "EluentIntroductionCategoryEnum"]] = None - has_calibration: Optional[Union[str, CalibrationInformationId]] = None + generates_calibration: Optional[Union[str, CalibrationInformationId]] = None has_chromatography_configuration: Optional[Union[str, ChromatographyConfigurationId]] = None has_mass_spectrometry_configuration: Optional[Union[str, MassSpectrometryConfigurationId]] = None @@ -5793,8 +5765,8 @@ def __post_init__(self, *_: List[str], **kwargs: 
Dict[str, Any]): if self.eluent_introduction_category is not None and not isinstance(self.eluent_introduction_category, EluentIntroductionCategoryEnum): self.eluent_introduction_category = EluentIntroductionCategoryEnum(self.eluent_introduction_category) - if self.has_calibration is not None and not isinstance(self.has_calibration, CalibrationInformationId): - self.has_calibration = CalibrationInformationId(self.has_calibration) + if self.generates_calibration is not None and not isinstance(self.generates_calibration, CalibrationInformationId): + self.generates_calibration = CalibrationInformationId(self.generates_calibration) if self.has_chromatography_configuration is not None and not isinstance(self.has_chromatography_configuration, ChromatographyConfigurationId): self.has_chromatography_configuration = ChromatographyConfigurationId(self.has_chromatography_configuration) @@ -6473,6 +6445,7 @@ class MetabolomicsAnalysis(WorkflowExecution): started_at_time: str = None has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None has_metabolite_identifications: Optional[Union[Union[dict, MetaboliteIdentification], List[Union[dict, MetaboliteIdentification]]]] = empty_list() + uses_calibration: Optional[Union[str, CalibrationInformationId]] = None was_informed_by: Optional[Union[str, DataGenerationId]] = None def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): @@ -6485,6 +6458,9 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): self.has_metabolite_identifications = [self.has_metabolite_identifications] if self.has_metabolite_identifications is not None else [] self.has_metabolite_identifications = [v if isinstance(v, MetaboliteIdentification) else MetaboliteIdentification(**as_dict(v)) for v in self.has_metabolite_identifications] + if self.uses_calibration is not None and not isinstance(self.uses_calibration, CalibrationInformationId): + self.uses_calibration = CalibrationInformationId(self.uses_calibration) + if 
self.was_informed_by is not None and not isinstance(self.was_informed_by, DataGenerationId): self.was_informed_by = DataGenerationId(self.was_informed_by) @@ -6509,6 +6485,7 @@ class MetaproteomicsAnalysis(WorkflowExecution): git_url: str = None started_at_time: str = None has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None + metaproteomics_analysis_category: Union[str, "MetaproteomicsAnalysisCategoryEnum"] = None was_informed_by: Optional[Union[str, DataGenerationId]] = None def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): @@ -6517,6 +6494,11 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): if not isinstance(self.id, MetaproteomicsAnalysisId): self.id = MetaproteomicsAnalysisId(self.id) + if self._is_empty(self.metaproteomics_analysis_category): + self.MissingRequiredField("metaproteomics_analysis_category") + if not isinstance(self.metaproteomics_analysis_category, MetaproteomicsAnalysisCategoryEnum): + self.metaproteomics_analysis_category = MetaproteomicsAnalysisCategoryEnum(self.metaproteomics_analysis_category) + if self.was_informed_by is not None and not isinstance(self.was_informed_by, DataGenerationId): self.was_informed_by = DataGenerationId(self.was_informed_by) @@ -6541,6 +6523,7 @@ class NomAnalysis(WorkflowExecution): git_url: str = None started_at_time: str = None has_input: Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]] = None + uses_calibration: Optional[Union[str, CalibrationInformationId]] = None was_informed_by: Optional[Union[str, DataGenerationId]] = None def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): @@ -6549,6 +6532,9 @@ def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]): if not isinstance(self.id, NomAnalysisId): self.id = NomAnalysisId(self.id) + if self.uses_calibration is not None and not isinstance(self.uses_calibration, CalibrationInformationId): + self.uses_calibration = CalibrationInformationId(self.uses_calibration) 
+ if self.was_informed_by is not None and not isinstance(self.was_informed_by, DataGenerationId): self.was_informed_by = DataGenerationId(self.was_informed_by) @@ -6866,6 +6852,22 @@ class SamplePortionEnum(EnumDefinitionImpl): name="SamplePortionEnum", ) +class MetaproteomicsAnalysisCategoryEnum(EnumDefinitionImpl): + """ + The category of metaproteomics analysis being performed. + """ + matched_metagenome = PermissibleValue( + text="matched_metagenome", + description="A metaproteomics analysis that is matched to a metagenome derived from the same biosample.") + in_silico_metagenome = PermissibleValue( + text="in_silico_metagenome", + description="A metaproteomics analysis that is matched to an in silico generated metagenome.") + + _defn = EnumDefinition( + name="MetaproteomicsAnalysisCategoryEnum", + description="The category of metaproteomics analysis being performed.", + ) + class BinQualityEnum(EnumDefinitionImpl): HQ = PermissibleValue( @@ -9862,6 +9864,10 @@ def _addvals(cls): PermissibleValue( text="23S_rRNA", description="the large subunit of the bacterial/archean ribosome")) + setattr(cls, "18S_rRNA", + PermissibleValue( + text="18S_rRNA", + description="the small subunit of the eukaryotic ribosome")) setattr(cls, "28S_rRNA", PermissibleValue( text="28S_rRNA", @@ -9871,8 +9877,11 @@ def _addvals(cls): class slots: pass -slots.has_calibration = Slot(uri=NMDC.has_calibration, name="has_calibration", curie=NMDC.curie('has_calibration'), - model_uri=NMDC.has_calibration, domain=None, range=Optional[Union[str, CalibrationInformationId]]) +slots.generates_calibration = Slot(uri=NMDC.generates_calibration, name="generates_calibration", curie=NMDC.curie('generates_calibration'), + model_uri=NMDC.generates_calibration, domain=None, range=Optional[Union[str, CalibrationInformationId]]) + +slots.uses_calibration = Slot(uri=NMDC.uses_calibration, name="uses_calibration", curie=NMDC.curie('uses_calibration'), + model_uri=NMDC.uses_calibration, domain=None, 
range=Optional[Union[str, CalibrationInformationId]]) slots.calibration_object = Slot(uri=NMDC.calibration_object, name="calibration_object", curie=NMDC.curie('calibration_object'), model_uri=NMDC.calibration_object, domain=None, range=Optional[Union[str, DataObjectId]]) @@ -9926,9 +9935,6 @@ class slots: slots.has_chromatography_configuration = Slot(uri=NMDC.has_chromatography_configuration, name="has_chromatography_configuration", curie=NMDC.curie('has_chromatography_configuration'), model_uri=NMDC.has_chromatography_configuration, domain=None, range=Optional[Union[str, ChromatographyConfigurationId]]) -slots.metagenome_annotation_id = Slot(uri=NMDC.metagenome_annotation_id, name="metagenome_annotation_id", curie=NMDC.curie('metagenome_annotation_id'), - model_uri=NMDC.metagenome_annotation_id, domain=None, range=Union[str, WorkflowExecutionId]) - slots.gene_function_id = Slot(uri=NMDC.gene_function_id, name="gene_function_id", curie=NMDC.curie('gene_function_id'), model_uri=NMDC.gene_function_id, domain=None, range=Union[str, URIorCURIE]) @@ -10140,12 +10146,12 @@ class slots: slots.start = Slot(uri=NMDC.start, name="start", curie=NMDC.curie('start'), model_uri=NMDC.start, domain=None, range=Optional[int]) +slots.metaproteomics_analysis_category = Slot(uri=NMDC.metaproteomics_analysis_category, name="metaproteomics_analysis_category", curie=NMDC.curie('metaproteomics_analysis_category'), + model_uri=NMDC.metaproteomics_analysis_category, domain=None, range=Union[str, "MetaproteomicsAnalysisCategoryEnum"]) + slots.metagenome_assembly_parameter = Slot(uri=NMDC.metagenome_assembly_parameter, name="metagenome_assembly_parameter", curie=NMDC.curie('metagenome_assembly_parameter'), model_uri=NMDC.metagenome_assembly_parameter, domain=None, range=Optional[str]) -slots.has_peptide_quantifications = Slot(uri=NMDC.has_peptide_quantifications, name="has_peptide_quantifications", curie=NMDC.curie('has_peptide_quantifications'), - model_uri=NMDC.has_peptide_quantifications, 
domain=None, range=Optional[Union[Union[dict, PeptideQuantification], List[Union[dict, PeptideQuantification]]]]) - slots.asm_score = Slot(uri=NMDC.asm_score, name="asm_score", curie=NMDC.curie('asm_score'), model_uri=NMDC.asm_score, domain=None, range=Optional[float]) @@ -10407,24 +10413,6 @@ class slots: slots.metabolite_identified = Slot(uri=NMDC.metabolite_identified, name="metabolite_identified", curie=NMDC.curie('metabolite_identified'), model_uri=NMDC.metabolite_identified, domain=None, range=Optional[Union[str, ChemicalEntityId]]) -slots.all_proteins = Slot(uri=NMDC.all_proteins, name="all_proteins", curie=NMDC.curie('all_proteins'), - model_uri=NMDC.all_proteins, domain=None, range=Optional[Union[Union[str, GeneProductId], List[Union[str, GeneProductId]]]]) - -slots.best_protein = Slot(uri=NMDC.best_protein, name="best_protein", curie=NMDC.curie('best_protein'), - model_uri=NMDC.best_protein, domain=None, range=Optional[Union[str, GeneProductId]]) - -slots.min_q_value = Slot(uri=NMDC.min_q_value, name="min_q_value", curie=NMDC.curie('min_q_value'), - model_uri=NMDC.min_q_value, domain=None, range=Optional[float]) - -slots.peptide_sequence = Slot(uri=NMDC.peptide_sequence, name="peptide_sequence", curie=NMDC.curie('peptide_sequence'), - model_uri=NMDC.peptide_sequence, domain=None, range=Optional[str]) - -slots.peptide_spectral_count = Slot(uri=NMDC.peptide_spectral_count, name="peptide_spectral_count", curie=NMDC.curie('peptide_spectral_count'), - model_uri=NMDC.peptide_spectral_count, domain=None, range=Optional[int]) - -slots.peptide_sum_masic_abundance = Slot(uri=NMDC.peptide_sum_masic_abundance, name="peptide_sum_masic_abundance", curie=NMDC.curie('peptide_sum_masic_abundance'), - model_uri=NMDC.peptide_sum_masic_abundance, domain=None, range=Optional[int]) - slots.chemical_formula = Slot(uri=NMDC.chemical_formula, name="chemical_formula", curie=NMDC.curie('chemical_formula'), model_uri=NMDC.chemical_formula, domain=None, range=Optional[str]) @@ 
-10434,15 +10422,6 @@ class slots: slots.inchi = Slot(uri=NMDC.inchi, name="inchi", curie=NMDC.curie('inchi'), model_uri=NMDC.inchi, domain=None, range=Optional[str]) -slots.peptide_sequence_count = Slot(uri=NMDC.peptide_sequence_count, name="peptide_sequence_count", curie=NMDC.curie('peptide_sequence_count'), - model_uri=NMDC.peptide_sequence_count, domain=None, range=Optional[int]) - -slots.protein_spectral_count = Slot(uri=NMDC.protein_spectral_count, name="protein_spectral_count", curie=NMDC.curie('protein_spectral_count'), - model_uri=NMDC.protein_spectral_count, domain=None, range=Optional[int]) - -slots.protein_sum_masic_abundance = Slot(uri=NMDC.protein_sum_masic_abundance, name="protein_sum_masic_abundance", curie=NMDC.curie('protein_sum_masic_abundance'), - model_uri=NMDC.protein_sum_masic_abundance, domain=None, range=Optional[int]) - slots.smiles = Slot(uri=NMDC.smiles, name="smiles", curie=NMDC.curie('smiles'), model_uri=NMDC.smiles, domain=None, range=Optional[Union[str, List[str]]]) @@ -12559,9 +12538,6 @@ class slots: model_uri=NMDC.MassSpectrometry_id, domain=MassSpectrometry, range=Union[str, MassSpectrometryId], pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$')) -slots.MassSpectrometry_has_calibration = Slot(uri=NMDC.has_calibration, name="MassSpectrometry_has_calibration", curie=NMDC.curie('has_calibration'), - model_uri=NMDC.MassSpectrometry_has_calibration, domain=MassSpectrometry, range=Optional[Union[str, CalibrationInformationId]]) - slots.MassSpectrometry_has_chromatography_configuration = Slot(uri=NMDC.has_chromatography_configuration, name="MassSpectrometry_has_chromatography_configuration", curie=NMDC.curie('has_chromatography_configuration'), model_uri=NMDC.MassSpectrometry_has_chromatography_configuration, domain=MassSpectrometry, range=Optional[Union[str, ChromatographyConfigurationId]]) @@ -12619,7 +12595,7 @@ class slots: 
pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$')) slots.Extraction_has_input = Slot(uri=NMDC['basic_classes/has_input'], name="Extraction_has_input", curie=NMDC.curie('basic_classes/has_input'), - model_uri=NMDC.Extraction_has_input, domain=Extraction, range=Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]]) + model_uri=NMDC.Extraction_has_input, domain=Extraction, range=Union[Union[str, SampleId], List[Union[str, SampleId]]]) slots.Extraction_has_output = Slot(uri=NMDC['basic_classes/has_output'], name="Extraction_has_output", curie=NMDC.curie('basic_classes/has_output'), model_uri=NMDC.Extraction_has_output, domain=Extraction, range=Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]]) @@ -12678,7 +12654,7 @@ class slots: model_uri=NMDC.SubSamplingProcess_mass, domain=SubSamplingProcess, range=Optional[Union[dict, QuantityValue]]) slots.SubSamplingProcess_has_input = Slot(uri=NMDC['basic_classes/has_input'], name="SubSamplingProcess_has_input", curie=NMDC.curie('basic_classes/has_input'), - model_uri=NMDC.SubSamplingProcess_has_input, domain=SubSamplingProcess, range=Optional[Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]]]) + model_uri=NMDC.SubSamplingProcess_has_input, domain=SubSamplingProcess, range=Optional[Union[Union[str, SampleId], List[Union[str, SampleId]]]]) slots.SubSamplingProcess_has_output = Slot(uri=NMDC['basic_classes/has_output'], name="SubSamplingProcess_has_output", curie=NMDC.curie('basic_classes/has_output'), model_uri=NMDC.SubSamplingProcess_has_output, domain=SubSamplingProcess, range=Optional[Union[Union[str, ProcessedSampleId], List[Union[str, ProcessedSampleId]]]]) @@ -12688,7 +12664,7 @@ class slots: pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$')) slots.MixingProcess_has_input = Slot(uri=NMDC['basic_classes/has_input'], name="MixingProcess_has_input", curie=NMDC.curie('basic_classes/has_input'), - 
model_uri=NMDC.MixingProcess_has_input, domain=MixingProcess, range=Optional[Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]]]) + model_uri=NMDC.MixingProcess_has_input, domain=MixingProcess, range=Optional[Union[Union[str, SampleId], List[Union[str, SampleId]]]]) slots.MixingProcess_has_output = Slot(uri=NMDC['basic_classes/has_output'], name="MixingProcess_has_output", curie=NMDC.curie('basic_classes/has_output'), model_uri=NMDC.MixingProcess_has_output, domain=MixingProcess, range=Optional[Union[Union[str, ProcessedSampleId], List[Union[str, ProcessedSampleId]]]]) @@ -12701,7 +12677,7 @@ class slots: model_uri=NMDC.FiltrationProcess_volume, domain=FiltrationProcess, range=Optional[Union[dict, QuantityValue]]) slots.FiltrationProcess_has_input = Slot(uri=NMDC['basic_classes/has_input'], name="FiltrationProcess_has_input", curie=NMDC.curie('basic_classes/has_input'), - model_uri=NMDC.FiltrationProcess_has_input, domain=FiltrationProcess, range=Optional[Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]]]) + model_uri=NMDC.FiltrationProcess_has_input, domain=FiltrationProcess, range=Optional[Union[Union[str, SampleId], List[Union[str, SampleId]]]]) slots.FiltrationProcess_has_output = Slot(uri=NMDC['basic_classes/has_output'], name="FiltrationProcess_has_output", curie=NMDC.curie('basic_classes/has_output'), model_uri=NMDC.FiltrationProcess_has_output, domain=FiltrationProcess, range=Optional[Union[Union[str, ProcessedSampleId], List[Union[str, ProcessedSampleId]]]]) @@ -12724,7 +12700,7 @@ class slots: pattern=re.compile(r'^[a-zA-Z0-9][a-zA-Z0-9_\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\-\/\.,]*$')) slots.ChromatographicSeparationProcess_has_input = Slot(uri=NMDC['basic_classes/has_input'], name="ChromatographicSeparationProcess_has_input", curie=NMDC.curie('basic_classes/has_input'), - model_uri=NMDC.ChromatographicSeparationProcess_has_input, domain=ChromatographicSeparationProcess, range=Optional[Union[Union[str, NamedThingId], List[Union[str, 
NamedThingId]]]]) + model_uri=NMDC.ChromatographicSeparationProcess_has_input, domain=ChromatographicSeparationProcess, range=Optional[Union[Union[str, SampleId], List[Union[str, SampleId]]]]) slots.ChromatographicSeparationProcess_has_output = Slot(uri=NMDC['basic_classes/has_output'], name="ChromatographicSeparationProcess_has_output", curie=NMDC.curie('basic_classes/has_output'), model_uri=NMDC.ChromatographicSeparationProcess_has_output, domain=ChromatographicSeparationProcess, range=Optional[Union[Union[str, ProcessedSampleId], List[Union[str, ProcessedSampleId]]]]) @@ -12993,7 +12969,7 @@ class slots: model_uri=NMDC.Biosample_source_mat_id, domain=Biosample, range=Optional[Union[dict, TextValue]]) slots.MaterialProcessing_has_input = Slot(uri=NMDC['basic_classes/has_input'], name="MaterialProcessing_has_input", curie=NMDC.curie('basic_classes/has_input'), - model_uri=NMDC.MaterialProcessing_has_input, domain=MaterialProcessing, range=Optional[Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]]]) + model_uri=NMDC.MaterialProcessing_has_input, domain=MaterialProcessing, range=Optional[Union[Union[str, SampleId], List[Union[str, SampleId]]]]) slots.MaterialProcessing_has_output = Slot(uri=NMDC['basic_classes/has_output'], name="MaterialProcessing_has_output", curie=NMDC.curie('basic_classes/has_output'), model_uri=NMDC.MaterialProcessing_has_output, domain=MaterialProcessing, range=Optional[Union[Union[str, ProcessedSampleId], List[Union[str, ProcessedSampleId]]]]) @@ -13098,7 +13074,7 @@ class slots: model_uri=NMDC.DataObject_was_generated_by, domain=DataObject, range=Optional[Union[str, WorkflowExecutionId]], mappings = [PROV["wasGeneratedBy"]]) slots.DataGeneration_has_input = Slot(uri=NMDC['basic_classes/has_input'], name="DataGeneration_has_input", curie=NMDC.curie('basic_classes/has_input'), - model_uri=NMDC.DataGeneration_has_input, domain=DataGeneration, range=Union[Union[str, NamedThingId], List[Union[str, NamedThingId]]]) + 
model_uri=NMDC.DataGeneration_has_input, domain=DataGeneration, range=Union[Union[str, SampleId], List[Union[str, SampleId]]]) slots.DataGeneration_associated_studies = Slot(uri=NMDC['basic_classes/associated_studies'], name="DataGeneration_associated_studies", curie=NMDC.curie('basic_classes/associated_studies'), model_uri=NMDC.DataGeneration_associated_studies, domain=DataGeneration, range=Union[Union[str, StudyId], List[Union[str, StudyId]]]) @@ -13123,4 +13099,4 @@ class slots: model_uri=NMDC.WorkflowExecution_execution_resource, domain=WorkflowExecution, range=Union[str, "ExecutionResourceEnum"]) slots.WorkflowExecution_was_informed_by = Slot(uri=NMDC['basic_classes/was_informed_by'], name="WorkflowExecution_was_informed_by", curie=NMDC.curie('basic_classes/was_informed_by'), - model_uri=NMDC.WorkflowExecution_was_informed_by, domain=WorkflowExecution, range=Union[str, DataGenerationId], mappings = [PROV["wasInformedBy"]]) + model_uri=NMDC.WorkflowExecution_was_informed_by, domain=WorkflowExecution, range=Union[str, DataGenerationId], mappings = [PROV["wasInformedBy"]]) \ No newline at end of file diff --git a/nmdc_schema/nmdc.schema.json b/nmdc_schema/nmdc.schema.json index 6a5ca3dccd..30a084df47 100644 --- a/nmdc_schema/nmdc.schema.json +++ b/nmdc_schema/nmdc.schema.json @@ -9,7 +9,8 @@ "metaproteomics", "metatranscriptomics", "natural organic matter", - "bulk chemistry" + "bulk chemistry", + "amplicon sequencing assay" ], "title": "AnalysisTypeEnum", "type": "string" @@ -22,7 +23,8 @@ "metaproteome", "metabolome", "lipidome", - "nom" + "nom", + "amplicon_sequencing_assay" ], "title": "AnalyteCategoryEnum", "type": "string" @@ -5782,14 +5784,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "type": "string" }, "type": [ @@ -6067,14 +6061,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": 
"string" - } - ], "type": "string" }, "type": [ @@ -7062,14 +7048,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "type": "string" }, "type": [ @@ -7633,14 +7611,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "type": "string" }, "type": "array" @@ -8182,14 +8152,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "type": "string" }, "type": [ @@ -8761,8 +8723,12 @@ "enum": [ "female", "hermaphrodite", + "non-binary", "male", - "neuter" + "transgender", + "transgender (female to male)", + "transgender (male to female)", + "undeclared" ], "title": "HostSexEnum", "type": "string" @@ -9034,14 +9000,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "type": "string" }, "type": "array" @@ -9651,7 +9609,7 @@ "alternative_identifiers": { "description": "A list of alternative identifiers for the entity.", "items": { - "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$", + "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,\\(\\)\\=\\#]*$", "type": "string" }, "type": [ @@ -9736,10 +9694,10 @@ }, "then": { "properties": { - "has_calibration": {} + "generates_calibration": {} }, "required": [ - "has_calibration" + "generates_calibration" ] } }, @@ -9820,8 +9778,8 @@ "null" ] }, - "has_calibration": { - "description": "a calibration instance associated with a process", + "generates_calibration": { + "description": "calibration information is generated by a process", "type": [ "string", "null" @@ -9846,14 +9804,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], 
"type": "string" }, "type": "array" @@ -10306,6 +10256,13 @@ ], "type": "string" }, + "uses_calibration": { + "description": "calibration information is used by a process", + "type": [ + "string", + "null" + ] + }, "version": { "type": [ "string", @@ -11231,6 +11188,10 @@ "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$", "type": "string" }, + "metaproteomics_analysis_category": { + "$ref": "#/$defs/MetaproteomicsAnalysisCategoryEnum", + "description": "The category of metaproteomics analysis being performed." + }, "name": { "description": "A human readable label for an entity", "type": [ @@ -11292,6 +11253,7 @@ } }, "required": [ + "metaproteomics_analysis_category", "execution_resource", "git_url", "started_at_time", @@ -11303,6 +11265,15 @@ "title": "MetaproteomicsAnalysis", "type": "object" }, + "MetaproteomicsAnalysisCategoryEnum": { + "description": "The category of metaproteomics analysis being performed.", + "enum": [ + "matched_metagenome", + "in_silico_metagenome" + ], + "title": "MetaproteomicsAnalysisCategoryEnum", + "type": "string" + }, "MetatranscriptomeAnnotation": { "additionalProperties": false, "allOf": [ @@ -12153,14 +12124,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "type": "string" }, "type": [ @@ -12464,6 +12427,13 @@ ], "type": "string" }, + "uses_calibration": { + "description": "calibration information is used by a process", + "type": [ + "string", + "null" + ] + }, "version": { "type": [ "string", @@ -12556,14 +12526,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "type": "string" }, "type": "array" @@ -12678,14 +12640,7 @@ ] }, "target_gene": { - "anyOf": [ - { - "$ref": "#/$defs/TextValue" - }, - { - "type": "null" - } - ], + "$ref": "#/$defs/TargetGeneEnum", "description": "Targeted gene or locus name for marker 
gene studies" }, "target_subfragment": { @@ -12730,7 +12685,7 @@ }, "OntologyClass": { "additionalProperties": false, - "description": "This class is used to represent ontology terms.", + "description": "A representation of class defined in an external ontology.", "properties": { "alternative_identifiers": { "description": "A list of alternative identifiers for the entity.", @@ -13003,68 +12958,6 @@ "title": "Pathway", "type": "object" }, - "PeptideQuantification": { - "additionalProperties": false, - "description": "This is used to link a metaproteomics analysis workflow to a specific peptide sequence and related information", - "properties": { - "all_proteins": { - "description": "the list of protein identifiers that are associated with the peptide sequence", - "items": { - "type": "string" - }, - "type": [ - "array", - "null" - ] - }, - "best_protein": { - "description": "the specific protein identifier most correctly associated with the peptide sequence", - "type": [ - "string", - "null" - ] - }, - "min_q_value": { - "description": "smallest Q-Value associated with the peptide sequence as provided by MSGFPlus tool", - "type": [ - "number", - "null" - ] - }, - "peptide_sequence": { - "type": [ - "string", - "null" - ] - }, - "peptide_spectral_count": { - "description": "sum of filter passing MS2 spectra associated with the peptide sequence within a given LC-MS/MS data file", - "type": [ - "integer", - "null" - ] - }, - "peptide_sum_masic_abundance": { - "description": "combined MS1 extracted ion chromatograms derived from MS2 spectra associated with the peptide sequence from a given LC-MS/MS data file using the MASIC tool", - "type": [ - "integer", - "null" - ] - }, - "type": { - "description": "the class_uri of the class that has been instantiated", - "enum": [ - "nmdc:PeptideQuantification" - ], - "type": "string" - } - }, - "required": [ - "type" - ], - "title": "PeptideQuantification", - "type": "object" - }, "PersonValue": { "additionalProperties": false, 
"description": "An attribute value representing a person", @@ -13231,14 +13124,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "type": "string" }, "minItems": 2, @@ -13511,62 +13396,6 @@ "title": "ProfilePositionEnum", "type": "string" }, - "ProteinQuantification": { - "additionalProperties": false, - "description": "This is used to link a metaproteomics analysis workflow to a specific protein", - "properties": { - "all_proteins": { - "description": "the list of protein identifiers that are associated with the peptide sequence", - "items": { - "type": "string" - }, - "type": [ - "array", - "null" - ] - }, - "best_protein": { - "description": "the specific protein identifier most correctly associated with the peptide sequence", - "type": [ - "string", - "null" - ] - }, - "peptide_sequence_count": { - "description": "count of peptide sequences grouped to the best_protein", - "type": [ - "integer", - "null" - ] - }, - "protein_spectral_count": { - "description": "sum of filter passing MS2 spectra associated with the best protein within a given LC-MS/MS data file", - "type": [ - "integer", - "null" - ] - }, - "protein_sum_masic_abundance": { - "description": "combined MS1 extracted ion chromatograms derived from MS2 spectra associated with the best protein from a given LC-MS/MS data file using the MASIC tool", - "type": [ - "integer", - "null" - ] - }, - "type": { - "description": "the class_uri of the class that has been instantiated", - "enum": [ - "nmdc:ProteinQuantification" - ], - "type": "string" - } - }, - "required": [ - "type" - ], - "title": "ProteinQuantification", - "type": "object" - }, "Protocol": { "additionalProperties": false, "description": "", @@ -15202,14 +15031,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "type": "string" }, "type": [ @@ -15401,6 
+15222,17 @@ "title": "SurfMaterialEnum", "type": "string" }, + "TargetGeneEnum": { + "description": "", + "enum": [ + "16S_rRNA", + "23S_rRNA", + "18S_rRNA", + "28S_rRNA" + ], + "title": "TargetGeneEnum", + "type": "string" + }, "TextValue": { "additionalProperties": false, "description": "A basic string value", diff --git a/nmdc_schema/nmdc_materialized_patterns.schema.json b/nmdc_schema/nmdc_materialized_patterns.schema.json index 56b5e212c4..2a5e51a857 100644 --- a/nmdc_schema/nmdc_materialized_patterns.schema.json +++ b/nmdc_schema/nmdc_materialized_patterns.schema.json @@ -9,7 +9,8 @@ "metaproteomics", "metatranscriptomics", "natural organic matter", - "bulk chemistry" + "bulk chemistry", + "amplicon sequencing assay" ], "title": "AnalysisTypeEnum", "type": "string" @@ -22,7 +23,8 @@ "metaproteome", "metabolome", "lipidome", - "nom" + "nom", + "amplicon_sequencing_assay" ], "title": "AnalyteCategoryEnum", "type": "string" @@ -5579,6 +5581,7 @@ }, "calibration_object": { "description": "the file containing calibration data object", + "pattern": "^(nmdc):dobj-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": [ "string", "null" @@ -5784,14 +5787,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "pattern": "^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": "string" }, @@ -6071,14 +6066,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "pattern": "^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": "string" }, @@ -6656,6 +6643,7 @@ "in_manifest": { "description": "one or more combinations of other DataObjects that can be analyzed together", "items": { + "pattern": "^^(nmdc):manif-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": "string" }, "type": [ @@ -7071,14 +7059,6 @@ "has_input": { "description": "An input to a process.", 
"items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "pattern": "^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": "string" }, @@ -7644,14 +7624,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "pattern": "^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": "string" }, @@ -8196,14 +8168,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "pattern": "^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": "string" }, @@ -8779,8 +8743,12 @@ "enum": [ "female", "hermaphrodite", + "non-binary", "male", - "neuter" + "transgender", + "transgender (female to male)", + "transgender (male to female)", + "undeclared" ], "title": "HostSexEnum", "type": "string" @@ -9052,14 +9020,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "pattern": "^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": "string" }, @@ -9674,7 +9634,7 @@ "alternative_identifiers": { "description": "A list of alternative identifiers for the entity.", "items": { - "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,]*$", + "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_\\.]+:[a-zA-Z0-9_][a-zA-Z0-9_\\-\\/\\.,\\(\\)\\=\\#]*$", "type": "string" }, "type": [ @@ -9759,10 +9719,10 @@ }, "then": { "properties": { - "has_calibration": {} + "generates_calibration": {} }, "required": [ - "has_calibration" + "generates_calibration" ] } }, @@ -9844,8 +9804,8 @@ "null" ] }, - "has_calibration": { - "description": "a calibration instance associated with a process", + "generates_calibration": { + "description": "calibration information is generated by a process", "pattern": 
"^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": [ "string", @@ -9872,14 +9832,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "pattern": "^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": "string" }, @@ -10337,6 +10289,14 @@ ], "type": "string" }, + "uses_calibration": { + "description": "calibration information is used by a process", + "pattern": "^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", + "type": [ + "string", + "null" + ] + }, "version": { "type": [ "string", @@ -11274,6 +11234,10 @@ "pattern": "^(nmdc):wfmp-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})(\\.[0-9]{1,})$", "type": "string" }, + "metaproteomics_analysis_category": { + "$ref": "#/$defs/MetaproteomicsAnalysisCategoryEnum", + "description": "The category of metaproteomics analysis being performed." + }, "name": { "description": "A human readable label for an entity", "type": [ @@ -11336,6 +11300,7 @@ } }, "required": [ + "metaproteomics_analysis_category", "execution_resource", "git_url", "started_at_time", @@ -11347,6 +11312,15 @@ "title": "MetaproteomicsAnalysis", "type": "object" }, + "MetaproteomicsAnalysisCategoryEnum": { + "description": "The category of metaproteomics analysis being performed.", + "enum": [ + "matched_metagenome", + "in_silico_metagenome" + ], + "title": "MetaproteomicsAnalysisCategoryEnum", + "type": "string" + }, "MetatranscriptomeAnnotation": { "additionalProperties": false, "allOf": [ @@ -12206,14 +12180,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "pattern": "^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": "string" }, @@ -12521,6 +12487,14 @@ ], "type": "string" }, + "uses_calibration": { + "description": "calibration information is used by a process", + "pattern": 
"^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", + "type": [ + "string", + "null" + ] + }, "version": { "type": [ "string", @@ -12615,14 +12589,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "pattern": "^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": "string" }, @@ -12739,14 +12705,7 @@ ] }, "target_gene": { - "anyOf": [ - { - "$ref": "#/$defs/TextValue" - }, - { - "type": "null" - } - ], + "$ref": "#/$defs/TargetGeneEnum", "description": "Targeted gene or locus name for marker gene studies" }, "target_subfragment": { @@ -12791,7 +12750,7 @@ }, "OntologyClass": { "additionalProperties": false, - "description": "This class is used to represent ontology terms.", + "description": "A representation of class defined in an external ontology.", "properties": { "alternative_identifiers": { "description": "A list of alternative identifiers for the entity.", @@ -13064,68 +13023,6 @@ "title": "Pathway", "type": "object" }, - "PeptideQuantification": { - "additionalProperties": false, - "description": "This is used to link a metaproteomics analysis workflow to a specific peptide sequence and related information", - "properties": { - "all_proteins": { - "description": "the list of protein identifiers that are associated with the peptide sequence", - "items": { - "type": "string" - }, - "type": [ - "array", - "null" - ] - }, - "best_protein": { - "description": "the specific protein identifier most correctly associated with the peptide sequence", - "type": [ - "string", - "null" - ] - }, - "min_q_value": { - "description": "smallest Q-Value associated with the peptide sequence as provided by MSGFPlus tool", - "type": [ - "number", - "null" - ] - }, - "peptide_sequence": { - "type": [ - "string", - "null" - ] - }, - "peptide_spectral_count": { - "description": "sum of filter passing MS2 spectra associated with the peptide sequence within a given 
LC-MS/MS data file", - "type": [ - "integer", - "null" - ] - }, - "peptide_sum_masic_abundance": { - "description": "combined MS1 extracted ion chromatograms derived from MS2 spectra associated with the peptide sequence from a given LC-MS/MS data file using the MASIC tool", - "type": [ - "integer", - "null" - ] - }, - "type": { - "description": "the class_uri of the class that has been instantiated", - "enum": [ - "nmdc:PeptideQuantification" - ], - "type": "string" - } - }, - "required": [ - "type" - ], - "title": "PeptideQuantification", - "type": "object" - }, "PersonValue": { "additionalProperties": false, "description": "An attribute value representing a person", @@ -13292,14 +13189,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "pattern": "^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": "string" }, @@ -13574,62 +13463,6 @@ "title": "ProfilePositionEnum", "type": "string" }, - "ProteinQuantification": { - "additionalProperties": false, - "description": "This is used to link a metaproteomics analysis workflow to a specific protein", - "properties": { - "all_proteins": { - "description": "the list of protein identifiers that are associated with the peptide sequence", - "items": { - "type": "string" - }, - "type": [ - "array", - "null" - ] - }, - "best_protein": { - "description": "the specific protein identifier most correctly associated with the peptide sequence", - "type": [ - "string", - "null" - ] - }, - "peptide_sequence_count": { - "description": "count of peptide sequences grouped to the best_protein", - "type": [ - "integer", - "null" - ] - }, - "protein_spectral_count": { - "description": "sum of filter passing MS2 spectra associated with the best protein within a given LC-MS/MS data file", - "type": [ - "integer", - "null" - ] - }, - "protein_sum_masic_abundance": { - "description": "combined MS1 extracted ion chromatograms derived 
from MS2 spectra associated with the best protein from a given LC-MS/MS data file using the MASIC tool", - "type": [ - "integer", - "null" - ] - }, - "type": { - "description": "the class_uri of the class that has been instantiated", - "enum": [ - "nmdc:ProteinQuantification" - ], - "type": "string" - } - }, - "required": [ - "type" - ], - "title": "ProteinQuantification", - "type": "object" - }, "Protocol": { "additionalProperties": false, "description": "", @@ -15277,14 +15110,6 @@ "has_input": { "description": "An input to a process.", "items": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "string" - } - ], "pattern": "^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$", "type": "string" }, @@ -15478,6 +15303,17 @@ "title": "SurfMaterialEnum", "type": "string" }, + "TargetGeneEnum": { + "description": "", + "enum": [ + "16S_rRNA", + "23S_rRNA", + "18S_rRNA", + "28S_rRNA" + ], + "title": "TargetGeneEnum", + "type": "string" + }, "TextValue": { "additionalProperties": false, "description": "A basic string value", diff --git a/nmdc_schema/nmdc_materialized_patterns.yaml b/nmdc_schema/nmdc_materialized_patterns.yaml index eb4663e510..f9f7d04b1f 100644 --- a/nmdc_schema/nmdc_materialized_patterns.yaml +++ b/nmdc_schema/nmdc_materialized_patterns.yaml @@ -298,65 +298,6 @@ emit_prefixes: - xsd default_prefix: nmdc default_range: string -subsets: - mixs_environmental_triad: - name: mixs_environmental_triad - title: MIxS Environmental Triad - from_schema: https://w3id.org/nmdc/nmdc - mixs extension: - name: mixs extension - from_schema: https://w3id.org/nmdc/nmdc - checklist: - name: checklist - description: A MIxS checklist. These can be combined with packages - from_schema: https://w3id.org/nmdc/nmdc - package: - name: package - description: A MIxS package. 
These can be combined with checklists - from_schema: https://w3id.org/nmdc/nmdc - checklist_package_combination: - name: checklist_package_combination - description: A combination of a checklist and a package - from_schema: https://w3id.org/nmdc/nmdc - data object subset: - name: data object subset - description: Subset consisting of the data objects that either inputs or outputs - of processes or workflows. - from_schema: https://w3id.org/nmdc/nmdc - data_portal_subset: - name: data_portal_subset - description: Subset consisting of entities that Kitware/nmdc-server use to populate - the data portal. - comments: - - Schema authors are responsible for alerting and supporting Kitware and nmdc-server - authors about changes they will have to make if entities labeled with data_portal_subset - are modified. - - Assignment of the data_portal_subset is incomplete in the schema. - from_schema: https://w3id.org/nmdc/nmdc - environment: - name: environment - from_schema: https://w3id.org/nmdc/nmdc - investigation: - name: investigation - from_schema: https://w3id.org/nmdc/nmdc - nucleic acid sequence source: - name: nucleic acid sequence source - from_schema: https://w3id.org/nmdc/nmdc - proteases: - name: proteases - from_schema: https://w3id.org/nmdc/nmdc - sample subset: - name: sample subset - description: Subset consisting of entities linked to the processing of samples. Currently, - this subset consists of study, omics process, and biosample. - from_schema: https://w3id.org/nmdc/nmdc - sequencing: - name: sequencing - from_schema: https://w3id.org/nmdc/nmdc - workflow subset: - name: workflow subset - description: Subset consisting of just the workflow execution activities - from_schema: https://w3id.org/nmdc/nmdc types: string: name: string @@ -605,16 +546,16 @@ types: - UO:0000233 base: int uri: xsd:long - decimal degree: - name: decimal degree + decimal_degree: + name: decimal_degree description: A decimal degree expresses latitude or longitude as decimal fractions. 
from_schema: https://w3id.org/nmdc/nmdc see_also: - https://en.wikipedia.org/wiki/Decimal_degrees base: float uri: xsd:decimal - language code: - name: language code + language_code: + name: language_code description: A language code conforming to ISO_639-1 from_schema: https://w3id.org/nmdc/nmdc see_also: @@ -2102,10 +2043,18 @@ enums: text: female hermaphrodite: text: hermaphrodite + non-binary: + text: non-binary male: text: male - neuter: - text: neuter + transgender: + text: transgender + transgender (female to male): + text: transgender (female to male) + transgender (male to female): + text: transgender (male to female) + undeclared: + text: undeclared indoor_space_enum: name: indoor_space_enum from_schema: https://w3id.org/nmdc/nmdc @@ -3168,6 +3117,35 @@ enums: text: low high: text: high + TargetGeneEnum: + name: TargetGeneEnum + from_schema: https://w3id.org/nmdc/nmdc + permissible_values: + 16S_rRNA: + text: 16S_rRNA + description: the small subunit of the bacterial/archaeal ribosome + aliases: + - 16S rRNA + - 16S ribosomal RNA + narrow_mappings: + - OBI:0002763 + 23S_rRNA: + text: 23S_rRNA + description: the large subunit of the bacterial/archaeal ribosome + aliases: + - 23S rRNA + - 23S ribosomal RNA + 18S_rRNA: + text: 18S_rRNA + description: the small subunit of the eukaryotic ribosome + aliases: + - 18S rRNA + - 18S ribosomal RNA + 28S_rRNA: + text: 28S_rRNA + description: the large subunit of the eukaryotic ribosome + aliases: + - -28S rRNA - 28S ribosomal RNA CreditEnum: name: CreditEnum comments: @@ -4316,6 +4294,9 @@ enums: text: nom aliases: - natural organic matter + amplicon_sequencing_assay: + text: amplicon_sequencing_assay + meaning: OBI:0002767 ExtractionTargetEnum: name: ExtractionTargetEnum from_schema: https://w3id.org/nmdc/nmdc @@ -4491,17 +4472,57 @@ enums: text: natural organic matter bulk chemistry: text: bulk chemistry + amplicon sequencing assay: + text: amplicon sequencing assay + meaning: OBI:0002767 + title: Amplicon sequencing 
assay + MetaproteomicsAnalysisCategoryEnum: + name: MetaproteomicsAnalysisCategoryEnum + description: The category of metaproteomics analysis being performed. + from_schema: https://w3id.org/nmdc/nmdc + permissible_values: + matched_metagenome: + text: matched_metagenome + description: A metaproteomics analysis that is matched to a metagenome derived + from the same biosample. + in_silico_metagenome: + text: in_silico_metagenome + description: A metaproteomics analysis that is matched to an in silico generated + metagenome. slots: - has_calibration: - name: has_calibration - description: a calibration instance associated with a process + generates_calibration: + name: generates_calibration + description: calibration information is generated by a process + comments: + - A gas chromatography mass spectrometry run generates data to calibrate the retention + index + from_schema: https://w3id.org/nmdc/nmdc + range: CalibrationInformation + pattern: ^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ + structured_pattern: + syntax: '{id_nmdc_prefix}:calib-{id_shoulder}-{id_blade}$' + interpolated: true + uses_calibration: + name: uses_calibration + description: calibration information is used by a process + comments: + - Retention index calibration data generated by a gas chromatography mass spectrometry + run is used when analyzing metabolomics data from_schema: https://w3id.org/nmdc/nmdc range: CalibrationInformation + pattern: ^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ + structured_pattern: + syntax: '{id_nmdc_prefix}:calib-{id_shoulder}-{id_blade}$' + interpolated: true calibration_object: name: calibration_object description: the file containing calibration data object from_schema: https://w3id.org/nmdc/nmdc range: DataObject + pattern: ^(nmdc):dobj-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ + structured_pattern: + syntax: '{id_nmdc_prefix}:dobj-{id_shoulder}-{id_blade}$' + interpolated: true internal_calibration: name: internal_calibration description: 
whether internal calibration was used, if false, external calibration @@ -4609,15 +4630,6 @@ slots: information about how a sample was introduced into the mass spectrometer. from_schema: https://w3id.org/nmdc/nmdc range: ChromatographyConfiguration - metagenome_annotation_id: - name: metagenome_annotation_id - description: The identifier for the analysis activity that generated the functional - annotation results, where the analysis activity is an instance of the/an appropriate - subclass of WorkflowExecution - deprecated: not used. 2024-10 https://github.com/microbiomedata/nmdc-schema/issues/1253 - from_schema: https://w3id.org/nmdc/nmdc - range: WorkflowExecution - required: true gene_function_id: name: gene_function_id description: The identifier for the gene function. @@ -5054,8 +5066,6 @@ slots: subtables and can be reported notes: - no longer in MIxS as of 6.0? - in_subset: - - mixs extension from_schema: https://w3id.org/nmdc/nmdc aliases: - environmental package @@ -5476,50 +5486,6 @@ slots: description: the specific metabolite identifier from_schema: https://w3id.org/nmdc/nmdc range: ChemicalEntity - all_proteins: - name: all_proteins - description: the list of protein identifiers that are associated with the peptide - sequence - deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 - from_schema: https://w3id.org/nmdc/nmdc - range: GeneProduct - multivalued: true - best_protein: - name: best_protein - description: the specific protein identifier most correctly associated with the - peptide sequence - deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 - from_schema: https://w3id.org/nmdc/nmdc - range: GeneProduct - min_q_value: - name: min_q_value - description: smallest Q-Value associated with the peptide sequence as provided - by MSGFPlus tool - deprecated: not used. 
2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 - from_schema: https://w3id.org/nmdc/nmdc - see_also: - - OBI:0001442 - range: float - peptide_sequence: - name: peptide_sequence - deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 - from_schema: https://w3id.org/nmdc/nmdc - range: string - peptide_spectral_count: - name: peptide_spectral_count - description: sum of filter passing MS2 spectra associated with the peptide sequence - within a given LC-MS/MS data file - deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 - from_schema: https://w3id.org/nmdc/nmdc - range: integer - peptide_sum_masic_abundance: - name: peptide_sum_masic_abundance - description: combined MS1 extracted ion chromatograms derived from MS2 spectra - associated with the peptide sequence from a given LC-MS/MS data file using the - MASIC tool - deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 - from_schema: https://w3id.org/nmdc/nmdc - range: integer chemical_formula: name: chemical_formula description: A generic grouping for molecular formulae and empirical formulae @@ -5537,27 +5503,6 @@ slots: name: inchi from_schema: https://w3id.org/nmdc/nmdc range: string - peptide_sequence_count: - name: peptide_sequence_count - description: count of peptide sequences grouped to the best_protein - deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 - from_schema: https://w3id.org/nmdc/nmdc - range: integer - protein_spectral_count: - name: protein_spectral_count - description: sum of filter passing MS2 spectra associated with the best protein - within a given LC-MS/MS data file - deprecated: not used. 
2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 - from_schema: https://w3id.org/nmdc/nmdc - range: integer - protein_sum_masic_abundance: - name: protein_sum_masic_abundance - description: combined MS1 extracted ion chromatograms derived from MS2 spectra - associated with the best protein from a given LC-MS/MS data file using the MASIC - tool - deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 - from_schema: https://w3id.org/nmdc/nmdc - range: integer smiles: name: smiles description: A string encoding of a molecular graph, no chiral or isotopic information. @@ -6417,31 +6362,6 @@ slots: - altitude is_a: environment field slot_uri: MIXS:0000094 - domain_of: - - agriculture - - air - - built environment - - core - - food-animal and animal feed - - food-farm environment - - food-food production facility - - food-human foods - - host-associated - - human-associated - - human-gut - - human-oral - - human-skin - - human-vaginal - - hydrocarbon resources-cores - - hydrocarbon resources-fluids_swabs - - microbial mat_biofilm - - miscellaneous natural or artificial environment - - plant-associated - - sediment - - soil - - symbiont-associated - - wastewater_sludge - - water range: QuantityValue multivalued: false aminopept_act: @@ -8787,8 +8707,7 @@ slots: EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS' title: broad-scale environmental context examples: - - value: oceanic epipelagic zone biome [ENVO:01000033] for annotating a water - sample from the photic zone in middle of the Atlantic Ocean + - value: oceanic epipelagic zone biome [ENVO:01000035] from_schema: https://w3id.org/nmdc/nmdc aliases: - broad-scale environmental context @@ -8822,10 +8741,7 @@ slots: are accepted in this field. EnvO documentation about how to use the field: https://github.com/EnvironmentOntology/envo/wiki/Using-ENVO-with-MIxS.' 
title: local environmental context examples: - - value: 'litter layer [ENVO:01000338]; Annotating a pooled sample taken from - various vegetation layers in a forest consider: canopy [ENVO:00000047]|herb - and fern layer [ENVO:01000337]|litter layer [ENVO:01000338]|understory [01000335]|shrub - layer [ENVO:01000336].' + - value: litter layer [ENVO:01000338] from_schema: https://w3id.org/nmdc/nmdc aliases: - local environmental context @@ -8859,9 +8775,7 @@ slots: (e.g. a tree, a leaf, a table top).' title: environmental medium examples: - - value: 'soil [ENVO:00001998]; Annotating a fish swimming in the upper 100 m - of the Atlantic Ocean, consider: ocean water [ENVO:00002151]. Example: Annotating - a duck on a pond consider: pond water [ENVO:00002228]|air [ENVO_00002005]' + - value: soil [ENVO:00001998] from_schema: https://w3id.org/nmdc/nmdc aliases: - environmental medium @@ -10203,7 +10117,7 @@ slots: or https://www.ebi.ac.uk/ols/ontologies/uberon title: host body product examples: - - value: Portion of mucus [fma66938] + - value: mucus [UBERON:0000912] from_schema: https://w3id.org/nmdc/nmdc aliases: - host body product @@ -15574,7 +15488,7 @@ slots: is_a: sequencing field string_serialization: '{text}' slot_uri: MIXS:0000044 - range: TextValue + range: TargetGeneEnum multivalued: false target_subfragment: name: target_subfragment @@ -17225,8 +17139,6 @@ slots: ''doi_category'': ''data_management_plan_doi'', ''doi_provider'': ''gsc''}]' description: Provides a list of two DOIs; specifically, an EMSL award DOI and a publication DOI. 
- in_subset: - - data_portal_subset from_schema: https://w3id.org/nmdc/nmdc aliases: - Associated DOIs @@ -17315,6 +17227,10 @@ slots: from_schema: https://w3id.org/nmdc/nmdc range: Manifest multivalued: true + pattern: ^^(nmdc):manif-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ + structured_pattern: + syntax: ^{id_nmdc_prefix}:manif-{id_shoulder}-{id_blade}$ + interpolated: true manifest_category: name: manifest_category description: The type of context in which the constituent DataObjects can be analyzed @@ -17481,7 +17397,7 @@ slots: name: language description: Should use ISO 639-1 code e.g. "en", "fr" from_schema: https://w3id.org/nmdc/nmdc - range: language code + range: language_code has_raw_value: name: has_raw_value description: The value that was specified for an annotation in raw form, i.e. @@ -17532,7 +17448,7 @@ slots: mappings: - schema:latitude slot_uri: wgs84:lat - range: decimal degree + range: decimal_degree longitude: name: longitude description: longitude @@ -17542,7 +17458,7 @@ slots: mappings: - schema:longitude slot_uri: wgs84:long - range: decimal degree + range: decimal_degree infiltrations: name: infiltrations description: The amount of time it takes to complete each infiltration activity @@ -17618,8 +17534,6 @@ slots: examples: - value: doi:10.46936/10.25585/60000880 description: The DOI links to an electronic document. - in_subset: - - data_portal_subset from_schema: https://w3id.org/nmdc/nmdc aliases: - DOI @@ -17637,8 +17551,6 @@ slots: examples: - value: ess_dive description: The corresponding DOI is associated with ESS-DIVE. - in_subset: - - data_portal_subset from_schema: https://w3id.org/nmdc/nmdc close_mappings: - NCIT:C74932 @@ -17649,8 +17561,6 @@ slots: examples: - value: dataset_doi description: The corresponding DOI is a dataset resource type. 
- in_subset: - - data_portal_subset from_schema: https://w3id.org/nmdc/nmdc range: DoiCategoryEnum required: true @@ -18536,7 +18446,9 @@ slots: - Date should be formatted as YYYY(-MM(-DD)). Ie, 2021-04-15, 2021-04 and 2021 are all acceptable. examples: - - value: 2021-04-15, 2021-04 and 2021 are all acceptable. + - value: '2021-04-15' + - value: 2021-04 + - value: '2021' from_schema: https://w3id.org/nmdc/nmdc see_also: - MIXS:0000011 @@ -18554,7 +18466,8 @@ slots: comments: - 'Time should be entered as HH:MM(:SS) in GMT. See here for a converter: https://www.worldtimebuddy.com/pst-to-gmt-converter' examples: - - value: 13:33 or 13:33:55 + - value: '813' + - value: '48835' from_schema: https://w3id.org/nmdc/nmdc see_also: - MIXS:0000011 @@ -18573,7 +18486,8 @@ slots: comments: - 'Time should be entered as HH:MM(:SS) in GMT. See here for a converter: https://www.worldtimebuddy.com/pst-to-gmt-converter' examples: - - value: 13:33 or 13:33:55 + - value: '813' + - value: '48835' from_schema: https://w3id.org/nmdc/nmdc see_also: - MIXS:0000011 @@ -18628,12 +18542,11 @@ slots: labeled compounds examples: - value: 13C glucose - - value: H218O + - value: 18O water from_schema: https://w3id.org/nmdc/nmdc see_also: - MIXS:0000751 rank: 16 - string_serialization: '{termLabel} {[termID]}; {timestamp}' slot_group: MIxS Inspired recommended: true micro_biomass_c_meth: @@ -18773,7 +18686,9 @@ slots: - Date should be formatted as YYYY(-MM(-DD)). Ie, 2021-04-15, 2021-04 and 2021 are all acceptable. examples: - - value: 2021-04-15, 2021-04 and 2021 are all acceptable. + - value: '2021-04-15' + - value: 2021-04 + - value: '2021' from_schema: https://w3id.org/nmdc/nmdc see_also: - MIXS:0000011 @@ -18791,7 +18706,8 @@ slots: comments: - 'Time should be entered as HH:MM(:SS) in GMT. 
See here for a converter: https://www.worldtimebuddy.com/pst-to-gmt-converter' examples: - - value: 13:33 or 13:33:55 + - value: '813' + - value: '48835' from_schema: https://w3id.org/nmdc/nmdc see_also: - MIXS:0000011 @@ -18804,7 +18720,7 @@ slots: description: Select all the data types associated or available for this biosample title: analysis/data type examples: - - value: metagenomics; metabolomics; proteomics + - value: metagenomics; metabolomics; metaproteomics from_schema: https://w3id.org/nmdc/nmdc see_also: - MIxS:investigation_type @@ -18832,17 +18748,16 @@ slots: slot_group: Sample ID recommended: true multivalued: true + metaproteomics_analysis_category: + name: metaproteomics_analysis_category + description: The category of metaproteomics analysis being performed. + from_schema: https://w3id.org/nmdc/nmdc + range: MetaproteomicsAnalysisCategoryEnum + required: true metagenome_assembly_parameter: name: metagenome_assembly_parameter from_schema: https://w3id.org/nmdc/nmdc abstract: true - has_peptide_quantifications: - name: has_peptide_quantifications - deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 - from_schema: https://w3id.org/nmdc/nmdc - range: PeptideQuantification - multivalued: true - inlined_as_list: true asm_score: name: asm_score description: A score for comparing metagenomic assembly quality from same sample. 
@@ -19154,7 +19069,7 @@ classes: is_a: DataGeneration slots: - eluent_introduction_category - - has_calibration + - generates_calibration - has_chromatography_configuration - has_mass_spectrometry_configuration slot_usage: @@ -19164,12 +19079,6 @@ classes: structured_pattern: syntax: '{id_nmdc_prefix}:(dgms|omprc)-{id_shoulder}-{id_blade}$' interpolated: true - has_calibration: - name: has_calibration - pattern: ^(nmdc):calib-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ - structured_pattern: - syntax: '{id_nmdc_prefix}:calib-{id_shoulder}-{id_blade}$' - interpolated: true has_chromatography_configuration: name: has_chromatography_configuration pattern: ^(nmdc):chrcon-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ @@ -19191,12 +19100,12 @@ classes: equals_string: gas_chromatography postconditions: slot_conditions: - has_calibration: - name: has_calibration + generates_calibration: + name: generates_calibration required: true - description: If eluent_introduction_category is gas_chromatography, then has_calibration + description: If eluent_introduction_category is gas_chromatography, then generates_calibration is required. - title: has_calibration_required_if_gc + title: generates_calibration_required_if_gc - preconditions: slot_conditions: eluent_introduction_category: @@ -19453,14 +19362,12 @@ classes: slot_usage: has_input: name: has_input + range: Sample required: true pattern: ^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ structured_pattern: syntax: '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$' interpolated: true - any_of: - - range: Biosample - - range: ProcessedSample has_output: name: has_output required: true @@ -19642,13 +19549,11 @@ classes: description: The output mass of the SubSampling Process. 
has_input: name: has_input + range: Sample pattern: ^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ structured_pattern: syntax: '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$' interpolated: true - any_of: - - range: Biosample - - range: ProcessedSample has_output: name: has_output description: The subsample. @@ -19687,13 +19592,11 @@ classes: syntax: '{id_nmdc_prefix}:mixpro-{id_shoulder}-{id_blade}$' has_input: name: has_input + range: Sample pattern: ^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ structured_pattern: syntax: '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$' interpolated: true - any_of: - - range: Biosample - - range: ProcessedSample has_output: name: has_output description: The mixed sample. @@ -19739,13 +19642,11 @@ classes: description: The volume of sample filtered. has_input: name: has_input + range: Sample pattern: ^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ structured_pattern: syntax: '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$' interpolated: true - any_of: - - range: Biosample - - range: ProcessedSample has_output: name: has_output range: ProcessedSample @@ -19814,13 +19715,11 @@ classes: syntax: '{id_nmdc_prefix}:cspro-{id_shoulder}-{id_blade}$' has_input: name: has_input + range: Sample pattern: ^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ structured_pattern: syntax: '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$' interpolated: true - any_of: - - range: Biosample - - range: ProcessedSample has_output: name: has_output range: ProcessedSample @@ -19893,8 +19792,6 @@ classes: name: MetagenomeAnnotation description: A workflow execution activity that provides functional and structural annotation of assembled metagenome contigs - in_subset: - - workflow subset from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution slots: @@ -19965,6 +19862,13 @@ classes: syntax: '{id_nmdc_prefix}:frsite-{id_shoulder}-{id_blade}$' interpolated: 
true class_uri: nmdc:FieldResearchSite + Sample: + name: Sample + description: A sample is a material entity that can be characterized by an experiment. + from_schema: https://w3id.org/nmdc/nmdc + is_a: MaterialEntity + abstract: true + class_uri: nmdc:Sample Biosample: name: Biosample description: Biological source material which can be characterized by an experiment. @@ -19976,8 +19880,6 @@ classes: annotate the samples. Samples are always associated with a taxon. notes: - could add GOLD and EBI's biosample definitions to the alt_descriptions? - in_subset: - - sample subset from_schema: https://w3id.org/nmdc/nmdc aliases: - sample @@ -19988,7 +19890,7 @@ classes: - OBI:0000747 - NCIT:C43412 - http://purl.obolibrary.org/obo/FBcv_0003024 - is_a: MaterialEntity + is_a: Sample slots: - associated_studies - biosample_categories @@ -21036,13 +20938,11 @@ classes: slot_usage: has_input: name: has_input + range: Sample pattern: ^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ structured_pattern: syntax: '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$' interpolated: true - any_of: - - range: Biosample - - range: ProcessedSample has_output: name: has_output range: ProcessedSample @@ -21073,7 +20973,7 @@ classes: name: ProcessedSample title: Processed Sample from_schema: https://w3id.org/nmdc/nmdc - is_a: MaterialEntity + is_a: Sample slots: - biomaterial_purity - dna_absorb1 @@ -21140,35 +21040,6 @@ classes: - metabolite_identified - type class_uri: nmdc:MetaboliteIdentification - PeptideQuantification: - name: PeptideQuantification - description: This is used to link a metaproteomics analysis workflow to a specific - peptide sequence and related information - deprecated: not used. 
2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 - from_schema: https://w3id.org/nmdc/nmdc - slots: - - type - - all_proteins - - best_protein - - min_q_value - - peptide_sequence - - peptide_spectral_count - - peptide_sum_masic_abundance - class_uri: nmdc:PeptideQuantification - ProteinQuantification: - name: ProteinQuantification - description: This is used to link a metaproteomics analysis workflow to a specific - protein - deprecated: not used. 2024-11 https://github.com/microbiomedata/nmdc-schema/issues/2250 - from_schema: https://w3id.org/nmdc/nmdc - slots: - - all_proteins - - best_protein - - peptide_sequence_count - - protein_spectral_count - - protein_sum_masic_abundance - - type - class_uri: nmdc:ProteinQuantification ChemicalEntity: name: ChemicalEntity id_prefixes: @@ -21506,10 +21377,8 @@ classes: class_uri: nmdc:NamedThing OntologyClass: name: OntologyClass - description: This class is used to represent ontology terms. + description: A representation of a class defined in an external ontology. from_schema: https://w3id.org/nmdc/nmdc - aliases: - - OntologyClass exact_mappings: - biolink:OntologyClass - owl:Class @@ -21723,8 +21592,6 @@ classes: but no longer for the Study.id comments: - The Study class can include both consortia and research studies. - in_subset: - - sample subset from_schema: https://w3id.org/nmdc/nmdc aliases: - proposal @@ -21882,8 +21749,6 @@ classes: name: DataObject description: An object that primarily consists of symbols that represent information. Files, records, and omics data are examples of data objects. - in_subset: - - data object subset from_schema: https://w3id.org/nmdc/nmdc is_a: InformationObject slots: @@ -21926,8 +21791,6 @@ classes: source: embl.ena description: An experiment contains information about a sequencing experiment including library and instrument details. 
- in_subset: - - sample subset from_schema: https://w3id.org/nmdc/nmdc aliases: - OmicsProcessing @@ -21950,14 +21813,12 @@ classes: slot_usage: has_input: name: has_input + range: Sample required: true pattern: ^(nmdc):(bsm|procsm)-([0-9][a-z]{0,6}[0-9])-([A-Za-z0-9]{1,})$ structured_pattern: syntax: '{id_nmdc_prefix}:(bsm|procsm)-{id_shoulder}-{id_blade}$' interpolated: true - any_of: - - range: Biosample - - range: ProcessedSample associated_studies: name: associated_studies range: Study @@ -21984,8 +21845,6 @@ classes: comments: - Each instance of this (and all other) subclasses of WorkflowExecution is a distinct run with start and stop times, potentially with different inputs and outputs - in_subset: - - workflow subset from_schema: https://w3id.org/nmdc/nmdc aliases: - analysis @@ -22057,8 +21916,6 @@ classes: comments: - instances of this class may use a de novo assembly strategy in most or all cases relevant to NMDC - in_subset: - - workflow subset from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution slots: @@ -22107,8 +21964,6 @@ classes: class_uri: nmdc:MetagenomeAssembly MetatranscriptomeAssembly: name: MetatranscriptomeAssembly - in_subset: - - workflow subset from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution slots: @@ -22157,8 +22012,6 @@ classes: class_uri: nmdc:MetatranscriptomeAssembly MetatranscriptomeAnnotation: name: MetatranscriptomeAnnotation - in_subset: - - workflow subset from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution slots: @@ -22204,8 +22057,6 @@ classes: name: MetatranscriptomeExpressionAnalysis description: A workflow process that provides expression values and read counts for gene features predicted on the contigs. 
- in_subset: - - workflow subset from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution slots: @@ -22233,8 +22084,6 @@ classes: description: A workflow execution activity that uses computational binning tools to group assembled contigs into genomes title: Metagenome-Assembled Genome analysis activity - in_subset: - - workflow subset from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution slots: @@ -22268,8 +22117,6 @@ classes: description: Initial sequencing activity that precedes any analysis. This activity has output(s) that are the raw sequencing data. title: Metagenome sequencing activity - in_subset: - - workflow subset from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution slot_usage: @@ -22300,8 +22147,6 @@ classes: adapter trimming, spike-in removal, and human/cat/dog/mouse/microbe contaminant removal title: Read quality control analysis activity - in_subset: - - workflow subset from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution slots: @@ -22331,8 +22176,6 @@ classes: description: A workflow execution activity that performs taxonomy classification using sequencing reads title: Read based analysis activity - in_subset: - - workflow subset from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution slot_usage: @@ -22352,12 +22195,11 @@ classes: class_uri: nmdc:ReadBasedTaxonomyAnalysis MetabolomicsAnalysis: name: MetabolomicsAnalysis - in_subset: - - workflow subset from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution slots: - has_metabolite_identifications + - uses_calibration slot_usage: id: name: id @@ -22375,10 +22217,10 @@ classes: class_uri: nmdc:MetabolomicsAnalysis MetaproteomicsAnalysis: name: MetaproteomicsAnalysis - in_subset: - - workflow subset from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution + slots: + - metaproteomics_analysis_category slot_usage: id: name: id @@ -22396,10 +22238,10 @@ classes: class_uri: nmdc:MetaproteomicsAnalysis NomAnalysis: name: NomAnalysis - in_subset: 
- - workflow subset from_schema: https://w3id.org/nmdc/nmdc is_a: WorkflowExecution + slots: + - uses_calibration slot_usage: id: name: id