Skip to content

Commit

Permalink
Merge pull request #584 from hubmapconsortium/kburke/addDatasetTypeAt…
Browse files Browse the repository at this point in the history
…tribute

Kburke/add dataset type attribute
  • Loading branch information
yuanzhou authored Dec 7, 2023
2 parents af000d6 + 741b91d commit 0b21552
Show file tree
Hide file tree
Showing 13 changed files with 394 additions and 584 deletions.
53 changes: 0 additions & 53 deletions entity-api-spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -406,59 +406,6 @@ components:
- section
- suspension
description: "A code representing the type of specimen. Must be an organ, block, section, or suspension"
specimen_type:
type: string
enum:
- atacseq
- biopsy
- blood
- cell_lysate
- clarity_hydrogel
- codex
- cryosections_curls_from_fresh_frozen_oct
- cryosections_curls_rnalater
- ffpe_block
- ffpe_slide
- fixed_frozen_section_slide
- fixed_tissue_piece
- flash_frozen_liquid_nitrogen
- formalin_fixed_oct_block
- fresh_frozen_oct_block
- fresh_frozen_section_slide
- fresh_frozen_tissue
- fresh_frozen_tissue_section
- fresh_tissue
- frozen_cell_pellet_buffy_coat
- gdna
- module
- nuclei
- nuclei_rnalater
- organ
- organ_piece
- other
- pbmc
- pfa_fixed_frozen_oct_block
- plasma
- protein
- ran_poly_a_enriched
- rna_total
- rnalater_treated_and_stored
- rnaseq
- scatacseq
- scrnaseq
- segment
- seqfish
- sequence_library
- serum
- single_cell_cryopreserved
- snatacseq
- snrnaseq
- tissue_lysate
- wgs
description: "DEPRECATED: No longer a required field. A code representing the type of specimen. Must be one of the codes specified in: [tissue sample types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml)"
specimen_type_other:
type: string
description: "The user provided sample type if the 'other' sample_type is chosen."
protocol_url:
type: string
description: "The protocols.io DOI URL pointing to the protocol under which the sample was obtained and/or prepared."
Expand Down
252 changes: 102 additions & 150 deletions src/app.py

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions src/app_neo4j_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -736,7 +736,7 @@ def get_prov_info(neo4j_driver, param_dict, published_only):
f" WITH ds, FIRSTSAMPLE, DONOR, REVISIONS, METASAMPLE, RUISAMPLE, ORGAN, COLLECT(distinct processed_dataset) AS PROCESSED_DATASET"
f" RETURN ds.uuid, FIRSTSAMPLE, DONOR, RUISAMPLE, ORGAN, ds.hubmap_id, ds.status, ds.group_name,"
f" ds.group_uuid, ds.created_timestamp, ds.created_by_user_email, ds.last_modified_timestamp, "
f" ds.last_modified_user_email, ds.lab_dataset_id, ds.data_types, METASAMPLE, PROCESSED_DATASET, REVISIONS")
f" ds.last_modified_user_email, ds.lab_dataset_id, ds.data_types, METASAMPLE, PROCESSED_DATASET, REVISIONS") # TODO replace ds.data_types with ds.dataset_type when required

logger.info("======get_prov_info() query======")
logger.info(query)
Expand Down Expand Up @@ -834,7 +834,7 @@ def get_individual_prov_info(neo4j_driver, dataset_uuid):
f" WITH ds, FIRSTSAMPLE, DONOR, METASAMPLE, RUISAMPLE, ORGAN, COLLECT(distinct processed_dataset) AS PROCESSED_DATASET"
f" RETURN ds.uuid, FIRSTSAMPLE, DONOR, RUISAMPLE, ORGAN, ds.hubmap_id, ds.status, ds.group_name,"
f" ds.group_uuid, ds.created_timestamp, ds.created_by_user_email, ds.last_modified_timestamp, "
f" ds.last_modified_user_email, ds.lab_dataset_id, ds.data_types, METASAMPLE, PROCESSED_DATASET")
f" ds.last_modified_user_email, ds.lab_dataset_id, ds.data_types, METASAMPLE, PROCESSED_DATASET, ds.dataset_type")
logger.info("======get_prov_info() query======")
logger.info(query)

Expand Down Expand Up @@ -891,6 +891,7 @@ def get_individual_prov_info(neo4j_driver, dataset_uuid):
node_dict = schema_neo4j_queries.node_to_dict(entry)
content_sixteen.append(node_dict)
record_dict['processed_dataset'] = content_sixteen
record_dict['dataset_type'] = record_contents[17] if record_contents[17] is not None else ''
return record_dict


Expand Down
4 changes: 4 additions & 0 deletions src/instance/app.cfg.example
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ UUID_API_URL = 'http://uuid-api:8080'
# Works regardless of the trailing slash
INGEST_API_URL = 'https://ingest-api.dev.hubmapconsortium.org'

# URL for talking to Ontology API (default for DEV)
# Works regardless of the trailing slash
ONTOLOGY_API_URL = 'https://ontology-api.dev.hubmapconsortium.org'

# A list of URLs for talking to multiple Search API instances (default value used for docker deployment, no token needed)
# Works regardless of the trailing slash /
SEARCH_API_URL_LIST = ['http://search-api:8080']
Expand Down
36 changes: 8 additions & 28 deletions src/schema/provenance_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,14 @@ ENTITIES:
type: list
required_on_create: true # Only required for create via POST, not update via PUT
description: "The data or assay types contained in this dataset as a json array of strings. Each is an assay code from [assay types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/assay_types.yaml)."
dataset_type:
before_property_create_validators:
- validate_recognized_dataset_type
before_property_update_validators:
- validate_recognized_dataset_type
type: string
required_on_create: false # Once this replaces data_types, it will be required for create via POST, not for update via PUT
description: "The assay types of this Dataset. Valid values come from UBKG and are queried by schema_manager.get_valueset_dataset_type() using the Ontology API."
collections:
type: list
transient: true
Expand Down Expand Up @@ -947,33 +955,6 @@ ENTITIES:
- validate_sample_category
before_property_update_validators:
- validate_sample_category

# No longer required on create, specimen_type -> sample_category 12/15/2022
specimen_type:
type: string
#required_on_create: true # Only required for create via POST, not update via PUT
description: "A code representing the type of specimen. Must be one of the codes specified in: [tissue sample types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml)"
# Validate the given value against the definitions: https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml
# Disabled validation 12/15/2022
# before_property_create_validators:
# - validate_specimen_type
# before_property_update_validators:
# - validate_specimen_type
specimen_type_other:
type: string
description: "The user provided sample type if the 'other' sample_type is chosen."


# specimen_type is no longer required on create; this field will be removed when specimen_type is removed
# Simply always set to 'Unknown' and no need to update 12/15/2022
tissue_type:
type: string
generated: true # Can not be updated via the PUT
#auto_update: true # Will always update automatically if the entity gets updated
description: 'The type of the tissue based on the mapping between type (Block/Section/Suspension) and the specimen_type, default is Unknown'
before_create_trigger: set_tissue_type
#before_update_trigger: set_tissue_type

portal_metadata_upload_files:
type: json_string
description: "A list of relative paths to metadata files"
Expand All @@ -999,7 +980,6 @@ ENTITIES:
immutable: true
description: "The displayname of globus group which the user who created this entity is a member of"
before_create_trigger: set_group_name
# Should be required on create only when specimen_type==organ
organ:
type: string
description: "Organ code specifier, only set if sample_type == organ. Valid values found in: [organ types](https://github.com/hubmapconsortium/search-api/blob/main/src/search-schema/data/definitions/enums/organ_types.yaml)"
Expand Down
13 changes: 5 additions & 8 deletions src/schema/schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,20 @@
class SchemaConstants(object):
MEMCACHED_TTL = 7200

# Constants used by validators
INGEST_API_APP = 'ingest-api'
INGEST_PIPELINE_APP = 'ingest-pipeline'
HUBMAP_APP_HEADER = 'X-Hubmap-Application'
DATASET_STATUS_PUBLISHED = 'published'

# Used by triggers, all lowercase for easy comparison
ACCESS_LEVEL_PUBLIC = 'public'
ACCESS_LEVEL_CONSORTIUM = 'consortium'
ACCESS_LEVEL_PROTECTED = 'protected'

# Yaml file to parse organ description
ORGAN_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/organ_types.yaml'
ASSAY_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/assay_types.yaml'

# For generating Sample.tissue_type
TISSUE_TYPES_YAML = 'https://raw.githubusercontent.com/hubmapconsortium/search-api/main/src/search-schema/data/definitions/enums/tissue_sample_types.yaml'
UUID_API_ID_ENDPOINT = '/uuid'
INGEST_API_FILE_COMMIT_ENDPOINT = '/file-commit'
INGEST_API_FILE_REMOVE_ENDPOINT = '/file-remove'
ONTOLOGY_API_ASSAY_TYPES_ENDPOINT = '/assaytype?application_context=HUBMAP'
ONTOLOGY_API_ORGAN_TYPES_ENDPOINT = '/organs/by-code?application_context=HUBMAP'

DOI_BASE_URL = 'https://doi.org/'

Expand Down
3 changes: 3 additions & 0 deletions src/schema/schema_errors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@

class UnimplementedValidatorException(Exception):
    # Marker exception type: subclasses Exception and adds no behavior of its own.
    # NOTE(review): presumably raised when a validator named in the schema has
    # no implementation -- confirm against the call sites.
    pass

class SchemaValidationException(Exception):
    # Marker exception type: subclasses Exception and adds no behavior of its own.
    # NOTE(review): presumably signals that entity data failed schema
    # validation -- confirm against the call sites.
    pass

Expand Down
Loading

0 comments on commit 0b21552

Please sign in to comment.