diff --git a/microsetta_private_api/admin/admin_impl.py b/microsetta_private_api/admin/admin_impl.py index a747beca..d00e5356 100644 --- a/microsetta_private_api/admin/admin_impl.py +++ b/microsetta_private_api/admin/admin_impl.py @@ -787,6 +787,7 @@ def list_barcode_query_fields(token_info): 'input': 'select', 'values': { "Blood (skin prick)": "Blood (skin prick)", + "Cheek": "Cheek", "Saliva": "Saliva", "Ear wax": "Ear wax", "Forehead": "Forehead", diff --git a/microsetta_private_api/api/_sample.py b/microsetta_private_api/api/_sample.py index 31f6cd1b..b696c6e7 100644 --- a/microsetta_private_api/api/_sample.py +++ b/microsetta_private_api/api/_sample.py @@ -148,11 +148,15 @@ def update_sample_association(account_id, source_id, sample_id, body, # sample_site will not be present if its environmental. this will # default to None if the key is not present sample_site = body.get('sample_site') + + barcode_meta = body.get('barcode_meta') + sample_info = SampleInfo( sample_id, sample_datetime, sample_site, - body["sample_notes"] + body["sample_notes"], + barcode_meta ) sample_repo.update_info(account_id, source_id, sample_info, diff --git a/microsetta_private_api/api/microsetta_private_api.yaml b/microsetta_private_api/api/microsetta_private_api.yaml index 45737c7d..3be2fb1c 100644 --- a/microsetta_private_api/api/microsetta_private_api.yaml +++ b/microsetta_private_api/api/microsetta_private_api.yaml @@ -3589,7 +3589,7 @@ components: nullable: true sample_site: enum: ["Blood (skin prick)", "Saliva", "Ear wax", "Forehead", "Fur", "Hair", "Left hand", "Left leg", "Mouth", "Nares", "Nasal mucus", - "Right hand", "Right leg", "Stool", "Tears", "Torso", "Vaginal mucus", null] + "Right hand", "Right leg", "Stool", "Tears", "Torso", "Vaginal mucus", "Cheek", null] example: "Stool" sample_edit_locked: type: boolean diff --git a/microsetta_private_api/api/tests/test_api.py b/microsetta_private_api/api/tests/test_api.py index 132f9dc1..2717565b 100644 --- 
-- Patch 0146: per-sample metadata for cheek samples.
--
-- Beginning with cheek samples, we're collecting metadata that are explicitly
-- linked to sample collection (unlike surveys, which are implicitly linked
-- to samples via sources), but not globally collected, and therefore don't
-- belong in the ag.ag_kit_barcodes table. A new table will store these
-- fields and could eventually be extended to a much more robust framework.

-- First, we need to set up an ENUM type to enforce values for the type of
-- product the participant last used to wash their face. Any value outside
-- this list is rejected by the database at insert time.
CREATE TYPE SAMPLE_SITE_LAST_WASHED_PRODUCT_TYPE AS ENUM ('Soap (includes bar and liquid soap)', 'Foaming face wash', 'Face cleanser', 'Just water', 'Other (e.g. shampoo, body wash, all-in-one or all-over wash)', 'Not sure');

-- Then, create the table to store the data.
-- Note: the date and time are stored separately because we're not enforcing
-- either as a required field. As such, using a timestamp type would not be
-- appropriate since it forces us into a both or neither paradigm.
--
-- The primary key is the barcode's ID, so there is at most one row per
-- sample. All three answer columns are nullable.
CREATE TABLE ag.ag_kit_barcodes_cheek (
    ag_kit_barcode_id UUID NOT NULL PRIMARY KEY,
    sample_site_last_washed_date DATE,
    sample_site_last_washed_time TIME,
    sample_site_last_washed_product SAMPLE_SITE_LAST_WASHED_PRODUCT_TYPE,

    -- Foreign key relationship on ag_kit_barcode_id: a row can only exist
    -- for a barcode that is present in ag.ag_kit_barcodes
    CONSTRAINT fk_ag_kit_barcode_id FOREIGN KEY (ag_kit_barcode_id) REFERENCES ag.ag_kit_barcodes (ag_kit_barcode_id)
);
sample_id, datetime_collected, site, notes, + barcode_meta=None): self.id = sample_id # NB: datetime_collected may be None if sample not yet used self.datetime_collected = datetime_collected @@ -96,3 +102,8 @@ def __init__(self, sample_id, datetime_collected, site, notes): self.notes = notes # NB: site may be None if sample not yet used self.site = site + + if barcode_meta is None: + self.barcode_meta = {} + else: + self.barcode_meta = barcode_meta diff --git a/microsetta_private_api/repo/metadata_repo/_constants.py b/microsetta_private_api/repo/metadata_repo/_constants.py index c30a9a1d..16342a58 100644 --- a/microsetta_private_api/repo/metadata_repo/_constants.py +++ b/microsetta_private_api/repo/metadata_repo/_constants.py @@ -237,7 +237,7 @@ 'host_body_habitat': 'UBERON:feces', 'env_material': 'feces', 'env_package': 'human-gut', - 'description': 'American Gut Project Stool sample', + 'description': 'Microsetta Initiative Stool sample', 'host_body_site': 'UBERON:feces'}, 'Forehead': { 'host_body_product': 'UBERON:sebum', @@ -356,7 +356,18 @@ 'env_package': 'human-associated', 'description': 'American Gut Project Ear wax sample', 'empo_3': 'Animal secretion', - 'host_body_site': 'UBERON:external auditory meatus'} + 'host_body_site': 'UBERON:external auditory meatus'}, + 'Cheek': { + 'host_body_product': 'UBERON:sebum', + 'sample_type': 'skin of cheek', + 'scientific_name': 'human skin metagenome', + 'taxon_id': '539655', + 'host_body_habitat': 'UBERON:skin', + 'empo_3': 'Animal surface', + 'env_material': 'sebum material', + 'env_package': 'human-skin', + 'description': 'Microsetta Initiative cheek skin sample', + 'host_body_site': 'UBERON:skin of cheek'} } diff --git a/microsetta_private_api/repo/metadata_repo/_repo.py b/microsetta_private_api/repo/metadata_repo/_repo.py index 5690e4c7..1450316f 100644 --- a/microsetta_private_api/repo/metadata_repo/_repo.py +++ b/microsetta_private_api/repo/metadata_repo/_repo.py @@ -710,6 +710,10 @@ def 
_to_pandas_series(metadata, multiselect_map): index.append(shortname) values.append(answer) + for variable, value in sample_detail.barcode_meta.items(): + index.append(variable) + values.append(value) + for variable, value in sample_invariants.items(): index.append(variable) values.append(value) diff --git a/microsetta_private_api/repo/metadata_repo/tests/test_repo.py b/microsetta_private_api/repo/metadata_repo/tests/test_repo.py index 81311ba1..dc7bad9c 100644 --- a/microsetta_private_api/repo/metadata_repo/tests/test_repo.py +++ b/microsetta_private_api/repo/metadata_repo/tests/test_repo.py @@ -67,7 +67,10 @@ def setUp(self): "sample": MM({ "sample_projects": ["American Gut Project"], "datetime_collected": "2013-10-15T09:30:00", - "site": "Stool" + "site": "Stool", + "barcode_meta": { + "sample_site_last_washed_date": "01/10/2025" + } }), 'survey_answers': [ {'template': 1, @@ -134,7 +137,8 @@ def setUp(self): "sample": MM({ "sample_projects": ["American Gut Project"], "datetime_collected": "2013-10-15T09:30:00", - "site": "Stool" + "site": "Stool", + "barcode_meta": {} }), 'survey_answers': [ {'template': 1, @@ -170,7 +174,8 @@ def setUp(self): "sample": MM({ "sample_projects": ["American Gut Project"], "datetime_collected": "2013-10-15T09:30:00", - "site": "Stool" + "site": "Stool", + "barcode_meta": {} }), 'survey_answers': [ {'template': SurveyTemplateRepo.DIET_ID, @@ -406,13 +411,13 @@ def test_to_pandas_dataframe(self): 'true', 'true', 'false', 'false', UNSPECIFIED, 'okay', 'No', "2013-10-15T09:30:00", '000004216', - 'US:CA', 'CA', '33', '-117'], + 'US:CA', 'CA', '33', '-117', '01/10/2025'], ['XY0004216', 'bar', 'Vegan foo', 'Yes', UNSPECIFIED, UNSPECIFIED, UNSPECIFIED, 'No', 'false', 'true', 'true', 'false', 'foobar', UNSPECIFIED, UNSPECIFIED, "2013-10-15T09:30:00", 'XY0004216', - 'US:CA', 'CA', '33', '-117']], + 'US:CA', 'CA', '33', '-117', 'not provided']], columns=['sample_name', 'host_subject_id', 'diet_type', 'multivitamin', 'probiotic_frequency', @@ 
-425,7 +430,8 @@ def test_to_pandas_dataframe(self): 'sample2specific', 'abc', 'def', 'collection_timestamp', 'anonymized_name', 'geo_loc_name', - 'state', 'latitude', 'longitude'] + 'state', 'latitude', 'longitude', + 'sample_site_last_washed_date'] ).set_index('sample_name') for k, v in HUMAN_SITE_INVARIANTS['Stool'].items(): @@ -453,7 +459,8 @@ def test_to_pandas_series(self): values = ['foo', '', 'No', 'Unspecified', 'Unspecified', 'Unspecified', 'No', 'true', 'true', 'false', 'false', 'okay', 'No', - '2013-10-15T09:30:00', 'US:CA', 'CA', '33', '-117'] + '2013-10-15T09:30:00', 'US:CA', 'CA', '33', '-117', + '01/10/2025'] index = ['HOST_SUBJECT_ID', 'DIET_TYPE', 'MULTIVITAMIN', 'PROBIOTIC_FREQUENCY', 'VITAMIN_B_SUPPLEMENT_FREQUENCY', 'VITAMIN_D_SUPPLEMENT_FREQUENCY', @@ -461,7 +468,7 @@ def test_to_pandas_series(self): 'ALLERGIC_TO_blahblah', 'ALLERGIC_TO_stuff', 'ALLERGIC_TO_x', 'ALLERGIC_TO_baz', 'abc', 'def', 'COLLECTION_TIMESTAMP', 'GEO_LOC_NAME', 'STATE', 'LATITUDE', - 'LONGITUDE'] + 'LONGITUDE', 'sample_site_last_washed_date'] for k, v in HUMAN_SITE_INVARIANTS['Stool'].items(): values.append(v) diff --git a/microsetta_private_api/repo/sample_repo.py b/microsetta_private_api/repo/sample_repo.py index fad2c421..5641f481 100644 --- a/microsetta_private_api/repo/sample_repo.py +++ b/microsetta_private_api/repo/sample_repo.py @@ -58,6 +58,13 @@ class SampleRepo(BaseRepo): LEFT JOIN ag.source ON ag.ag_kit_barcodes.source_id = ag.source.id""" + SAMPLE_SITE_LAST_WASHED_DATE_FORMAT = "%m/%d/%Y" + # NB: strptime() and strftime() treat the %I and %-I formats differently. 
+ # To properly store and retrieve the time format, we use different formats + # for each function + SAMPLE_SITE_LAST_WASHED_TIME_FORMAT_STRPTIME = "%I:%M %p" + SAMPLE_SITE_LAST_WASHED_TIME_FORMAT_STRFTIME = "%-I:%M %p" + def __init__(self, transaction): super().__init__(transaction) @@ -108,8 +115,10 @@ def _create_sample_obj(self, sample_row): ) sample_status = self.get_sample_status(sample_barcode, scan_timestamp) - return Sample.from_db(*sample_row, sample_projects, sample_status, - sample_project_ids=sample_project_ids) + sample = Sample.from_db(*sample_row, sample_projects, sample_status, + sample_project_ids=sample_project_ids) + sample.set_barcode_meta(self._get_barcode_meta(sample.id)) + return sample # TODO: I'm still not entirely happy with the linking between samples and # sources. The new source_id is direct (and required for environmental @@ -292,7 +301,8 @@ def get_sample(self, account_id, source_id, sample_id): barcode = self._get_sample_barcode_from_id(sample_id) cur.execute(sql, (barcode, account_id, source_id, sample_id)) sample_row = cur.fetchone() - return self._create_sample_obj(sample_row) + sample = self._create_sample_obj(sample_row) + return sample def update_info(self, account_id, source_id, sample_info, override_locked=False): @@ -337,6 +347,106 @@ def update_info(self, account_id, source_id, sample_info, sample_info.id )) + # NB: We run this even if the barcode_meta dict is empty, as that + # means the answers associated with the last sample update should be + # purged. This applies both to the normal sample update process, as + # well as dissociate_sample(). 
def _update_barcode_meta(self, sample_id, barcode_meta):
    """Replace the barcode-specific metadata stored for a sample

    NB: As with validation, deferring on a more elegant way to handle
    table selection for where to store this data.

    Any existing row for the sample is always deleted first, so passing
    an empty dict purges previously stored answers without inserting a
    new row.

    Parameters
    ----------
    sample_id : str, uuid
        The associated sample ID for which to store metadata
    barcode_meta : dict
        The key:value pairs to store. Every field is optional; a field
        that is absent from the dict is stored as NULL.
    """
    with self._transaction.cursor() as cur:
        # First, delete existing values
        cur.execute(
            "DELETE FROM ag.ag_kit_barcodes_cheek "
            "WHERE ag_kit_barcode_id = %s",
            (sample_id, )
        )
        # Then, insert new values if the dict isn't empty. The columns
        # are all nullable, so use .get() rather than indexing: a
        # participant who answered only some of the questions must not
        # trigger a KeyError.
        if barcode_meta:
            cur.execute(
                "INSERT INTO ag.ag_kit_barcodes_cheek "
                "(ag_kit_barcode_id, sample_site_last_washed_date, "
                "sample_site_last_washed_time, "
                "sample_site_last_washed_product) "
                "VALUES (%s, %s, %s, %s)",
                (sample_id,
                 barcode_meta.get('sample_site_last_washed_date'),
                 barcode_meta.get('sample_site_last_washed_time'),
                 barcode_meta.get('sample_site_last_washed_product'))
            )


def _get_barcode_meta(self, sample_id):
    """Retrieve any barcode-specific metadata

    Parameters
    ----------
    sample_id : str, uuid
        The associated sample ID for which to retrieve metadata

    Returns
    -------
    dict
        The sample-specific metadata, or an empty dict if nothing is
        stored for the sample. Date and time values are rendered as
        strings in the formats the interface works with; columns that
        are NULL are returned as None.
    """
    with self._transaction.dict_cursor() as cur:
        cur.execute(
            "SELECT sample_site_last_washed_date, "
            "sample_site_last_washed_time, "
            "sample_site_last_washed_product "
            "FROM ag.ag_kit_barcodes_cheek "
            "WHERE ag_kit_barcode_id = %s",
            (sample_id, )
        )
        row = cur.fetchone()
        if row is None:
            return {}

        ret_dict = {
            'sample_site_last_washed_date':
                row['sample_site_last_washed_date'],
            'sample_site_last_washed_time':
                row['sample_site_last_washed_time'],
            'sample_site_last_washed_product':
                row['sample_site_last_washed_product']
        }

        # We need to transform the date/time fields back to what the
        # interface works with
        washed_date = ret_dict['sample_site_last_washed_date']
        if washed_date is not None:
            ret_dict['sample_site_last_washed_date'] = \
                washed_date.strftime(
                    self.SAMPLE_SITE_LAST_WASHED_DATE_FORMAT
                )

        # NOTE: the strftime format for the time relies on a
        # platform-specific directive for the non-padded hour, which is
        # why it differs from the format used for parsing.
        washed_time = ret_dict['sample_site_last_washed_time']
        if washed_time is not None:
            ret_dict['sample_site_last_washed_time'] = \
                washed_time.strftime(
                    self.SAMPLE_SITE_LAST_WASHED_TIME_FORMAT_STRFTIME
                )
        return ret_dict


def _validate_barcode_meta(self, sample_site, barcode_meta):
    """Validate the barcode_meta fields/values provided

    NB: I'm deferring on a more elegant validation system until/unless
    we decide whether barcode-specific metadata will remain in one table
    per sample site, a key-value pairing table, or something else.

    Parameters
    ----------
    sample_site : str
        The sample site
    barcode_meta : dict
        Key:Value pairings of field_name:field_value. A value of None
        means the field was not answered.

    Returns
    -------
    Dict with database-ready values if valid, else False. Dates are
    returned as datetime.date and times as datetime.time so they match
    the DATE and TIME columns they are stored in.
    """

    # If the barcode_meta dict is empty, we can immediately pass it. This
    # will allow the repo to purge existing values without adding a new
    # record.
    if not barcode_meta:
        return {}

    # Only Cheek samples should have barcode_meta values. If it's not a
    # Cheek sample, immediately fail it.
    if sample_site != "Cheek":
        return False

    ret_dict = {}
    for fn, fv in barcode_meta.items():
        if fn == "sample_site_last_washed_product":
            # The ENUM type in the database will validate the value
            ret_dict[fn] = fv
        elif fn in ("sample_site_last_washed_date",
                    "sample_site_last_washed_time"):
            if fv is None:
                # Unanswered fields come back from the database as None,
                # so accept them here to keep get -> update round trips
                # valid. They are stored as NULL.
                ret_dict[fn] = None
                continue

            try:
                if fn == "sample_site_last_washed_date":
                    ret_val = datetime.datetime.strptime(
                        fv,
                        self.SAMPLE_SITE_LAST_WASHED_DATE_FORMAT
                    ).date()
                else:
                    ret_val = datetime.datetime.strptime(
                        fv,
                        self.SAMPLE_SITE_LAST_WASHED_TIME_FORMAT_STRPTIME
                    ).time()
            except (TypeError, ValueError):
                # ValueError: string in the wrong format
                # TypeError: value isn't a string at all
                return False
            ret_dict[fn] = ret_val
        else:
            # Unknown field name
            return False

    return ret_dict
def test_validate_barcode_meta_fail(self):
    """Unknown fields and non-Cheek sites must both be rejected"""
    with Transaction() as t:
        sample_repo = SampleRepo(t)

        rejected = [
            # Try using an invalid field name
            ("Cheek", {"my_life_story": "I've done stuff and things"}),
            # Try using an invalid site
            ("Stool", {"sample_site_last_washed_product": "Face cleanser"}),
        ]
        for site, bc_meta in rejected:
            with self.subTest(site=site, bc_meta=bc_meta):
                bc_valid = sample_repo._validate_barcode_meta(site, bc_meta)
                self.assertFalse(bc_valid)


def test_update_barcode_meta_via_update_info(self):
    """update_info() must persist barcode_meta on the sample"""
    # We're going to use a stable sample and override_locked to test
    # the barcode meta update via update_info()
    sample_id = "d8592c74-85f0-2135-e040-8a80115d6401"
    bc_meta = {
        "sample_site_last_washed_date": "01/10/2025",
        "sample_site_last_washed_time": "9:30 AM",
        "sample_site_last_washed_product": "Face cleanser"
    }
    sample_info = SampleInfo(
        sample_id,
        datetime.datetime.now(),
        "Cheek",
        "",
        bc_meta
    )

    with Transaction() as t:
        account_id, source_id = self._get_source_from_sample(t, sample_id)

        sample_repo = SampleRepo(t)
        sample_repo.update_info(account_id, source_id, sample_info, True)

        sample = sample_repo.get_sample(account_id, source_id, sample_id)
        self.assertEqual(bc_meta, sample.barcode_meta)


def test_get_barcode_meta(self):
    """_get_barcode_meta() returns what was stored, or {} once purged"""
    # First, we need to set the barcode metadata. Same process as
    # test_update_barcode_meta_via_update_info()
    sample_id = "d8592c74-85f0-2135-e040-8a80115d6401"
    bc_meta = {
        "sample_site_last_washed_date": "01/10/2025",
        "sample_site_last_washed_time": "9:30 AM",
        "sample_site_last_washed_product": "Face cleanser"
    }

    with Transaction() as t:
        account_id, source_id = self._get_source_from_sample(t, sample_id)

        sample_repo = SampleRepo(t)
        sample_repo.update_info(
            account_id,
            source_id,
            SampleInfo(sample_id, datetime.datetime.now(), "Cheek", "",
                       bc_meta),
            True
        )

        # Then, read the metadata back directly and confirm it matches
        # the above input in the interface's string formats
        self.assertEqual(bc_meta, sample_repo._get_barcode_meta(sample_id))

        # The sample object should expose the same values
        sample = sample_repo.get_sample(account_id, source_id, sample_id)
        self.assertEqual(bc_meta, sample.barcode_meta)

        # Updating with an empty dict purges the stored answers
        sample_repo.update_info(
            account_id,
            source_id,
            SampleInfo(sample_id, datetime.datetime.now(), "Cheek", "", {}),
            True
        )
        self.assertEqual({}, sample_repo._get_barcode_meta(sample_id))