From 405f3626f5966fa49ee8ee055447966245319728 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Tue, 24 Oct 2023 14:29:47 -0400 Subject: [PATCH 1/4] Modified queries that used the number of relationships to the nearest sample ancestor to determine whether a dataset was primary or processed so that they instead rely on the creation_action property of the dataset's activity ancestor. Replaced miscellaneous generic <-[]-()<-[]- patterns with explicit relationship types and node labels throughout --- src/app_neo4j_queries.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index fa613c0e..1af4e565 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -579,8 +579,9 @@ def get_prov_info(neo4j_driver, param_dict, published_only): if published_only: published_only_query_string = f" AND toUpper(ds.status) = 'PUBLISHED'" published_only_revisions_string = f" WHERE toUpper(rev.status) = 'PUBLISHED'" - query = (f"MATCH (ds:Dataset)<-[:ACTIVITY_OUTPUT]-(a)<-[:ACTIVITY_INPUT]-(firstSample:Sample)<-[*]-(donor:Donor)" + query = (f"MATCH (ds:Dataset)<-[:ACTIVITY_OUTPUT]-(a:ACTIVITY)<-[*]-(firstSample:Sample)<-[*]-(donor:Donor)" f"WHERE not (ds)-[:REVISION_OF]->(:Dataset)" + f" AND NOT toLower(a.creation_action) ENDS WITH 'process'" f"{group_uuid_query_string}" f"{dataset_status_query_string}" f"{published_only_query_string}" @@ -598,7 +599,8 @@ def get_prov_info(neo4j_driver, param_dict, published_only): f" {organ_query_string} (donor)-[:ACTIVITY_INPUT]->(oa)-[:ACTIVITY_OUTPUT]->(organ:Sample {{sample_category:'organ'}})-[*]->(ds)" f" {organ_where_clause}" f" WITH ds, FIRSTSAMPLE, DONOR, REVISIONS, METASAMPLE, RUISAMPLE, COLLECT(DISTINCT organ) AS ORGAN " - f" OPTIONAL MATCH (ds)-[:ACTIVITY_INPUT]->(a3)-[:ACTIVITY_OUTPUT]->(processed_dataset:Dataset)" + f" OPTIONAL MATCH (ds)-[*]->(a3)-[:ACTIVITY_OUTPUT]->(processed_dataset:Dataset)" + f" WHERE toLower(a3.creation_action) ENDS WITH 'process'" f" WITH ds, FIRSTSAMPLE, DONOR, REVISIONS, METASAMPLE, RUISAMPLE, ORGAN, 
COLLECT(distinct processed_dataset) AS PROCESSED_DATASET" f" RETURN ds.uuid, FIRSTSAMPLE, DONOR, RUISAMPLE, ORGAN, ds.hubmap_id, ds.status, ds.group_name," f" ds.group_uuid, ds.created_timestamp, ds.created_by_user_email, ds.last_modified_timestamp, " @@ -695,7 +697,8 @@ def get_individual_prov_info(neo4j_driver, dataset_uuid): # specimen_type -> sample_category 12/15/2022 f" OPTIONAL match (donor)-[:ACTIVITY_INPUT]->(oa)-[:ACTIVITY_OUTPUT]->(organ:Sample {{sample_category:'organ'}})-[*]->(ds)" f" WITH ds, FIRSTSAMPLE, DONOR, METASAMPLE, RUISAMPLE, COLLECT(distinct organ) AS ORGAN " - f" OPTIONAL MATCH (ds)-[:ACTIVITY_INPUT]->(a3)-[:ACTIVITY_OUTPUT]->(processed_dataset:Dataset)" + f" OPTIONAL MATCH (ds)-[*]->(a3)-[:ACTIVITY_OUTPUT]->(processed_dataset:Dataset)" + f" WHERE WHERE toLower(a3.creation_action) ENDS WITH 'process'" f" WITH ds, FIRSTSAMPLE, DONOR, METASAMPLE, RUISAMPLE, ORGAN, COLLECT(distinct processed_dataset) AS PROCESSED_DATASET" f" RETURN ds.uuid, FIRSTSAMPLE, DONOR, RUISAMPLE, ORGAN, ds.hubmap_id, ds.status, ds.group_name," f" ds.group_uuid, ds.created_timestamp, ds.created_by_user_email, ds.last_modified_timestamp, " @@ -867,7 +870,7 @@ def get_sample_prov_info(neo4j_driver, param_dict, public_only): # specimen_type -> sample_category 12/15/2022 f" OPTIONAL MATCH (s)<-[*]-(organ:Sample{{sample_category: 'organ'}})" f" WITH s, organ, d" - f" MATCH (s)<-[]-()<-[]-(da)" + f" MATCH (s)<-[:ACTIVITY_OUTPUT]-(:ACTIVITY)<-[:ACTIVITY_INPUT]-(da)" f" RETURN s.uuid, s.lab_tissue_sample_id, s.group_name, s.created_by_user_email, s.metadata, s.rui_location," f" d.uuid, d.metadata, organ.uuid, organ.sample_category, organ.metadata, da.uuid, da.entity_type, " f"s.sample_category, organ.organ, s.organ, s.hubmap_id, s.submission_id, organ.hubmap_id, organ.submission_id, " @@ -976,7 +979,7 @@ def get_paired_dataset(neo4j_driver, uuid, data_type, search_depth): number_of_jumps = f"*..{search_depth}" data_type = f"['{data_type}']" query = ( - f'MATCH 
(ds:Dataset)<-[*]-(s:Sample) WHERE ds.uuid = "{uuid}" AND (:Dataset)<-[]-()<-[]-(s)' + f'MATCH (ds:Dataset)<-[*]-(s:Sample) WHERE ds.uuid = "{uuid}" AND (:Dataset)<-[:ACTIVITY_OUTPUT]-(:ACTIVITY)<-[:ACTIVITY_INPUT]-(s)' f'MATCH (ods)<-[{number_of_jumps}]-(s) WHERE ods.data_types = "{data_type}"' f'return ods.uuid as uuid, ods.status as status' ) From 00177fbe2cf7ce010676c74133e7e2bfc9264c04 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Tue, 24 Oct 2023 14:30:42 -0400 Subject: [PATCH 2/4] Modified queries that used the number of relationships to the nearest sample ancestor to determine whether a dataset was primary or processed so that they instead rely on the creation_action property of the dataset's activity ancestor. Replaced miscellaneous generic <-[]-()<-[]- patterns with explicit relationship types and node labels throughout --- src/app_neo4j_queries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index 1af4e565..f3c34bdd 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -686,7 +686,7 @@ def get_prov_info(neo4j_driver, param_dict, published_only): """ def get_individual_prov_info(neo4j_driver, dataset_uuid): query = (f"MATCH (ds:Dataset {{uuid: '{dataset_uuid}'}})<-[*]-(firstSample:Sample)<-[*]-(donor:Donor)" - f" WHERE (:Dataset)<-[]-()<-[]-(firstSample)" + f" WHERE (:Dataset)<-[:ACTIVITY_OUTPUT]-(:ACTIVITY)<-[:ACTIVITY_INPUT]-(firstSample)" f" WITH ds, COLLECT(distinct donor) AS DONOR, COLLECT(distinct firstSample) AS FIRSTSAMPLE" f" OPTIONAL MATCH (ds)<-[*]-(metaSample:Sample)" f" WHERE NOT metaSample.metadata IS NULL AND NOT TRIM(metaSample.metadata) = ''" From 7c39d77c120ec30e669ca1dd6f5cd8716c1987f0 Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Tue, 24 Oct 2023 14:56:46 -0400 Subject: [PATCH 3/4] Fixed a typo in prov_info query and fixed some spacing for consistency --- src/app_neo4j_queries.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index f3c34bdd..93a1e539 
100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -579,8 +579,8 @@ def get_prov_info(neo4j_driver, param_dict, published_only): if published_only: published_only_query_string = f" AND toUpper(ds.status) = 'PUBLISHED'" published_only_revisions_string = f" WHERE toUpper(rev.status) = 'PUBLISHED'" - query = (f"MATCH (ds:Dataset)<-[:ACTIVITY_OUTPUT]-(a:ACTIVITY)<-[*]-(firstSample:Sample)<-[*]-(donor:Donor)" - f"WHERE not (ds)-[:REVISION_OF]->(:Dataset)" + query = (f"MATCH (ds:Dataset)<-[:ACTIVITY_OUTPUT]-(a:Activity)<-[*]-(firstSample:Sample)<-[*]-(donor:Donor)" + f" WHERE not (ds)-[:REVISION_OF]->(:Dataset)" f" AND NOT toLower(a.creation_action) ENDS WITH 'process'" f"{group_uuid_query_string}" f"{dataset_status_query_string}" From 04c38e7127c83d697e94ba80f3dd9999f3462edc Mon Sep 17 00:00:00 2001 From: DerekFurstPitt Date: Tue, 24 Oct 2023 15:25:55 -0400 Subject: [PATCH 4/4] Fixed misc typos after changing generic patterns to explicit ones throughout --- src/app_neo4j_queries.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/app_neo4j_queries.py b/src/app_neo4j_queries.py index 93a1e539..129f528c 100644 --- a/src/app_neo4j_queries.py +++ b/src/app_neo4j_queries.py @@ -686,7 +686,7 @@ def get_prov_info(neo4j_driver, param_dict, published_only): """ def get_individual_prov_info(neo4j_driver, dataset_uuid): query = (f"MATCH (ds:Dataset {{uuid: '{dataset_uuid}'}})<-[*]-(firstSample:Sample)<-[*]-(donor:Donor)" - f" WHERE (:Dataset)<-[:ACTIVITY_OUTPUT]-(:ACTIVITY)<-[:ACTIVITY_INPUT]-(firstSample)" + f" WHERE (:Dataset)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(firstSample)" f" WITH ds, COLLECT(distinct donor) AS DONOR, COLLECT(distinct firstSample) AS FIRSTSAMPLE" f" OPTIONAL MATCH (ds)<-[*]-(metaSample:Sample)" f" WHERE NOT metaSample.metadata IS NULL AND NOT TRIM(metaSample.metadata) = ''" @@ -698,7 +698,7 @@ def get_individual_prov_info(neo4j_driver, dataset_uuid): f" OPTIONAL match 
(donor)-[:ACTIVITY_INPUT]->(oa)-[:ACTIVITY_OUTPUT]->(organ:Sample {{sample_category:'organ'}})-[*]->(ds)" f" WITH ds, FIRSTSAMPLE, DONOR, METASAMPLE, RUISAMPLE, COLLECT(distinct organ) AS ORGAN " f" OPTIONAL MATCH (ds)-[*]->(a3)-[:ACTIVITY_OUTPUT]->(processed_dataset:Dataset)" - f" WHERE WHERE toLower(a3.creation_action) ENDS WITH 'process'" + f" WHERE toLower(a3.creation_action) ENDS WITH 'process'" f" WITH ds, FIRSTSAMPLE, DONOR, METASAMPLE, RUISAMPLE, ORGAN, COLLECT(distinct processed_dataset) AS PROCESSED_DATASET" f" RETURN ds.uuid, FIRSTSAMPLE, DONOR, RUISAMPLE, ORGAN, ds.hubmap_id, ds.status, ds.group_name," f" ds.group_uuid, ds.created_timestamp, ds.created_by_user_email, ds.last_modified_timestamp, " @@ -870,7 +870,7 @@ def get_sample_prov_info(neo4j_driver, param_dict, public_only): # specimen_type -> sample_category 12/15/2022 f" OPTIONAL MATCH (s)<-[*]-(organ:Sample{{sample_category: 'organ'}})" f" WITH s, organ, d" - f" MATCH (s)<-[:ACTIVITY_OUTPUT]-(:ACTIVITY)<-[:ACTIVITY_INPUT]-(da)" + f" MATCH (s)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(da)" f" RETURN s.uuid, s.lab_tissue_sample_id, s.group_name, s.created_by_user_email, s.metadata, s.rui_location," f" d.uuid, d.metadata, organ.uuid, organ.sample_category, organ.metadata, da.uuid, da.entity_type, " f"s.sample_category, organ.organ, s.organ, s.hubmap_id, s.submission_id, organ.hubmap_id, organ.submission_id, " @@ -979,7 +979,7 @@ def get_paired_dataset(neo4j_driver, uuid, data_type, search_depth): number_of_jumps = f"*..{search_depth}" data_type = f"['{data_type}']" query = ( - f'MATCH (ds:Dataset)<-[*]-(s:Sample) WHERE ds.uuid = "{uuid}" AND (:Dataset)<-[:ACTIVITY_OUTPUT]-(:ACTIVITY)<-[:ACTIVITY_INPUT]-(s)' + f'MATCH (ds:Dataset)<-[*]-(s:Sample) WHERE ds.uuid = "{uuid}" AND (:Dataset)<-[:ACTIVITY_OUTPUT]-(:Activity)<-[:ACTIVITY_INPUT]-(s)' f'MATCH (ods)<-[{number_of_jumps}]-(s) WHERE ods.data_types = "{data_type}"' f'return ods.uuid as uuid, ods.status as status' )