From defb6c2e982ef7d4a1b8a85b6d833c007aacf380 Mon Sep 17 00:00:00 2001 From: freemabd Date: Fri, 22 Nov 2024 13:15:50 -0600 Subject: [PATCH] config with vid as primary id --- ui/src/criteria/filterableGroup.tsx | 2 +- .../core/FilterableGroupFilterBuilder.java | 2 +- .../criteriaselector/variant/selector.json | 15 ++++ .../criteriaselector/variant/variant.json | 81 +++++++++++++++++++ .../datamapping/aouCT/entity/variant/all.sql | 47 +++++++++++ .../aouCT/entity/variant/entity.json | 22 +++++ .../variantPerson/entityGroup.json | 14 ++++ .../entitygroup/variantPerson/idPairs.sql | 3 + .../variantPerson/rollupCounts.sql | 2 + .../underlay/aouSC2023Q3R2/underlay.json | 9 ++- .../FilterableGroupFilterBuilderTest.java | 4 +- 11 files changed, 195 insertions(+), 6 deletions(-) create mode 100644 underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/selector.json create mode 100644 underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/variant.json create mode 100644 underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql create mode 100644 underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json create mode 100644 underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/entityGroup.json create mode 100644 underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/idPairs.sql create mode 100644 underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/rollupCounts.sql diff --git a/ui/src/criteria/filterableGroup.tsx b/ui/src/criteria/filterableGroup.tsx index 11b925bbe..5e1e518b9 100644 --- a/ui/src/criteria/filterableGroup.tsx +++ b/ui/src/criteria/filterableGroup.tsx @@ -1054,7 +1054,7 @@ function generateFilters( filterType: tanagra.FilterFilterTypeEnum.Attribute, filterUnion: { attributeFilter: { - attribute: "variant_id", + attribute: "id", operator: tanagra.AttributeFilterOperatorEnum.Equals, values: [literalFromDataValue(query)], }, diff --git a/underlay/src/main/java/bio/terra/tanagra/filterbuilder/impl/core/FilterableGroupFilterBuilder.java b/underlay/src/main/java/bio/terra/tanagra/filterbuilder/impl/core/FilterableGroupFilterBuilder.java index 37c890cd6..9fb7647e7 100644 --- a/underlay/src/main/java/bio/terra/tanagra/filterbuilder/impl/core/FilterableGroupFilterBuilder.java +++ b/underlay/src/main/java/bio/terra/tanagra/filterbuilder/impl/core/FilterableGroupFilterBuilder.java @@ -181,7 +181,7 @@ private static EntityFilter generateFilterForQuery( underlay, entity, entity.getAttribute("rs_number"), NaryOperator.IN, List.of(literal)); } else if (query.matches("[0-9]+-[0-9]+-[A-Z]+-[A-Z]+")) { return new AttributeFilter( - underlay, entity, entity.getAttribute("variant_id"), BinaryOperator.EQUALS, literal); + underlay, entity, entity.getAttribute("id"), BinaryOperator.EQUALS, literal); } else { return new TextSearchFilter( underlay, entity, TextSearchOperator.EXACT_MATCH, query, entity.getAttribute("gene")); diff --git a/underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/selector.json b/underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/selector.json new file mode 100644 index 000000000..6834b3f0f --- /dev/null +++ b/underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/selector.json @@ -0,0 +1,15 @@ +{ + "name": "tanagra-variant", + "displayName": "SNP/Indel Variant", + "isEnabledForCohorts": true, + "isEnabledForDataFeatureSets": false, + "display": { + "category": "Genomics", + "tags": null + }, + "filterBuilder": "core.FilterableGroupFilterBuilder", + "plugin": "filterableGroup", + "pluginConfig": null, + "pluginConfigFile": "variant.json", + "modifiers": null +} diff --git a/underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/variant.json b/underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/variant.json new file mode 100644 index 000000000..4546bce61 --- /dev/null +++ b/underlay/src/main/resources/config/criteria/aouCT/criteriaselector/variant/variant.json @@ -0,0 +1,81 @@ +{ + "columns": [ + { + "key": "id", + "widthString": "100%", + "title": "Variant id" + }, + { + "key": "gene", + "widthDouble": 100, + "title": "Gene" + }, + { + "key": "rs_number", + "widthDouble": 100, + "title": "RS number" + }, + { + "key": "consequence", + "widthDouble": 100, + "title": "Consequence" + }, + { + "key": "clinvar_significance", + "widthDouble": 100, + "title": "ClinVar significance" + }, + { + "key": "protein_change", + "widthDouble": 100, + "title": "Protein change" + }, + { + "key": "allele_count", + "widthDouble": 100, + "title": "Allele count" + }, + { + "key": "allele_number", + "widthDouble": 100, + "title": "Allele number" + }, + { + "key": "allele_frequency", + "widthDouble": 100, + "title": "Allele frequency" + }, + { + "key": "t_item_count", + "widthDouble": 150, + "title": "Participant count" + } + ], + "entityGroup": "variantPerson", + "valueConfigs": [ + { + "attribute": "gene", + "title": "Gene" + }, + { + "attribute": "consequence", + "title": "Consequence" + }, + { + "attribute": "clinvar_significance", + "title": "ClinVar significance" + }, + { + "attribute": "allele_count", + "title": "Allele count" + }, + { + "attribute": "allele_number", + "title": "Allele number" + }, + { + "attribute": "allele_frequency", + "title": "Allele frequency" + } + ] +} diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql new file mode 100644 index 000000000..fdfee150d --- /dev/null +++ b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/all.sql @@ -0,0 +1,47 @@ +WITH sorted_transcripts AS ( + SELECT vid, + consequence, + aa_change, + contig, + position, + ref_allele, + alt_allele, + dbsnp_rsid, + transcript, + dna_change_in_transcript, + clinvar_classification, + gvs_all_ac, + gvs_all_an, + gvs_all_af, + ROW_NUMBER() OVER(PARTITION BY vid ORDER BY CASE ARRAY_TO_STRING(consequence, ', ') + WHEN 'upstream_gene_variant' + THEN 4 + WHEN 'downstream_gene_variant' + THEN 5 + ELSE 1 + END + ASC + ) AS row_number + FROM `${omopDataset}.prep_vat` + WHERE is_canonical_transcript OR transcript IS NULL + ORDER BY vid, row_number), + genes AS ( + SELECT vid, ARRAY_TO_STRING(ARRAY_AGG(DISTINCT gene_symbol IGNORE NULLS ORDER BY gene_symbol), ', ') AS genes + FROM `${omopDataset}.prep_vat` + GROUP BY vid + ) +SELECT + sorted_transcripts.vid, + genes.genes as gene_symbol, + sorted_transcripts.dbsnp_rsid, + sorted_transcripts.consequence, + sorted_transcripts.aa_change, + sorted_transcripts.clinvar_classification, + sorted_transcripts.gvs_all_ac, + sorted_transcripts.gvs_all_an, + sorted_transcripts.gvs_all_af, + sorted_transcripts.contig, + sorted_transcripts.position +FROM sorted_transcripts, genes +WHERE genes.vid = sorted_transcripts.vid + AND (sorted_transcripts.row_number =1 or sorted_transcripts.transcript is NULL) \ No newline at end of file diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json new file mode 100644 index 000000000..17365d0aa --- /dev/null +++ b/underlay/src/main/resources/config/datamapping/aouCT/entity/variant/entity.json @@ -0,0 +1,22 @@ +{ + "name": "variant", + "allInstancesSqlFile": "all.sql", + "attributes": [ + { "name": "id", "dataType": "STRING", "valueFieldName": "vid" }, + { "name": "gene", "dataType": "STRING", "valueFieldName": "gene_symbol", "isComputeDisplayHint": true }, + { "name": "rs_number", "dataType": "STRING", "isDataTypeRepeated": true, "valueFieldName": "dbsnp_rsid" }, + { "name": "consequence", "dataType": "STRING", "isDataTypeRepeated": true, "isComputeDisplayHint": true }, + { "name": "protein_change", "dataType": "STRING", "valueFieldName": "aa_change" }, + { "name": "clinvar_significance", "dataType": "STRING", "isDataTypeRepeated": true, "valueFieldName": "clinvar_classification", "isComputeDisplayHint": true }, + { "name": "allele_count", "dataType": "INT64", "valueFieldName": "gvs_all_ac", "isComputeDisplayHint": true }, + { "name": "allele_number", "dataType": "INT64", "valueFieldName": "gvs_all_an", "isComputeDisplayHint": true }, + { "name": "allele_frequency", "dataType": "DOUBLE", "valueFieldName": "gvs_all_af", "isComputeDisplayHint": true }, + { "name": "contig", "dataType": "STRING" }, + { "name": "position", "dataType": "INT64" } + ], + "idAttribute": "id", + "textSearch": { + "attributes": [ "id", "gene", "rs_number" ] + }, + "optimizeGroupByAttributes": [ "id" ] +} \ No newline at end of file diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/entityGroup.json b/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/entityGroup.json new file mode 100644 index 000000000..0dbb9f2ef --- /dev/null +++ b/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/entityGroup.json @@ -0,0 +1,14 @@ +{ + "name": "variantPerson", + "groupEntity": "variant", + "itemsEntity": "person", + "idPairsSqlFile": "idPairs.sql", + "useSourceIdPairsSql": true, + "groupEntityIdFieldName": "id", + "itemsEntityIdFieldName": "flattened_person_id", + "rollupCountsSql": { + "sqlFile": "rollupCounts.sql", + "entityIdFieldName": "id", + "rollupCountFieldName": "num_persons" + } +} diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/idPairs.sql b/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/idPairs.sql new file mode 100644 index 000000000..284391e8c --- /dev/null +++ b/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/idPairs.sql @@ -0,0 +1,3 @@ +SELECT DISTINCT vid as id, flattened_person_id +FROM `${omopDataset}.cb_variant_to_person` +CROSS JOIN UNNEST(person_ids) AS flattened_person_id diff --git a/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/rollupCounts.sql b/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/rollupCounts.sql new file mode 100644 index 000000000..6e8915947 --- /dev/null +++ b/underlay/src/main/resources/config/datamapping/aouCT/entitygroup/variantPerson/rollupCounts.sql @@ -0,0 +1,2 @@ +SELECT vid as id, ARRAY_LENGTH(person_ids) AS num_persons +FROM (SELECT DISTINCT vid, person_ids FROM `${omopDataset}.cb_variant_to_person`) diff --git a/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json b/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json index a0707de71..fad3714d5 100644 --- a/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json +++ b/underlay/src/main/resources/config/underlay/aouSC2023Q3R2/underlay.json @@ -58,7 +58,9 @@ "aouRT/surveySocialDeterminantsOfHealth", "aouRT/surveyCovidVaccine", "aouRT/surveyCope", - "aouRT/surveyOccurrence" + "aouRT/surveyOccurrence", + + "aouCT/variant" ], "groupItemsEntityGroups": [ "aouRT/brandIngredientConcept", @@ -77,7 +79,9 @@ "aouRT/weightPerson", "aouRT/bmiPerson", "aouRT/waistCircumferencePerson", - "aouRT/hipCircumferencePerson" + "aouRT/hipCircumferencePerson", + + "aouCT/variantPerson" ], "criteriaOccurrenceEntityGroups": [ "aouRT/conditionPerson", @@ -144,6 +148,7 @@ "aouCT/longReadWGS", "aouCT/globalDiversityArray", "aouCT/structuralVariants", + "aouCT/variant", "aouRT/bloodPressure", "aouRT/heartRate", "aouRT/height", diff --git a/underlay/src/test/java/bio/terra/tanagra/filterbuilder/FilterableGroupFilterBuilderTest.java b/underlay/src/test/java/bio/terra/tanagra/filterbuilder/FilterableGroupFilterBuilderTest.java index 0014ad8eb..44fb0be33 100644 --- a/underlay/src/test/java/bio/terra/tanagra/filterbuilder/FilterableGroupFilterBuilderTest.java +++ b/underlay/src/test/java/bio/terra/tanagra/filterbuilder/FilterableGroupFilterBuilderTest.java @@ -143,7 +143,7 @@ void selectAllQueryFilter() { EntityFilter cohortFilter = filterBuilder.buildForCohort(underlay, List.of(selectionData)); assertEquals(expectedCohortFilter, cohortFilter); - // query format: variant_id ("[0-9]+-[0-9]+-[A-Z]+-[A-Z]+") + // query format: id ("[0-9]+-[0-9]+-[A-Z]+-[A-Z]+") query = "12-34-AB-CD"; data = FilterableGroup.newBuilder() @@ -155,7 +155,7 @@ void selectAllQueryFilter() { new AttributeFilter( underlay, entity_variant, - entity_variant.getAttribute("variant_id"), + entity_variant.getAttribute("id"), BinaryOperator.EQUALS, Literal.forString(query)); expectedSubFilter =