From 292e66a2d3a50923a899bc4c61ab500ec9a2e8eb Mon Sep 17 00:00:00 2001 From: ramari16 Date: Tue, 22 Oct 2024 10:23:36 -0400 Subject: [PATCH] Fix bucket index loading for non-variant explorer environments (#123) --- .../processing/GenomicProcessorNodeImpl.java | 5 +---- .../hpds/processing/VariantService.java | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessorNodeImpl.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessorNodeImpl.java index 68e0140b..ea612b89 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessorNodeImpl.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/GenomicProcessorNodeImpl.java @@ -35,8 +35,6 @@ public class GenomicProcessorNodeImpl implements GenomicProcessor { private final VariantService variantService; - private final VariantMetadataIndex variantMetadataIndex; - private final String HOMOZYGOUS_VARIANT = "1/1"; private final String HETEROZYGOUS_VARIANT = "0/1"; @@ -46,7 +44,6 @@ public GenomicProcessorNodeImpl(String genomicDataDirectory) { this.genomicDataDirectory = genomicDataDirectory; this.variantService = new VariantService(genomicDataDirectory); this.patientVariantJoinHandler = new PatientVariantJoinHandler(variantService); - this.variantMetadataIndex = VariantMetadataIndex.createInstance(genomicDataDirectory); infoStores = new HashMap<>(); File genomicDataDirectoryFile = new File(this.genomicDataDirectory); @@ -402,6 +399,6 @@ public List getInfoColumnMeta() { @Override public Map> getVariantMetadata(Collection variantList) { - return variantMetadataIndex.findByMultipleVariantSpec(variantList); + return variantService.findByMultipleVariantSpec(variantList); } } diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantService.java 
b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantService.java index 0981a6d1..c8f94850 100644 --- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantService.java +++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/VariantService.java @@ -1,9 +1,6 @@ package edu.harvard.hms.dbmi.avillach.hpds.processing; -import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.BucketIndexBySample; -import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.VariableVariantMasks; -import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.VariantMasks; -import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.VariantStore; +import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.*; import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.caching.VariantBucketHolder; import edu.harvard.hms.dbmi.avillach.hpds.storage.FileBackedByteIndexedStorage; import edu.harvard.hms.dbmi.avillach.hpds.storage.FileBackedJavaIndexedStorage; @@ -38,6 +35,8 @@ public class VariantService { private String[] variantIndex = null; private BucketIndexBySample bucketIndex; + private final VariantMetadataIndex variantMetadataIndex; + public String[] getVariantIndex() { return variantIndex; } @@ -60,6 +59,8 @@ public VariantService(String genomicDataDirectory) { BUCKET_INDEX_BY_SAMPLE_FILE = genomicDataDirectory + "BucketIndexBySample.javabin"; variantStore = loadVariantStore(); + + this.variantMetadataIndex = VariantMetadataIndex.createInstance(genomicDataDirectory); try { loadGenomicCacheFiles(); } catch (Exception e) { @@ -162,7 +163,10 @@ private void loadGenomicCacheFiles() throws FileNotFoundException, IOException, log.info("Found " + variantIndex.length + " total variants."); } } - if(variantStore.getPatientIds().length > 0 && !new File(BUCKET_INDEX_BY_SAMPLE_FILE).exists()) { + // TODO: Skipping the bucket index when there is no variant metadata index is a temporary fix for non-variant explorer environments. + // Once we start
building the bucket index as part of the ETL, we can remove this check and leverage the bucket index + // for all genomic queries. + if(variantStore.getPatientIds().length > 0 && variantMetadataIndex != null && !new File(BUCKET_INDEX_BY_SAMPLE_FILE).exists()) { log.info("creating new " + BUCKET_INDEX_BY_SAMPLE_FILE); bucketIndex = new BucketIndexBySample(variantStore, genomicDataDirectory); try ( @@ -204,4 +208,8 @@ public List getMasksForDbSnpSpec(String variantName) { public BigInteger emptyBitmask() { return variantStore.emptyBitmask(); } + + public Map> findByMultipleVariantSpec(Collection variantList) { + return variantMetadataIndex.findByMultipleVariantSpec(variantList); + } }