diff --git a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/flatvcf/FlatVCFProcessor.java b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/flatvcf/FlatVCFProcessor.java
index b6fa85d8..925bacdd 100644
--- a/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/flatvcf/FlatVCFProcessor.java
+++ b/processing/src/main/java/edu/harvard/hms/dbmi/avillach/hpds/processing/flatvcf/FlatVCFProcessor.java
@@ -22,6 +22,7 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.*;
+import java.util.stream.Stream;
 
 @Component
 public class FlatVCFProcessor implements HpdsProcessor {
@@ -138,9 +139,9 @@ public void runVcfExcerptQuery(Query query, boolean includePatientData) throws I
 
 		log.info("Running VCF Extract query");
 
-		Collection variantList = abstractProcessor.getInfoStore("Gene_with_variant").getAllValues().keys();
+		Collection<String> genes = abstractProcessor.getInfoStore("Gene_with_variant").getAllValues().keys();
 
-		log.debug("variantList Size " + variantList.size());
+		log.debug("gene list Size " + genes.size());
 
 		PhenoCube idCube = null;
 		if(!ID_CUBE_NAME.contentEquals("NONE")) {
@@ -195,8 +196,11 @@ public void runVcfExcerptQuery(Query query, boolean includePatientData) throws I
 		writer.writeRow(builder.toString());
 
 		VariantBucketHolder variantMaskBucketHolder = new VariantBucketHolder();
+
 		//loop over the variants identified, and build an output row
-		variantList.stream()
+		genes.stream()
+			.map(this::createSingleGeneQuery)
+			.flatMap(this::safeGetVariantList)
 			.map(variant -> metadataIndex.findByMultipleVariantSpec(List.of(variant)))
 			.filter(Objects::nonNull)
 			.flatMap(m -> m.entrySet().stream())
@@ -206,6 +210,38 @@ public void runVcfExcerptQuery(Query query, boolean includePatientData) throws I
 		writer.complete();
 	}
 
+	/**
+	 * Streams the variant list that {@code abstractProcessor} returns for {@code query}.
+	 * An {@link IOException} is logged and mapped to an empty stream, so a single
+	 * failing query contributes no variants instead of aborting the stream pipeline.
+	 *
+	 * @param query the query to resolve
+	 * @return the variants returned for the query, or an empty stream if reading them failed
+	 */
+	private Stream<String> safeGetVariantList(Query query) {
+		try {
+			return abstractProcessor.getVariantList(query).stream();
+		} catch (IOException e) {
+			log.warn("Error getting variant list for single gene query, skipping it", e);
+			return Stream.empty();
+		}
+	}
+
+	/**
+	 * Builds a query whose only variant info filter is a category filter selecting
+	 * the given value of {@code Gene_with_variant}.
+	 *
+	 * @param gene the {@code Gene_with_variant} value to filter on
+	 * @return a new query carrying that single filter
+	 */
+	private Query createSingleGeneQuery(String gene) {
+		var q = new Query();
+		Query.VariantInfoFilter v = new Query.VariantInfoFilter();
+		v.categoryVariantInfoFilters = Map.of("Gene_with_variant", new String[] {gene});
+		q.setVariantInfoFilters(List.of(v));
+		return q;
+	}
+
 	private String createRow(boolean includePatientData, Map.Entry entry, VariantBucketHolder variantMaskBucketHolder, BigInteger patientMasks, Map patientIndexMap) {
 		StringBuilder stringBuilder = new StringBuilder();
 		String variantSpec = entry.getKey();