Skip to content

Commit

Permalink
ALS-6330: Fix test compile issues. Add variant list processor integra…
Browse files Browse the repository at this point in the history
…tion test. Fix bug caused by bitmask paddings
  • Loading branch information
ramari16 committed Jun 14, 2024
1 parent 9d17741 commit 396606b
Show file tree
Hide file tree
Showing 16 changed files with 149 additions and 24 deletions.
4 changes: 0 additions & 4 deletions integration-test/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@
<groupId>edu.harvard.hms.dbmi.avillach.hpds</groupId>
<artifactId>etl</artifactId>
</dependency>
<dependency>
<groupId>edu.harvard.hms.dbmi.avillach.hpds</groupId>
<artifactId>data</artifactId>
</dependency>
<dependency>
<groupId>edu.harvard.hms.dbmi.avillach.hpds</groupId>
<artifactId>service</artifactId>
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ public String runVcfExcerptQuery(Query query, boolean includePatientData) throws
log.debug("identified " + patientSubset.size() + " patients from query");
Map<String, Integer> patientIndexMap = new LinkedHashMap<String, Integer>(); //keep a map for quick index lookups
VariantMask patientMasks = abstractProcessor.createMaskForPatientSet(patientSubset);
int index = 2; //variant bitmasks are bookended with '11'
int index = 0;


for(String patientId : abstractProcessor.getPatientIds()) {
Expand Down Expand Up @@ -270,13 +270,13 @@ public String runVcfExcerptQuery(Query query, boolean includePatientData) throws

// Patient count = (hetero mask | homo mask) & patient mask
VariantMask heteroOrHomoMask = orNullableMasks(heteroMask, homoMask);
int patientCount = heteroOrHomoMask == null ? 0 : (heteroOrHomoMask.intersection(patientMasks).bitCount() - 4);
int patientCount = heteroOrHomoMask == null ? 0 : (heteroOrHomoMask.intersection(patientMasks).bitCount());

int bitCount = masks.heterozygousMask == null? 0 : (masks.heterozygousMask.bitCount() - 4);
bitCount += masks.homozygousMask == null? 0 : (masks.homozygousMask.bitCount() - 4);
int bitCount = masks.heterozygousMask == null? 0 : (masks.heterozygousMask.bitCount());
bitCount += masks.homozygousMask == null? 0 : (masks.homozygousMask.bitCount());

//count how many patients have genomic data available
Integer patientsWithVariantsCount = patientMasks.bitCount() - 4;
Integer patientsWithVariantsCount = patientMasks.bitCount();


// (patients with/total) in subset \t (patients with/total) out of subset.
Expand Down
5 changes: 5 additions & 0 deletions service/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
<groupId>edu.harvard.hms.dbmi.avillach.hpds</groupId>
<artifactId>data</artifactId>
</dependency>
<dependency>
<groupId>edu.harvard.hms.dbmi.avillach.hpds</groupId>
<artifactId>etl</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>edu.harvard.hms.dbmi.avillach.hpds</groupId>
<artifactId>client-api</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
SMALL_JOB_LIMIT = 100
SMALL_TASK_THREADS = 1
LARGE_TASK_THREADS = 1
VCF_EXCERPT_ENABLED=true

hpds.genomicProcessor.impl=local
HPDS_GENOMIC_DATA_DIRECTORY=target/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
import java.util.*;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;


@ExtendWith(SpringExtension.class)
@EnableAutoConfiguration
@Disabled
//@SpringBootTest(classes = edu.harvard.hms.dbmi.avillach.hpds.service.HpdsApplication.class)
@SpringBootTest(classes = edu.harvard.hms.dbmi.avillach.hpds.service.HpdsApplication.class)
@ActiveProfiles("integration-test")
public class AbstractProcessorIntegrationTest {

Expand All @@ -46,7 +46,10 @@ public void getPatientSubsetForQuery_validGeneWithVariantQuery() {
query.setVariantInfoFilters(variantInfoFilters);

Set<Integer> idList = abstractProcessor.getPatientSubsetForQuery(query);
assertEquals(36, idList.size());
assertEquals(16, idList.size());
assertTrue(idList.contains(200972));
assertTrue(idList.contains(200971));
assertTrue(idList.contains(200975));
}

@Test
Expand All @@ -59,7 +62,7 @@ public void getPatientSubsetForQuery_validGeneWithMultipleVariantQuery() {
query.setVariantInfoFilters(variantInfoFilters);

Set<Integer> idList = abstractProcessor.getPatientSubsetForQuery(query);
assertEquals(39, idList.size());
assertEquals(22, idList.size());
}

@Test
Expand All @@ -73,7 +76,7 @@ public void getPatientSubsetForQuery_validGeneWithVariantQueryAndNumericQuery()
query.setNumericFilters(Map.of("\\open_access-1000Genomes\\data\\SYNTHETIC_AGE\\", new Filter.DoubleFilter(35.0, 45.0)));

Set<Integer> idList = abstractProcessor.getPatientSubsetForQuery(query);
assertEquals(8, idList.size());
assertEquals(4, idList.size());
}

@Test
Expand Down Expand Up @@ -210,7 +213,7 @@ public void getPatientSubsetForQuery_validContinuousGenomicFilter() {
query.setVariantInfoFilters(variantInfoFilters);

Set<Integer> idList = abstractProcessor.getPatientSubsetForQuery(query);
assertEquals(24, idList.size());
assertEquals(8, idList.size());
}

// todo: test variant filters that use the phenotypic query, and edge cases
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
package edu.harvard.hms.dbmi.avillach.hpds.test;

import edu.harvard.hms.dbmi.avillach.hpds.data.query.Filter;
import edu.harvard.hms.dbmi.avillach.hpds.data.query.Query;
import edu.harvard.hms.dbmi.avillach.hpds.processing.VariantListProcessor;
import edu.harvard.hms.dbmi.avillach.hpds.test.util.BuildIntegrationTestEnvironment;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.junit.jupiter.SpringExtension;

import java.io.IOException;
import java.util.*;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
 * Integration tests for {@link VariantListProcessor#runVcfExcerptQuery(Query, boolean)}.
 *
 * <p>The tests boot {@code HpdsApplication} under the {@code integration-test} profile and
 * inspect the tab-separated excerpt returned for queries filtering on the
 * {@code Gene_with_variant} variant info column.
 */
@ExtendWith(SpringExtension.class)
@EnableAutoConfiguration
@SpringBootTest(classes = edu.harvard.hms.dbmi.avillach.hpds.service.HpdsApplication.class)
@ActiveProfiles("integration-test")
public class VariantListProcessorIntegrationTest {

    private static final Logger log = LoggerFactory.getLogger(VariantListProcessorIntegrationTest.class);

    /** Gene used by every query in this class. */
    private static final String GENE = "LOC102723996";

    /** Concept path of the numeric filter applied by the phenotypic queries. */
    private static final String SYNTHETIC_AGE_PATH = "\\open_access-1000Genomes\\data\\SYNTHETIC_AGE\\";

    @Autowired
    private VariantListProcessor variantListProcessor;

    /**
     * Touches {@code BuildIntegrationTestEnvironment.INSTANCE} before any test runs.
     * NOTE(review): presumably initializing that singleton builds the test data set the
     * queries below run against -- confirm against BuildIntegrationTestEnvironment.
     */
    @BeforeAll
    public static void beforeAll() {
        // Referencing the singleton is the point; the value itself is not needed afterwards.
        Objects.requireNonNull(BuildIntegrationTestEnvironment.INSTANCE);
    }

    /** A gene-only query is expected to yield 4 variants across a subset of 16 patients. */
    @Test
    public void runVcfExcerptQuery_validGeneWithVariantQuery() throws IOException {
        Query query = geneWithVariantQuery();

        String vcfExcerpt = variantListProcessor.runVcfExcerptQuery(query, true);
        log.debug(vcfExcerpt);

        assertVcfExcerpt(vcfExcerpt, 16, 4);
    }

    /** Adding an age filter of [35, 45] is expected to narrow the result to 2 variants and 4 patients. */
    @Test
    public void runVcfExcerptQuery_validGeneWithVariantAndPhenoQuery() throws IOException {
        Query query = geneWithVariantQuery();
        query.setNumericFilters(Map.of(SYNTHETIC_AGE_PATH, new Filter.DoubleFilter(35.0, 45.0)));

        String vcfExcerpt = variantListProcessor.runVcfExcerptQuery(query, true);
        log.debug(vcfExcerpt);

        assertVcfExcerpt(vcfExcerpt, 4, 2);
    }

    /** An age filter of [0, 1] is expected to match nothing, producing the sentinel message. */
    @Test
    public void runVcfExcerptQuery_validQueryNoResults() throws IOException {
        Query query = geneWithVariantQuery();
        query.setNumericFilters(Map.of(SYNTHETIC_AGE_PATH, new Filter.DoubleFilter(0.0, 1.0)));

        String vcfExcerpt = variantListProcessor.runVcfExcerptQuery(query, true);
        assertEquals("No Variants Found", vcfExcerpt);
    }

    /** Builds a query with a single {@code Gene_with_variant} category filter on {@link #GENE}. */
    private static Query geneWithVariantQuery() {
        Query.VariantInfoFilter variantInfoFilter = new Query.VariantInfoFilter();
        variantInfoFilter.categoryVariantInfoFilters = Map.of("Gene_with_variant", new String[]{GENE});

        List<Query.VariantInfoFilter> variantInfoFilters = new ArrayList<>();
        variantInfoFilters.add(variantInfoFilter);

        Query query = new Query();
        query.setVariantInfoFilters(variantInfoFilters);
        return query;
    }

    /**
     * Verifies the shape and per-row content of a VCF excerpt.
     *
     * @param vcfExcerpt       the raw excerpt: one header line followed by one line per variant
     * @param expectedPatients the subset size expected in the "n/total" patient count column
     * @param expectedVariants the number of variant lines expected after the header
     */
    private static void assertVcfExcerpt(String vcfExcerpt, int expectedPatients, int expectedVariants) {
        String[] lines = vcfExcerpt.split("\\n");

        // there should be a line per variant, plus one line for the header
        assertEquals(expectedVariants + 1, lines.length);

        List<String> header = Arrays.asList(lines[0].split("\\t"));
        for (int i = 1; i < lines.length; i++) {
            String[] columns = lines[i].split("\\t");
            assertEquals("chr21", columns[0]);

            // Count every cell holding a heterozygous or homozygous genotype call.
            int patientCount = 0;
            for (String column : columns) {
                if ("1/1".equals(column) || "0/1".equals(column)) {
                    patientCount++;
                }
            }

            assertTrue(patientCount > 0);
            assertEquals(
                patientCount + "/" + expectedPatients,
                getValueAtColumn(columns, header, "Patients with this variant in subset"));
            assertEquals(GENE, getValueAtColumn(columns, header, "Gene_with_variant"));
        }
    }

    /**
     * Returns the cell of {@code rowColumns} that sits under the header named {@code key}.
     *
     * <p>Fails with a descriptive assertion message, instead of an
     * {@code ArrayIndexOutOfBoundsException}, when the header is absent or the row is too short.
     *
     * @param rowColumns the tab-split cells of one excerpt row
     * @param header     the tab-split header names
     * @param key        the header name to look up
     * @return the cell value at the header's position
     */
    private static String getValueAtColumn(String[] rowColumns, List<String> header, String key) {
        int columnIndex = header.indexOf(key);
        assertTrue(columnIndex >= 0, "Column '" + key + "' not found in header: " + header);
        assertTrue(
            columnIndex < rowColumns.length,
            "Row has " + rowColumns.length + " columns but column '" + key + "' is at index " + columnIndex);
        return rowColumns[columnIndex];
    }
}

Large diffs are not rendered by default.

0 comments on commit 396606b

Please sign in to comment.