Skip to content

Commit

Permalink
ALS-6330: Fix variant metadata merging bug, refactor variant model, a…
Browse files Browse the repository at this point in the history
…dd tests
  • Loading branch information
ramari16 committed Jun 18, 2024
1 parent b1d6559 commit 2223f66
Show file tree
Hide file tree
Showing 9 changed files with 50 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public VariantMetadataIndex() throws IOException {
* @param variantSpec
* @return
*/
public String[] findBySingleVariantSpec(String variantSpec, VariantBucketHolder<String[]> bucketCache) {
public Set<String> findBySingleVariantSpec(String variantSpec, VariantBucketHolder<String[]> bucketCache) {
try {
String[] segments = variantSpec.split(",");
if (segments.length < 2) {
Expand All @@ -78,7 +78,7 @@ public String[] findBySingleVariantSpec(String variantSpec, VariantBucketHolder<
|| chrOffset != bucketCache.lastChunkOffset) {
FileBackedByteIndexedStorage<Integer, ConcurrentHashMap<String, String[]>> ContigFbbis = indexMap.get(contig);
if(ContigFbbis == null) {
return new String[0];
return Set.of();
}
bucketCache.lastValue = ContigFbbis.get(chrOffset);
bucketCache.lastContig = contig;
Expand All @@ -88,20 +88,20 @@ public String[] findBySingleVariantSpec(String variantSpec, VariantBucketHolder<
if( bucketCache.lastValue != null) {
if(bucketCache.lastValue.get(variantSpec) == null) {
log.warn("No variant data found for spec " + variantSpec);
return new String[0];
return Set.of();
}
return bucketCache.lastValue.get(variantSpec);
return Set.of(bucketCache.lastValue.get(variantSpec));
}
log.warn("No bucket found for spec " + variantSpec + " in bucket " + chrOffset);
return new String[0];
return Set.of();

} catch (UncheckedIOException e) {
log.warn("IOException caught looking up variantSpec : " + variantSpec, e);
return new String[0];
return Set.of();
}
}

public Map<String, String[]> findByMultipleVariantSpec(Collection<String> varientSpecList) {
public Map<String, Set<String>> findByMultipleVariantSpec(Collection<String> varientSpecList) {
// log.debug("SPEC list " + varientSpecList.size() + " :: " + Arrays.deepToString(varientSpecList.toArray()));

VariantBucketHolder<String[]> bucketCache = new VariantBucketHolder<String[]>();
Expand Down Expand Up @@ -164,7 +164,7 @@ public synchronized void flush() throws IOException {
if(contigFbbis == null) {
log.info("creating new file for " + contig);
String filePath = fileStoragePrefix + "_" + contig + ".bin";
contigFbbis = new FileBackedJavaIndexedStorage(Integer.class, (Class<ConcurrentHashMap<String, String[]>>)(Class<?>) ConcurrentHashMap.class, new File(filePath));
contigFbbis = new FileBackedJavaIndexedStorage(Integer.class, ConcurrentHashMap.class, new File(filePath));
indexMap.put(contig, contigFbbis);
}

Expand Down Expand Up @@ -221,20 +221,24 @@ public static void merge(VariantMetadataIndex variantMetadataIndex1, VariantMeta
String filePath = outputDirectory + VARIANT_METADATA_STORAGE_FILE_PREFIX + "_" + contig + ".bin";
FileBackedByteIndexedStorage<Integer, ConcurrentHashMap<String, String[]>> mergedFbbis = new FileBackedJavaIndexedStorage(Integer.class, ConcurrentHashMap.class, new File(filePath));

Map<Integer, ConcurrentHashMap<String, String[]>> mergedMockFbbis = new HashMap<>();

FileBackedByteIndexedStorage<Integer, ConcurrentHashMap<String, String[]>> fbbis1 = variantMetadataIndex1.indexMap.get(contig);
FileBackedByteIndexedStorage<Integer, ConcurrentHashMap<String, String[]>> fbbis2 = variantMetadataIndex2.indexMap.get(contig);

fbbis1.keys().forEach(key -> {
mergedFbbis.put(key, fbbis1.get(key));
mergedMockFbbis.put(key, fbbis1.get(key));
});
fbbis2.keys().forEach(key -> {
ConcurrentHashMap<String, String[]> metadataMap = mergedFbbis.get(key);
ConcurrentHashMap<String, String[]> metadataMap = mergedMockFbbis.get(key);
if (metadataMap == null) {
mergedFbbis.put(key, fbbis2.get(key));
mergedMockFbbis.put(key, fbbis2.get(key));
} else {
metadataMap.putAll(fbbis2.get(key));
}
});

mergedMockFbbis.forEach(mergedFbbis::put);
mergedFbbis.complete();
merged.indexMap.put(contig, mergedFbbis);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,5 @@ public interface GenomicProcessor {
List<InfoColumnMeta> getInfoColumnMeta();

// todo: make the map value a Set instead of array
Map<String, String[]> getVariantMetadata(Collection<String> variantList);
Map<String, Set<String>> getVariantMetadata(Collection<String> variantList);
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public List<InfoColumnMeta> getInfoColumnMeta() {
}

@Override
public Map<String, String[]> getVariantMetadata(Collection<String> variantList) {
public Map<String, Set<String>> getVariantMetadata(Collection<String> variantList) {
return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ public List<InfoColumnMeta> getInfoColumnMeta() {
}

@Override
public Map<String, String[]> getVariantMetadata(Collection<String> variantList) {
public Map<String, Set<String>> getVariantMetadata(Collection<String> variantList) {
return variantMetadataIndex.findByMultipleVariantSpec(variantList);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -141,27 +141,21 @@ public List<InfoColumnMeta> getInfoColumnMeta() {
}

@Override
public Map<String, String[]> getVariantMetadata(Collection<String> variantList) {
// this is overly complicated because of the array type.
// todo: update this when we change the method signature from array to set
public Map<String, Set<String>> getVariantMetadata(Collection<String> variantList) {
ConcurrentHashMap<String, Set<String>> result = new ConcurrentHashMap<>();
nodes.stream()
.map(node -> node.getVariantMetadata(variantList))
.forEach(variantMap -> {
variantMap.entrySet().forEach(entry -> {
Set<String> metadata = result.get(entry.getKey());
if (metadata != null) {
metadata.addAll(Set.of(entry.getValue()));
metadata.addAll(entry.getValue());
} else {
result.put(entry.getKey(), new HashSet<>(Set.of(entry.getValue())));
result.put(entry.getKey(), new HashSet<>(entry.getValue()));
}
});
});
return result.entrySet().stream()
.collect(Collectors.toMap(
Map.Entry::getKey,
entry -> entry.getValue().toArray(new String[] {})
));
return result;
}

private List<InfoColumnMeta> initInfoColumnsMeta() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,11 +160,11 @@ public List<InfoColumnMeta> getInfoColumnMeta() {
}

@Override
public Map<String, String[]> getVariantMetadata(Collection<String> variantList) {
public Map<String, Set<String>> getVariantMetadata(Collection<String> variantList) {
return nodes.parallelStream()
.map(node -> node.getVariantMetadata(variantList))
.reduce((p1, p2) -> {
Map<String, String[]> mapCopy = new HashMap<>(p1);
Map<String, Set<String>> mapCopy = new HashMap<>(p1);
mapCopy.putAll(p2);
return mapCopy;
}).orElseGet(Map::of);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ public void runQuery(Query query, AsyncResult asyncResult)
*
* This should not actually do any filtering based on bitmasks, just INFO columns.
*
* @param incomingQuery
* @param query
* @return a List of VariantSpec strings that would be eligible to filter patients if the incomingQuery was run as a COUNT query.
* @throws IOException
*/
Expand All @@ -96,7 +96,7 @@ public String runVariantListQuery(Query query) throws IOException {
/**
* Process only variantInfoFilters to count the number of variants that would be included in evaluating the query.
*
* @param incomingQuery
* @param query
* @return the number of variants that would be used to filter patients if the incomingQuery was run as a COUNT query.
* @throws IOException
*/
Expand All @@ -119,7 +119,7 @@ public int runVariantCount(Query query) throws IOException {
* The default patientId header value can be overridden by passing the ID_CUBE_NAME environment variable to
* the java VM.
*
* @param Query A VCF_EXCERPT type query
* @param query A VCF_EXCERPT type query
* @param includePatientData whether to include patient specific data
* @return A Tab-separated string with one line per variant and one column per patient (plus variant data columns)
* @throws IOException
Expand All @@ -141,12 +141,12 @@ public String runVcfExcerptQuery(Query query, boolean includePatientData) throws

log.debug("variantList Size " + variantList.size());

Map<String, String[]> metadata = genomicProcessor.getVariantMetadata(variantList);
Map<String, Set<String>> metadata = genomicProcessor.getVariantMetadata(variantList);

log.debug("metadata size " + metadata.size());

// Sort the variantSpecs so that the user doesn't lose their mind
TreeMap<String, String[]> metadataSorted = new TreeMap<>((o1, o2) -> {
TreeMap<String, Set<String>> metadataSorted = new TreeMap<>((o1, o2) -> {
return new VariantSpec(o1).compareTo(new VariantSpec(o2));
});
metadataSorted.putAll(metadata);
Expand Down Expand Up @@ -217,7 +217,7 @@ public String runVcfExcerptQuery(Query query, boolean includePatientData) throws
VariantBucketHolder<VariableVariantMasks> variantMaskBucketHolder = new VariantBucketHolder<VariableVariantMasks>();

//loop over the variants identified, and build an output row
metadata.forEach((String variantSpec, String[] variantMetadata)->{
metadata.forEach((String variantSpec, Set<String> variantMetadata)->{

String[] variantDataColumns = variantSpec.split(",");
//4 fixed columns in variant ID (CHROM POSITION REF ALT)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ public List<InfoColumnMeta> getInfoColumnMeta() {
}

@Override
public Map<String, String[]> getVariantMetadata(Collection<String> variantList) {
public Map<String, Set<String>> getVariantMetadata(Collection<String> variantList) {
throw new RuntimeException("Not implemented yet");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -129,16 +129,29 @@ public void getVariantList_oneNode_returnVariants() {
assertEquals(Set.of("variant1", "variant2"), variantList);
}

@Test
public void getVariantMetadata_mixedEmptyVariants_mergedCorrectly() {
List<String> variantList = List.of("variant1", "variant2", "variant3");
when(mockProcessor1.getVariantMetadata(variantList)).thenReturn(Map.of());
when(mockProcessor2.getVariantMetadata(variantList)).thenReturn(Map.of("variant1", Set.of("metadata1", "metadata2")));
when(mockProcessor3.getVariantMetadata(variantList)).thenReturn(Map.of("variant3", Set.of("metadata31", "metadata32")));

Map<String, Set<String>> variantMetadata = parentProcessor.getVariantMetadata(variantList);
assertEquals(Set.of("metadata1", "metadata2"), variantMetadata.get("variant1"));
assertEquals(Set.of("metadata31", "metadata32"), variantMetadata.get("variant3"));
assertEquals(2, variantMetadata.size());
}

@Test
public void getVariantMetadata_overlappingVariants_mergedCorrectly() {
List<String> variantList = List.of("variant1", "variant2", "variant3");
when(mockProcessor1.getVariantMetadata(variantList)).thenReturn(Map.of("variant1", new String[]{"metadata1", "metadata2"}));
when(mockProcessor2.getVariantMetadata(variantList)).thenReturn(Map.of("variant1", new String[]{"metadata1", "metadata3"}));
when(mockProcessor3.getVariantMetadata(variantList)).thenReturn(Map.of("variant3", new String[]{"metadata31", "metadata32"}));
when(mockProcessor1.getVariantMetadata(variantList)).thenReturn(Map.of("variant1", Set.of("metadata1", "metadata2")));
when(mockProcessor2.getVariantMetadata(variantList)).thenReturn(Map.of("variant1", Set.of("metadata1", "metadata3")));
when(mockProcessor3.getVariantMetadata(variantList)).thenReturn(Map.of("variant3", Set.of("metadata31", "metadata32")));

Map<String, String[]> variantMetadata = parentProcessor.getVariantMetadata(variantList);
assertEquals(Set.of("metadata1", "metadata2", "metadata3"), Set.of(variantMetadata.get("variant1")));
assertEquals(Set.of("metadata31", "metadata32"), Set.of(variantMetadata.get("variant3")));
Map<String, Set<String>> variantMetadata = parentProcessor.getVariantMetadata(variantList);
assertEquals(Set.of("metadata1", "metadata2", "metadata3"), variantMetadata.get("variant1"));
assertEquals(Set.of("metadata31", "metadata32"), variantMetadata.get("variant3"));
assertEquals(2, variantMetadata.size());
}
}

0 comments on commit 2223f66

Please sign in to comment.