Skip to content

Commit

Permalink
Fix bug where set-based masks were not getting offset when merged
Browse files Browse the repository at this point in the history
  • Loading branch information
ramari16 committed Mar 22, 2024
1 parent 3cd01c3 commit 60b16ba
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,22 @@ public static BigInteger emptyBitmask(int length) {
return emptyBitmask;
}

public VariableVariantMasks append(VariableVariantMasks variantMasks) {
/*public VariableVariantMasks append(VariableVariantMasks variantMasks) {
VariableVariantMasks appendedMasks = new VariableVariantMasks();
appendedMasks.homozygousMask = appendMask(this.homozygousMask, variantMasks.homozygousMask, this.length, variantMasks.length);
appendedMasks.heterozygousMask = appendMask(this.heterozygousMask, variantMasks.heterozygousMask, this.length, variantMasks.length);
appendedMasks.homozygousNoCallMask = appendMask(this.homozygousNoCallMask, variantMasks.homozygousNoCallMask, this.length, variantMasks.length);
appendedMasks.heterozygousNoCallMask = appendMask(this.heterozygousNoCallMask, variantMasks.heterozygousNoCallMask, this.length, variantMasks.length);
return appendedMasks;
}*/

/**
 * Creates a new {@code VariableVariantMasks} whose four masks are each the result of
 * appending the matching mask of {@code masks2} to the matching mask of {@code masks1}.
 *
 * <p>Every pair is combined through {@code appendMask}, which is handed both lengths
 * unchanged. The lengths are supplied by the caller rather than read from the mask objects.
 *
 * @param masks1  masks that supply the first operand of each {@code appendMask} call
 * @param length1 length forwarded to {@code appendMask} for {@code masks1}
 * @param masks2  masks that supply the second operand of each {@code appendMask} call
 * @param length2 length forwarded to {@code appendMask} for {@code masks2}
 * @return a newly constructed instance holding the four appended masks
 */
public static VariableVariantMasks append(VariableVariantMasks masks1, int length1, VariableVariantMasks masks2, int length2) {
    VariableVariantMasks combined = new VariableVariantMasks();

    // Homozygous / heterozygous calls.
    combined.homozygousMask = appendMask(
            masks1.homozygousMask, masks2.homozygousMask, length1, length2);
    combined.heterozygousMask = appendMask(
            masks1.heterozygousMask, masks2.heterozygousMask, length1, length2);

    // Matching no-call masks.
    combined.homozygousNoCallMask = appendMask(
            masks1.homozygousNoCallMask, masks2.homozygousNoCallMask, length1, length2);
    combined.heterozygousNoCallMask = appendMask(
            masks1.heterozygousNoCallMask, masks2.heterozygousNoCallMask, length1, length2);

    return combined;
}

public static VariantMask appendMask(VariantMask variantMask1, VariantMask variantMask2, int length1, int length2) {
Expand Down Expand Up @@ -238,7 +247,7 @@ private static VariantMask append(VariantMaskSparseImpl variantMask1, VariantMas
for (Integer patientId : variantMask1.patientIndexes) {
mask = mask.setBit(patientId + 2);
}
// todo: explain this. it is not intuitive
// We start writing mask 2 where mask 1 ends. So the 0th index of mask 2 is now following the last bit of mask 1
for (Integer patientId : variantMask2.patientIndexes) {
mask = mask.setBit(patientId + length1 + 2);
}
Expand All @@ -247,7 +256,9 @@ private static VariantMask append(VariantMaskSparseImpl variantMask1, VariantMas
else {
Set<Integer> patientIndexSet = new HashSet<>();
patientIndexSet.addAll(variantMask1.patientIndexes);
patientIndexSet.addAll(variantMask2.patientIndexes);
// The indexes for mask 2 are shifted by the length of mask 1, because the patient id array
// for mask 2 is appended after the patient id array for mask 1
patientIndexSet.addAll(variantMask2.patientIndexes.stream().map(i -> i + length1).collect(Collectors.toSet()));
return new VariantMaskSparseImpl(patientIndexSet);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import org.slf4j.LoggerFactory;

import java.io.*;
import java.math.BigInteger;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListSet;
Expand All @@ -24,6 +23,9 @@ public class GenomicDatasetMerger {
private final VariantStore variantStore1;
private final VariantStore variantStore2;

private final int variantStore1PatientCount;
private final int variantStore2PatientCount;

private final Map<String, FileBackedByteIndexedInfoStore> infoStores1;
private final Map<String, FileBackedByteIndexedInfoStore> infoStores2;

Expand All @@ -33,7 +35,9 @@ public class GenomicDatasetMerger {

public GenomicDatasetMerger(VariantStore variantStore1, VariantStore variantStore2, Map<String, FileBackedByteIndexedInfoStore> infoStores1, Map<String, FileBackedByteIndexedInfoStore> infoStores2, String outputDirectory) {
this.variantStore1 = variantStore1;
this.variantStore1PatientCount = variantStore1.getPatientIds().length;
this.variantStore2 = variantStore2;
this.variantStore2PatientCount = variantStore2.getPatientIds().length;
this.mergedVariantStore = new VariantStore();
this.infoStores1 = infoStores1;
this.infoStores2 = infoStores2;
Expand Down Expand Up @@ -238,21 +242,21 @@ public FileBackedJsonIndexStorage<Integer, ConcurrentHashMap<String, VariableVar
// appended to a null, or be replaced with an empty bitmask otherwise
variantMasks2 = new VariableVariantMasks(variantStore2.getPatientIds().length);
}
mergedMasks.put(entry.getKey(), entry.getValue().append(variantMasks2));

VariableVariantMasks mergeResult = VariableVariantMasks.append(entry.getValue(), variantStore1PatientCount, variantMasks2, variantStore2PatientCount);
mergedMasks.put(entry.getKey(), mergeResult);
}
// Any entry in the second set that is not in the merged set can be merged with an empty variant mask,
// if there were a corresponding entry in set 1, it would have been merged in the previous loop
for (Map.Entry<String, VariableVariantMasks> entry : masks2.entrySet()) {
if (!mergedMasks.containsKey(entry.getKey())) {
mergedMasks.put(entry.getKey(), new VariableVariantMasks(variantStore1.getPatientIds().length).append(entry.getValue()));
VariableVariantMasks appendedMasks = VariableVariantMasks.append(new VariableVariantMasks(), variantStore1PatientCount, entry.getValue(), variantStore2PatientCount);
mergedMasks.put(entry.getKey(), appendedMasks);
}
}
if (merged.keys().contains(key)) {
log.warn("Merged already contains key: " + key);
} else {
if (key == 61713) {
log.info("Loop 1 adding masks to key 61713: " + Joiner.on(",").join(mergedMasks.keySet()));
}
merged.put(key, mergedMasks);
}
});
Expand All @@ -263,19 +267,27 @@ public FileBackedJsonIndexStorage<Integer, ConcurrentHashMap<String, VariableVar
Map<String, VariableVariantMasks> masks2 = variantMaskStorage2.get(key);
for (Map.Entry<String, VariableVariantMasks> entry : masks2.entrySet()) {
if (!mergedMasks.containsKey(entry.getKey())) {
mergedMasks.put(entry.getKey(), new VariableVariantMasks(variantStore1.getPatientIds().length).append(entry.getValue()));
VariableVariantMasks appendedMasks = VariableVariantMasks.append(new VariableVariantMasks(), variantStore1PatientCount, entry.getValue(), variantStore2PatientCount);
mergedMasks.put(entry.getKey(), appendedMasks);
}
}
if (merged.keys().contains(key)) {
log.warn("Second loop: merged already contains key: " + key);
} else {
if (key == 61713) {
log.info("Loop 2 adding masks to key 61713: " + Joiner.on(",").join(mergedMasks.keySet()));
}
merged.put(key, mergedMasks);
}
}
});

merged.keys().stream().sorted().limit(3).forEach(key -> {
ConcurrentHashMap<String, VariableVariantMasks> maskMap = merged.get(key);
maskMap.keySet().stream().sorted().limit(5).forEach(variantSpec -> {
VariableVariantMasks variableVariantMasks = maskMap.get(variantSpec);
Set<Integer> patientsWithVariant = VariableVariantMasks.patientMaskToPatientIdSet(variableVariantMasks.heterozygousMask.union(variableVariantMasks.homozygousMask), Arrays.asList(mergedVariantStore.getPatientIds()));
log.info("Patients with variant [" + variantSpec + "]: " + Joiner.on(",").join(patientsWithVariant));
});

});
return merged;
}

Expand Down

0 comments on commit 60b16ba

Please sign in to comment.