fix(subsetter): fix bugs in subsetter
markwoon committed Aug 13, 2024
1 parent 308268f commit 3a88f7b
Showing 3 changed files with 75 additions and 29 deletions.
@@ -30,7 +30,7 @@ public class GenePhenotype {
private Map<String, String> m_haplotypes;
@Expose
@SerializedName("activityValues")
private Map<String,String> m_activityValues = new HashMap<>();
private Map<String, String> m_activityValues = new HashMap<>();
@Expose
@SerializedName("diplotypes")
private SortedSet<DiplotypeRecord> m_diplotypes;
@@ -179,9 +179,27 @@ public String toString() {

public void addHaplotypeRecord(String name, @Nullable String activityValue, @Nullable String functionValue,
@Nullable String lookupKey) {
if (lookupKey == null) {
lookupKey = isActivityGene() ? activityValue : functionValue;
}
if (isActivityGene()) {
if (activityValue == null) {
throw new IllegalStateException("Cannot add activity gene haplotype without activity value");
}
} else {
if (functionValue == null) {
throw new IllegalStateException("Cannot add function gene haplotype without function value");
}
}
if (lookupKey == null) {
throw new IllegalStateException("Cannot add haplotype without lookupKey");
}
HaplotypeRecord hr = new HaplotypeRecord(name, activityValue, functionValue, lookupKey);
m_namedAlleles.add(hr);
m_haplotypes.put(name, hr.getLookupKey());
if (hr.getActivityValue() != null) {
m_activityValues.put(name, hr.getActivityValue());
}
}

public boolean update(String allele, @Nullable String activityScore, @Nullable String function, DataSource src) {
@@ -191,7 +209,9 @@ public boolean update(String allele, @Nullable String activityScore, @Nullable S
.orElse(null);
if (hr == null) {
// not all haplotypes will have a phenotype associated with it
return false;
System.out.println("Adding phenotype for " + m_gene + " " + allele);
addHaplotypeRecord(allele, activityScore, function, null);
return true;
}
boolean gotChange = false;
boolean modified = false;
@@ -200,11 +220,12 @@ public boolean update(String allele, @Nullable String activityScore, @Nullable S
System.out.println("New " + src + " activity score for " + m_gene + " " + allele);
modified = true;
} else if (!hr.getActivityValue().equals(activityScore)){
System.out.println("Overwriting " + src + " activity score for " + m_gene + " " + allele +
" (" + hr.getActivityValue() + " to " + activityScore + ")");
modified = true;
if (Double.parseDouble(hr.getActivityValue()) != Double.parseDouble(activityScore)) {
System.out.println("Overwriting " + src + " activity score for " + m_gene + " " + allele +
" (" + hr.getActivityValue() + " to " + activityScore + ")");
modified = true;
}
}
hr.setActivityValue(activityScore);
} else if (hr.getActivityValue() != null){
System.out.println("Nulling out " + src + " activity score for " + m_gene + " " + allele);
modified = true;
@@ -221,12 +242,11 @@ public boolean update(String allele, @Nullable String activityScore, @Nullable S
if (hr.getFunctionValue() == null) {
System.out.println("New " + src + " function for " + m_gene + " " + allele);
modified = true;
} else if (!hr.getFunctionValue().equals(function)){
} else if (!hr.getFunctionValue().equals(function)) {
System.out.println("Overwriting " + src + " function for " + m_gene + " " + allele + " (" +
hr.getFunctionValue() + " to " + function + ")");
modified = true;
}
hr.setFunctionValue(function);
} else if (hr.getFunctionValue() != null){
System.out.println("Nulling out " + src + " function for " + m_gene + " " + allele);
modified = true;
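The activity-score handling in update() above now compares scores numerically instead of as raw strings, so values that differ only in formatting (e.g. "1" vs "1.0") no longer trigger an "Overwriting ... activity score" message. Below is a minimal standalone sketch of that check; the class and method names are illustrative and not part of the commit.

import java.util.Objects;

public class ActivityScoreCompare {
  // True only when the two scores parse to different numeric values.
  static boolean isRealChange(String oldScore, String newScore) {
    if (oldScore == null || newScore == null) {
      return !Objects.equals(oldScore, newScore);
    }
    return Double.parseDouble(oldScore) != Double.parseDouble(newScore);
  }

  public static void main(String[] args) {
    System.out.println(isRealChange("1", "1.0"));   // false - formatting difference only
    System.out.println(isRealChange("1.0", "1.5")); // true  - genuine overwrite
  }
}

The parse assumes activity scores are plain decimal strings; a non-numeric score would throw NumberFormatException, which this sketch does not guard against.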
@@ -151,7 +151,10 @@ private Map<String,Object> compile(ReportContext reportContext) {
}

if (geneReport.isNoData()) {
noDataGenes.add(symbol);
// checking if allele definition exists for this gene because of subsetting
if (getEnv().getDefinitionReader().getGenes().contains(symbol)) {
noDataGenes.add(symbol);
}
continue;
}

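Per the comment in the hunk above, a gene is now only counted as "no data" when the definition reader still contains it, so genes dropped by subsetting do not end up in the report's no-data list. A tiny standalone illustration of the effect; the gene sets here are made up for the example.

import java.util.Set;

public class NoDataGuardSketch {
  public static void main(String[] args) {
    // genes that survived subsetting, i.e. still have allele definitions
    Set<String> definedGenes = Set.of("CYP2C19", "CYP2C9");
    // genes whose reports came back empty
    Set<String> noDataCandidates = Set.of("CYP2C19", "CYP2D6");

    noDataCandidates.stream()
        // only flag genes that are still part of the definition set;
        // CYP2D6 was subsetted out, so it is skipped rather than reported
        .filter(definedGenes::contains)
        .forEach(g -> System.out.println(g + " has no data"));
  }
}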
63 changes: 43 additions & 20 deletions src/main/java/org/pharmgkb/pharmcat/subsetter/Subsetter.java
@@ -252,10 +252,12 @@ private void exportDefinitionFiles(Path dir, Path defsDir) throws IOException {
if (!Files.exists(defsDir)) {
Files.createDirectories(defsDir);
} else {
deleteObsoleteFiles(defsDir, "_translation.json", m_allowList.keySet(), "definitions");
deleteObsoleteFiles(defsDir, ".json", m_allowList.keySet(), "definition");
deleteObsoleteFiles(defsDir, ".vcf", m_allowList.keySet(), "definition");
deleteObsoleteFiles(defsDir, ".bgz", m_allowList.keySet(), "definition");
deleteObsoleteFiles(defsDir, ".csi", m_allowList.keySet(), "definition");
}

DataSerializer dataSerializer = new DataSerializer();
Set<DefinitionExemption> exemptions = new HashSet<>();
try (VcfHelper vcfHelper = new VcfHelper()) {
for (GeneData gd : m_geneData.values()) {
@@ -303,31 +305,40 @@ private void exportDefinitionFiles(Path dir, Path defsDir) throws IOException {

// export
Path jsonFile = defsDir.resolve(gd.gene + "_translation.json");
dataSerializer.serializeToJson(gd.definitionFile, jsonFile);
DataSerializer.serializeToJson(gd.definitionFile, jsonFile);
//System.out.println("\tWrote " + jsonFile);

DefinitionExemption exemption = m_definitionReader.getExemption(gd.gene);
if (exemption != null) {
if (exemption.getGene().equals("CYP2C9")) {
if (!m_geneData.containsKey("CYP4F2") || !m_geneData.containsKey("VKORC1")) {
// remove extra position only used for warfarin
exemption.getExtraPositions().stream()
.filter(p -> "rs12777823".equals(p.getRsid()))
.findAny()
.ifPresent(p -> exemption.getExtraPositions().remove(p));
}
}
exemptions.add(exemption);
}
}
}

// write definitions
Path exemptionsFile = defsDir.resolve(DataManager.EXEMPTIONS_JSON_FILE_NAME);;
dataSerializer.serializeToJson(exemptions, exemptionsFile);
Path exemptionsFile = defsDir.resolve(DataManager.EXEMPTIONS_JSON_FILE_NAME);
DataSerializer.serializeToJson(exemptions, exemptionsFile);
// generate positions.vcf
DataManager.exportVcfData(defsDir);
Files.copy(defsDir.resolve(DataManager.POSITIONS_VCF),
Files.move(defsDir.resolve(DataManager.POSITIONS_VCF),
dir.resolve(DataManager.POSITIONS_VCF), StandardCopyOption.REPLACE_EXISTING);
Files.copy(defsDir.resolve(DataManager.POSITIONS_VCF + ".bgz"),
Files.move(defsDir.resolve(DataManager.POSITIONS_VCF + ".bgz"),
dir.resolve(DataManager.POSITIONS_VCF + ".bgz"), StandardCopyOption.REPLACE_EXISTING);
Files.copy(defsDir.resolve(DataManager.POSITIONS_VCF + ".bgz.csi"),
Files.move(defsDir.resolve(DataManager.POSITIONS_VCF + ".bgz.csi"),
dir.resolve(DataManager.POSITIONS_VCF + ".bgz.csi"), StandardCopyOption.REPLACE_EXISTING);

Files.copy(defsDir.resolve(DataManager.UNIALLELIC_POSITIONS_VCF + ".bgz"),
Files.move(defsDir.resolve(DataManager.UNIALLELIC_POSITIONS_VCF + ".bgz"),
dir.resolve(DataManager.UNIALLELIC_POSITIONS_VCF + ".bgz"), StandardCopyOption.REPLACE_EXISTING);
Files.copy(defsDir.resolve(DataManager.UNIALLELIC_POSITIONS_VCF + ".bgz.csi"),
Files.move(defsDir.resolve(DataManager.UNIALLELIC_POSITIONS_VCF + ".bgz.csi"),
dir.resolve(DataManager.UNIALLELIC_POSITIONS_VCF + ".bgz.csi"), StandardCopyOption.REPLACE_EXISTING);

if (!m_reportAsReference.isEmpty()) {
@@ -365,8 +376,8 @@ private void exportPhenotypes(Path dir) throws IOException {
deleteObsoleteFiles(cpicDir, ".json", genes, "CPIC phenotypes");
}

DataSerializer dataSerializer = new DataSerializer();
SortedSet<String> modified = new TreeSet<>(writePhenotypes(genes, m_phenotypeMap.getCpicGenes(), cpicDir, dataSerializer, DataSource.CPIC));
SortedSet<String> modified = new TreeSet<>(writePhenotypes(genes, m_phenotypeMap.getCpicGenes(), cpicDir,
DataSource.CPIC));

/*
Path dpwgDir = dir.resolve("dpwg");
@@ -385,10 +396,11 @@ private void exportPhenotypes(Path dir) throws IOException {
}

private Set<String> writePhenotypes(Collection<String> genes, Collection<GenePhenotype> phenotypes, Path dir,
DataSerializer dataSerializer, DataSource src) throws IOException {
DataSource src) throws IOException {
Set<String> changed = new HashSet<>();
for (GenePhenotype gp : phenotypes) {
if (genes.contains(gp.getGene())) {
boolean updatedFunctions = false;
if (m_functionOverrides.containsKey(gp.getGene())) {
GeneData geneData = m_geneData.get(gp.getGene());
// geneData can be null for outside-call only genes (e.g. CYP2D6)
@@ -403,18 +415,25 @@ private Set<String> writePhenotypes(Collection<String> genes, Collection<GenePhe
System.out.println("WARNING: extra definition for " + na.getName() + " but no function was specified");
continue;
}
System.out.println("New named allele: " + gp.getGene() + " " + na.getName());
updatedFunctions = true;
gp.addHaplotypeRecord(na.getName(), fd.activityScore, fd.function, null);
}
}

for (FunctionData fd : m_functionOverrides.get(gp.getGene())) {
gp.update(fd.allele, fd.activityScore, fd.function, src);
if (gp.update(fd.allele, fd.activityScore, fd.function, src)) {
updatedFunctions = true;
}
}
changed.add(gp.getGene());
}
// export
Path jsonFile = dir.resolve(gp.getGene() + ".json");
dataSerializer.serializeToJson(gp, jsonFile);
if (updatedFunctions) {
gp.generateDiplotypes();
}
DataSerializer.serializeToJson(gp, jsonFile);
//System.out.println("\tWrote " + jsonFile);
}
}
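writePhenotypes() now tracks whether any function override actually added or changed a haplotype record and, only in that case, calls gp.generateDiplotypes() before serializing. Below is a standalone sketch of that change-tracking pattern; apart from the idea of update/generateDiplotypes, all names are illustrative and the rebuild step is reduced to a print.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DirtyFlagSketch {
  // Returns true only when the stored function really changed.
  static boolean applyOverride(Map<String, String> functions, String allele, String fn) {
    String previous = functions.put(allele, fn);
    return !fn.equals(previous);
  }

  public static void main(String[] args) {
    Map<String, String> functions = new HashMap<>(Map.of("*1", "Normal function"));
    List<String[]> overrides = List.of(
        new String[] {"*1", "Normal function"},  // no-op, does not mark anything dirty
        new String[] {"*2", "No function"});     // genuine change

    boolean updated = false;
    for (String[] o : overrides) {
      if (applyOverride(functions, o[0], o[1])) {
        updated = true;
      }
    }
    if (updated) {
      // stands in for gp.generateDiplotypes(): derived data is rebuilt once, at the end
      System.out.println("regenerating diplotypes...");
    }
  }
}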
@@ -478,13 +497,13 @@ public static void main(String[] args) {
}

Path dataDir = cliHelper.getValidDirectory("i", true);
Path baseDefDir = dataDir.resolve("definitions");
Path baseDefDir = dataDir.resolve("definition");
if (cliHelper.hasOption("d") || cliHelper.hasOption("pos") || cliHelper.hasOption("a")) {
if (!Files.isDirectory(baseDefDir)) {
System.out.println("Cannot find 'definitions' subdirectory in " + dataDir);
}
}
Path basePhenoDir = dataDir.resolve("phenotypes");
Path basePhenoDir = dataDir.resolve("phenotype");
if (cliHelper.hasOption("pc")) {
if (!Files.isDirectory(basePhenoDir)) {
System.out.println("Cannot find 'phenotypes' subdirectory in " + dataDir);
@@ -524,7 +543,9 @@ public static void main(String[] args) {
subsetter.parseFunctionOverrides(file);
}

boolean updatedDefinitions = subsetter.updateDefinitions();
if (!subsetter.updateDefinitions()) {
System.out.println("No definitions updated.");
}

if (cliHelper.hasOption("o")) {
Path outDir = cliHelper.getValidDirectory("o", true);
@@ -753,9 +774,11 @@ private void parseFunctionOverrides(Path xlsxFile) throws IOException {
if (activityCell.getCellType() == CellType.NUMERIC) {
double val = activityCell.getNumericCellValue();
if (val == 1) {
activity = "1";
activity = "1.0";
} else if (val == 0) {
activity = "0";
activity = "0.0";
} else if (val == 2) {
activity = "2.0";
} else {
activity = Double.toString(val);
}
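The activity-score cells read from the override spreadsheet are now normalized so whole numbers come out as "0.0", "1.0", and "2.0", the same form Double.toString() produces for the fall-through case, presumably so override strings line up with scores already stored as strings and with the numeric comparison added in GenePhenotype.update(). A standalone sketch of the normalization; the method name is illustrative.

public class ActivityScoreFormat {
  static String normalize(double val) {
    // explicit cases for the common whole-number scores...
    if (val == 0) return "0.0";
    if (val == 1) return "1.0";
    if (val == 2) return "2.0";
    // ...and Double.toString() for everything else (e.g. 0.25, 1.5)
    return Double.toString(val);
  }

  public static void main(String[] args) {
    System.out.println(normalize(1));     // 1.0
    System.out.println(normalize(0.25));  // 0.25
  }
}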
