From 0f1753e3e775692a8863a99abd7d0a545c72d63f Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Thu, 20 Jun 2024 18:58:52 +0200 Subject: [PATCH] adding check against inadvertently log-2 transformed splicing fold change column in HBADEALS file. --- isopret-cli/pom.xml | 2 +- isopret-core/pom.xml | 2 +- .../impl/rnaseqdata/RnaSeqResultLine.java | 11 ++++++++ .../impl/rnaseqdata/RnaSeqResultsParser.java | 25 ++++++++++++------- isopret-data/pom.xml | 2 +- isopret-exception/pom.xml | 2 +- isopret-gui/pom.xml | 2 +- isopret-io/pom.xml | 2 +- pom.xml | 2 +- 9 files changed, 34 insertions(+), 16 deletions(-) diff --git a/isopret-cli/pom.xml b/isopret-cli/pom.xml index 4938672..c2a0f1a 100644 --- a/isopret-cli/pom.xml +++ b/isopret-cli/pom.xml @@ -6,7 +6,7 @@ org.jax.isopret isopret - 1.3.1 + 1.3.2 isopret-cli diff --git a/isopret-core/pom.xml b/isopret-core/pom.xml index e271f54..b2e8d69 100644 --- a/isopret-core/pom.xml +++ b/isopret-core/pom.xml @@ -6,7 +6,7 @@ org.jax.isopret isopret - 1.3.1 + 1.3.2 isopret-core diff --git a/isopret-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/RnaSeqResultLine.java b/isopret-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/RnaSeqResultLine.java index e99ca89..f890d93 100644 --- a/isopret-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/RnaSeqResultLine.java +++ b/isopret-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/RnaSeqResultLine.java @@ -46,5 +46,16 @@ public boolean isIsoform() { } + /** + * Check for a common user error -- the splice fold change is "raw" (not log-transformed), whereas the expression + * fold change is log2. These are separate lines in the HBADEALS result file. 
+ * @return true if the line does not have negative and thus potentially log2-transformed splicing value + */ + public boolean isValid() { + final double EPSILON = 0.001; + return !isIsoform() || !((expFC + EPSILON) < 0); + } + + } diff --git a/isopret-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/RnaSeqResultsParser.java b/isopret-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/RnaSeqResultsParser.java index 8c392fd..08d544e 100644 --- a/isopret-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/RnaSeqResultsParser.java +++ b/isopret-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/RnaSeqResultsParser.java @@ -11,7 +11,12 @@ import java.util.*; /** - * Parse the HBA-DEALS output file, e.g., + * Parse the HBA-DEALS output file. + * The HBA-DEALS output file contains 4 columns. The first column is the gene name, the second is the transcript name, the third is the fold change, + * and the fourth is 1-probability of differential expression or proportion(splicing), which is the posterior error probability (PEP). + * Entries that refer to expression have ‘Expression’ in their second column. If isoform.level is FALSE, entries that refer to differential + * splicing of the gene will have ‘Splicing’ in their second column entry. + * The fold change for expression is given as log2 fold change, and for splicing as fold change. *

* Gene Isoform ExplogFC/FC P * ENSG00000160710 Expression 1.54770825394965 0 @@ -87,7 +92,12 @@ private Map parseResults(BufferedReader br) throws } int found_symbol = 0; + int invalid_lines = 0; for (RnaSeqResultLine hline : lines) { + if (! hline.isValid()) { + invalid_lines++; + continue; + } AccessionNumber ensgAccession = hline.geneAccession(); // if we cannot find symbol, just show the accession if (hgncMap.containsKey(ensgAccession)) { GeneModel model = hgncMap.get(ensgAccession); @@ -114,6 +124,11 @@ private Map parseResults(BufferedReader br) throws if (! unfound.isEmpty()) { LOGGER.info("Could not find symbols for {} accessions.", unfound.size()); } + if (invalid_lines > 0) { + String errmsg = String.format("%d invalid lines (splicing fold change negative-but only expr values should be log2). Fix before continuing.", + invalid_lines); + throw new IsopretRuntimeException(errmsg); + } return resultsMap; } @@ -146,12 +161,4 @@ public static Map parse(File file, return parser.ensgAcc2geneResultMap; } - - private Map getEnsgAcc2geneResultMap() { - return this.ensgAcc2geneResultMap; - } - - - - } diff --git a/isopret-data/pom.xml b/isopret-data/pom.xml index 954219c..b2e1c1f 100644 --- a/isopret-data/pom.xml +++ b/isopret-data/pom.xml @@ -6,7 +6,7 @@ org.jax.isopret isopret - 1.3.1 + 1.3.2 isopret-data diff --git a/isopret-exception/pom.xml b/isopret-exception/pom.xml index 2e54ffb..73be55f 100644 --- a/isopret-exception/pom.xml +++ b/isopret-exception/pom.xml @@ -6,7 +6,7 @@ org.jax.isopret isopret - 1.3.1 + 1.3.2 isopret-exception diff --git a/isopret-gui/pom.xml b/isopret-gui/pom.xml index 1f50c7d..ef363d3 100644 --- a/isopret-gui/pom.xml +++ b/isopret-gui/pom.xml @@ -9,7 +9,7 @@ org.jax.isopret isopret - 1.3.1 + 1.3.2 isopret-gui diff --git a/isopret-io/pom.xml b/isopret-io/pom.xml index 0aefd90..443e474 100644 --- a/isopret-io/pom.xml +++ b/isopret-io/pom.xml @@ -6,7 +6,7 @@ org.jax.isopret isopret - 1.3.1 + 1.3.2 jar isopret-io diff --git a/pom.xml 
b/pom.xml index 43a9834..7970f7f 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ org.jax.isopret isopret - 1.3.1 + 1.3.2 isopret-core isopret-cli