Merge pull request #171 from TheJacksonLaboratory/develop

Develop
TheJacksonLaboratory · Dec 20, 2023 · 54c3e8e · 54c3e8e
2 parents 17ac1a7 + a5b76ce
commit 54c3e8e
Show file tree

Hide file tree

Showing 56 changed files with 215 additions and 289 deletions.
diff --git a/isopret-cli/pom.xml b/isopret-cli/pom.xml
@@ -6,7 +6,7 @@
     <parent>
         <groupId>org.jax.isopret</groupId>
         <artifactId>isopret</artifactId>
-        <version>1.1.19</version>
+        <version>1.2.0</version>
     </parent>
 
     <artifactId>isopret-cli</artifactId>

diff --git a/isopret-core/pom.xml b/isopret-core/pom.xml
@@ -6,7 +6,7 @@
     <parent>
         <groupId>org.jax.isopret</groupId>
         <artifactId>isopret</artifactId>
-        <version>1.1.19</version>
+        <version>1.2.0</version>
     </parent>
 
     <artifactId>isopret-core</artifactId>

diff --git a/isopret-core/src/main/java/org/jax/isopret/core/analysis/InterproFisherExact.java b/isopret-core/src/main/java/org/jax/isopret/core/analysis/InterproFisherExact.java
@@ -1,9 +1,9 @@
 package org.jax.isopret.core.analysis;
 
-import org.jax.isopret.core.impl.rnaseqdata.TranscriptResultImpl;
 import org.jax.isopret.data.InterproEntry;
 import org.jax.isopret.data.AccessionNumber;
 import org.jax.isopret.model.AnnotatedGene;
+import org.jax.isopret.model.TranscriptResult;
 import org.monarchinitiative.phenol.analysis.stats.Hypergeometric;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -39,7 +39,7 @@ public InterproFisherExact(List<AnnotatedGene> annotatedGeneList, double splicin
         for (var agene : annotatedGeneList) {
             //Map<AccessionNumber, List<DisplayInterproAnnotation>> m = agene.getTranscriptToInterproHitMap();
             Map<AccessionNumber, Set<InterproEntry>> uniqIntproSetMap = agene.getTranscriptToUniqueInterproMap();
-            Set<TranscriptResultImpl> results = agene.getHbaDealsResult().getTranscriptResults();
+            Set<TranscriptResult> results = agene.getHbaDealsResult().getTranscriptResults();
             for (var res : results) {
                 AccessionNumber accession = res.getTranscriptId();
                 if (! uniqIntproSetMap.containsKey(accession)) {

diff --git a/isopret-core/src/main/java/org/jax/isopret/core/analysis/IsopretStats.java b/isopret-core/src/main/java/org/jax/isopret/core/analysis/IsopretStats.java
@@ -1,6 +1,8 @@
 package org.jax.isopret.core.analysis;
 
 import org.jax.isopret.core.impl.rnaseqdata.RnaSeqAnalysisMethod;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.*;
 import java.text.DateFormat;
@@ -41,6 +43,7 @@
  * @author Peter N Robinson
  */
 public class IsopretStats {
+    private final static Logger LOGGER = LoggerFactory.getLogger(IsopretStats.class);
 
    private final Map<String, String> data;
    /** errors and warnings encountered during the analysis */
@@ -60,7 +63,7 @@ public void display() {
         try (Writer stdout =  new BufferedWriter(new OutputStreamWriter(System.out))) {
             write(stdout);
         } catch (IOException e) {
-            e.printStackTrace();
+            LOGGER.error("Could not display statistics: {}", e.getMessage(), e); // trailing Throwable keeps the stack trace
         }
     }
     /** Output to TSV file. */
@@ -69,7 +72,7 @@ public void writeToFile(String filename) {
         try (Writer bw =  new BufferedWriter(new FileWriter(file))) {
             write(bw);
         } catch (IOException e) {
-            e.printStackTrace();
+            LOGGER.error("Could not write statistics: {}", e.getMessage(), e); // trailing Throwable keeps the stack trace
         }
     }
 

diff --git a/isopret-core/src/main/java/org/jax/isopret/core/analysis/TranscriptToGeneStats.java b/isopret-core/src/main/java/org/jax/isopret/core/analysis/TranscriptToGeneStats.java
@@ -28,7 +28,7 @@ public void display() {
         try (Writer stdout =  new BufferedWriter(new OutputStreamWriter(System.out))) {
             write(stdout);
         } catch (IOException e) {
-            e.printStackTrace();
+            LOGGER.error("Could not display TranscriptToGeneStats: {}", e.getMessage(), e); // trailing Throwable keeps the stack trace
         }
     }
 
@@ -37,7 +37,7 @@ public void writeToFile(String filename) {
         try (Writer bw =  new BufferedWriter(new FileWriter(file))) {
             write(bw);
         } catch (IOException e) {
-            e.printStackTrace();
+            LOGGER.error("Could not write TranscriptToGeneStats: {}", e.getMessage(), e); // trailing Throwable keeps the stack trace
         }
     }
 

diff --git a/isopret-core/src/main/java/org/jax/isopret/core/configuration/IsopretDataResolver.java b/isopret-core/src/main/java/org/jax/isopret/core/configuration/IsopretDataResolver.java
@@ -31,30 +31,6 @@ public class IsopretDataResolver {
     private static final String ISOFORM_FUNCTION_BP_FILENAME = "isoform_function_list_bp.txt";
     private static final String ISOFORM_FUNCTION_CC_URL = ZENODO_BASE_URL + "files/isoform_function_list_cc.txt?download=1";
     private static final String ISOFORM_FUNCTION_CC_FILENAME = "isoform_function_list_cc.txt";
-   /*
-    private static final DownloadItem go = makeItem(GO_JSON_URL, GO_JSON);
-    private static final DownloadItem jannovarHg38 = makeItem(JANNOVAR_ZENODO_URL, JANNOVAR_FILENAME);
-    private static final DownloadItem hgnc = makeItem(HGNC_URL,HGNC_FILENAME);
-    private static final DownloadItem interproDomainDesc = makeItem(INTERPRO_DOMAIN_DESC_URL, INTERPRO_DOMAIN_DESC_FILENAME);
-    private static final DownloadItem interproDomains = makeItem(INTERPRO_DOMAINS_URL, INTERPRO_DOMAINS_FILENAME);
-    private static final DownloadItem isoformFunctionMf = makeItem(ISOFORM_FUNCTION_MF_URL, ISOFORM_FUNCTION_MF_FILENAME);
-    private static final DownloadItem isoformFunctionBp = makeItem(ISOFORM_FUNCTION_BP_URL, ISOFORM_FUNCTION_BP_FILENAME);
-    private static final DownloadItem isoformFunctionCc = makeItem(ISOFORM_FUNCTION_CC_URL, ISOFORM_FUNCTION_CC_FILENAME);
-    static DownloadItem makeItem(String urlString, String base) {
-        try {
-            URL url = new URL(urlString);
-            return new DownloadItem(url, base);
-        } catch (MalformedURLException e) {
-            // should never happen
-            throw new IsopretRuntimeException("Could not create URL from " + urlString);
-        }
-    }
-    private static final Set<DownloadItem> allDownloadItems = Set.of(go,
-            jannovarHg38, hgnc, interproDomainDesc, interproDomains,
-            isoformFunctionMf, isoformFunctionBp, isoformFunctionCc);
-    */
-
-
 
     private final Path dataDirectory;
 
@@ -106,19 +82,4 @@ public Path interproDomainsPath() {
         return dataDirectory.resolve(INTERPRO_DOMAINS_FILENAME);
     }
 
-
-    /** @return Download data for Gene Ontology (go.json)>
-    public static DownloadItem go() { return go; }
-    /** @return Download data for Gene Ontology (go.json)>
-    public static DownloadItem jannovarHg38() { return jannovarHg38; }
-    public static DownloadItem hgnc() { return hgnc; }
-    public static DownloadItem interproDomainDesc() { return interproDomainDesc; }
-    public static DownloadItem interproDomains() { return interproDomains; }
-    public static DownloadItem isoformFunctionMf() { return isoformFunctionMf; }
-    public static DownloadItem isoformFunctionBp() { return isoformFunctionBp; }
-    public static DownloadItem isoformFunctionCc() { return isoformFunctionCc; }
-    public static Set<DownloadItem> allDownloadItems() { return allDownloadItems; }
-*/
-
-
 }
diff --git a/isopret-core/src/main/java/org/jax/isopret/core/impl/go/IsopretAssociationContainer.java b/isopret-core/src/main/java/org/jax/isopret/core/impl/go/IsopretAssociationContainer.java
@@ -77,7 +77,6 @@ public Map<TermId, List<TermId>> getOntologyTermToDomainItemsMap() {
     public Set<TermId> getDomainItemsAnnotatedByGoTerm(TermId goTermId) {
         Set<TermId> domainItemSet = new HashSet<>();
         Set<TermId> descendentSet = OntologyAlgorithm.getDescendents(this.ontology, goTermId);
-       // descendentSet.add(goTermId);
         for (Map.Entry<TermId, IsopretAnnotations> entry : associationMap.entrySet()) {
             TermId gene = entry.getKey();
             for (TermId ontologyTermId : entry.getValue().getAnnotatingTermIds()) {
@@ -108,13 +107,12 @@ public Map<TermId, DirectAndIndirectTermAnnotations> getAssociationMap(Set<TermI
                 /* In this step add the direct annotations only */
                 TermId ontologyTermId = ita.getTermId();
                 // check if the term is in the ontology (sometimes, obsoletes are used in the bla32 files)
-                Term term = this.ontology.getTermMap().get(ontologyTermId);
-                if (term == null) {
+                Optional<Term> termOpt = ontology.termForTermId(ontologyTermId);
+                if (termOpt.isEmpty()) {
                     ontology_term_not_found++;
                     LOGGER.warn("Unable to retrieve ontology term {} (omitted).", ontologyTermId.getValue());
                     continue;
                 }
-                // if necessary, replace with the latest primary term id
                 ontologyTermId = this.ontology.getPrimaryTermId(ontologyTermId);
                 directAnnotationMap.computeIfAbsent(domainTermId, k -> new HashSet<>()).add(ontologyTermId);
             }
@@ -134,8 +132,9 @@ public Map<TermId, DirectAndIndirectTermAnnotations> getAssociationMap(Set<TermI
                 annotationMap.putIfAbsent(ontologyId, new DirectAndIndirectTermAnnotations(ontologyId));
                 annotationMap.get(ontologyId).addDirectAnnotatedItem(domainItemTermId);
                 // In addition to the direct annotation, the gene is also indirectly annotated
-                // to all of the GO Term's ancestors
-                Set<TermId> ancs = OntologyAlgorithm.getAncestorTerms(ontology, ontologyId, false);
+                // to all the GO Term's ancestors
+                //Set<TermId> ancs = OntologyAlgorithm.getAncestorTerms(ontology,  ontology.getRootTermId(), ontologyId, false);
+                Iterable<TermId> ancs =  ontology.graph().getAncestors(ontologyId, false);
                 for (TermId ancestor : ancs) {
                     annotationMap.putIfAbsent(ancestor, new DirectAndIndirectTermAnnotations(ancestor));
                     annotationMap.get(ancestor).addIndirectAnnotatedItem(domainItemTermId);
@@ -153,7 +152,7 @@ public Set<TermId> getAllAnnotatedGenes() {
     /**
      * TODO ADD TEST
      * @param termId a Gene Ontology id
-     * @return List of genes/transcripts annotation by this GO term
+     * @return set of genes/transcripts annotated by this GO term
      */
     @Override
     public Set<TermId> getDomainItemsAnnotatedByOntologyTerm(TermId termId) {

diff --git a/isopret-core/src/main/java/org/jax/isopret/core/impl/go/TranscriptFunctionFileParser.java b/isopret-core/src/main/java/org/jax/isopret/core/impl/go/TranscriptFunctionFileParser.java
@@ -64,7 +64,7 @@ private static Map<TermId, Set<TermId>> parseFunctionFile(File file, Ontology on
         } catch (IOException e) {
             throw new PhenolRuntimeException("Could not import isoform_function_list.txt :" + e.getMessage());
         }
-        if (notFound.size() > 0) {
+        if (!notFound.isEmpty()) {
             int n_missing = notFound.size(); // keep logger sane
             LOGGER.warn("Could not find " + n_missing + " terms in Ontology");
         }

diff --git a/isopret-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/GeneResultImpl.java b/isopret-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/GeneResultImpl.java
@@ -4,6 +4,7 @@
 import org.jax.isopret.model.GeneModel;
 import org.jax.isopret.model.GeneResult;
 import org.jax.isopret.model.GeneSymbolAccession;
+import org.jax.isopret.model.TranscriptResult;
 
 import java.util.*;
 import java.util.stream.Collectors;
@@ -21,9 +22,9 @@ public class GeneResultImpl implements GeneResult, Comparable<GeneResultImpl> {
     /** Accession number of the gene, e.g., ENSG00000001167. */
     private final AccessionNumber geneAccession;
     private final GeneModel geneModel;
-    private double expressionFoldChange;
+    private double expressionLog2FoldChange;
     private double expressionP;
-    private final Map<AccessionNumber, TranscriptResultImpl> transcriptMap;
+    private final Map<AccessionNumber, TranscriptResult> transcriptMap;
 
 
 
@@ -34,12 +35,12 @@ public GeneResultImpl(AccessionNumber geneAccession, GeneModel sym) {
     }
     @Override
     public void addExpressionResult(double fc, double p) {
-        this.expressionFoldChange = fc;
+        this.expressionLog2FoldChange = fc;
         this.expressionP = p;
     }
     @Override
     public void addTranscriptResult(AccessionNumber isoform, double expFC, double P) {
-        TranscriptResultImpl tresult = new TranscriptResultImpl(isoform, expFC, P);
+        TranscriptResult tresult = new TranscriptResultImpl(isoform, expFC, P);
         transcriptMap.putIfAbsent(isoform, tresult);
     }
 
@@ -61,10 +62,16 @@ public GeneModel getGeneModel() {
     public GeneSymbolAccession getGeneSymbolAccession() {
         return new GeneSymbolAccession(geneModel.geneSymbol(), geneAccession);
     }
+    @Override
+    public double getExpressionLog2FoldChange() {
+        return expressionLog2FoldChange;
+    }
+
     @Override
     public double getExpressionFoldChange() {
-        return expressionFoldChange;
+        return Math.pow(2.0, expressionLog2FoldChange); // undo log2: FC = 2^(log2 FC), not (log2 FC)^2
     }
+
     @Override
     public double getExpressionP() {
         return expressionP;
@@ -74,12 +81,12 @@ public List<Double> getSplicingPlist() {
         return this.transcriptMap
                 .values()
                 .stream()
-                .map(TranscriptResultImpl::getPvalue)
+                .map(TranscriptResult::getPvalue)
                 .collect(Collectors.toList());
     }
 
     @Override
-    public Map<AccessionNumber, TranscriptResultImpl> getTranscriptMap() {
+    public Map<AccessionNumber, TranscriptResult> getTranscriptMap() {
         return Collections.unmodifiableMap(transcriptMap);
     }
 
@@ -123,7 +130,7 @@ public double getSmallestSplicingP() {
         return this.transcriptMap
                 .values()
                 .stream()
-                .map(TranscriptResultImpl::getPvalue)
+                .map(TranscriptResult::getPvalue)
                 .min(Double::compareTo)
                 .orElse(1.0);
     }
@@ -132,7 +139,7 @@ private double minp() {
         return Math.min(getExpressionP(), getSmallestSplicingP());
     }
     @Override
-    public Set<TranscriptResultImpl> getTranscriptResults() {
+    public Set<TranscriptResult> getTranscriptResults() {
         return new HashSet<>(this.transcriptMap.values());
     }
 

diff --git a/isopret-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/HbaDealsThresholder.java b/isopret-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/HbaDealsThresholder.java
@@ -3,6 +3,8 @@
 import org.jax.isopret.data.AccessionNumber;
 import org.jax.isopret.model.GeneModel;
 import org.jax.isopret.model.GeneResult;
+import org.jax.isopret.model.TranscriptResult;
+
 import org.monarchinitiative.phenol.ontology.data.TermId;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -163,7 +165,7 @@ public Set<TermId> getAllTranscriptTermIds() {
                 .map(GeneResult::getTranscriptMap)
                 .map(Map::values)
                 .flatMap(Collection::stream)
-                .map(TranscriptResultImpl::getTranscriptId)
+                .map(TranscriptResult::getTranscriptId)
                 .map(AccessionNumber::toTermId)
                 .collect(Collectors.toSet());
     }
@@ -184,7 +186,7 @@ public Set<TermId> dasIsoformTermIds() {
                 .map(Map::values)
                 .flatMap(Collection::stream)
                 .filter(r -> r.isSignificant(this.splicingThreshold ))
-               .map(TranscriptResultImpl::getTranscriptId)
+               .map(TranscriptResult::getTranscriptId)
                 .map(AccessionNumber::toTermId)
                 .collect(Collectors.toSet());
     }

diff --git a/...t-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/IsoformSpecificThresholder.java b/...t-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/IsoformSpecificThresholder.java
@@ -2,6 +2,7 @@
 
 import org.jax.isopret.data.AccessionNumber;
 import org.jax.isopret.model.GeneResult;
+import org.jax.isopret.model.TranscriptResult;
 import org.monarchinitiative.phenol.analysis.AssociationContainer;
 import org.monarchinitiative.phenol.analysis.StudySet;
 import org.monarchinitiative.phenol.ontology.data.TermId;
@@ -155,14 +156,14 @@ private IsoformSpecificThresholder(Map<AccessionNumber, GeneResult> results,
                 .stream()
                 .flatMap(r -> r.getTranscriptResults().stream())
                 .filter(tr -> tr.getPvalue() <= splicingPepThreshold)
-                .map(TranscriptResultImpl::getTranscriptId)
+                .map(TranscriptResult::getTranscriptId)
                 .map(AccessionNumber::toTermId)
                 .collect(Collectors.toSet());
         Set<TermId> dasIsoformPopulation = results
                 .values()
                 .stream()
                 .flatMap(r -> r.getTranscriptResults().stream())
-                .map(TranscriptResultImpl::getTranscriptId)
+                .map(TranscriptResult::getTranscriptId)
                 .map(AccessionNumber::toTermId)
                 .collect(Collectors.toSet());
         LOGGER.info("DAS: {} study set and {} population genes", dasIsoformStudy.size(), dasIsoformPopulation.size());

diff --git a/...-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/PosteriorErrorProbThreshold.java b/...-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/PosteriorErrorProbThreshold.java
@@ -10,14 +10,14 @@
  * false-discovery rate (FDR) over the current dataset. Explanations can be found in
  * Käll L, Storey JD, MacCoss MJ, Noble WS. Posterior error probabilities and false discovery rates:
  * two sides of the same coin. J Proteome Res. 2008 Jan;7(1):40-4. PMID:18052118.
- * In brief, the The q-value of a test measures the proportion of false positives incurred (called the false discovery
+ * In brief, the q-value of a test measures the proportion of false positives incurred (called the false discovery
  * rate) when that particular test is called significant (thus, the q-value is the minimum FDR at which the test would be
  * called significant in the context of the current dataset). The local FDR (another name for the PEP) measures the
  * posterior probability the null hypothesis is true given the test's p-value. Whereas the FDR measures the error rate
  * associated with a collection of tests, the PEP measures the probability of error for a single test.
  * The mean value of all PEPs that are called significant corresponds to the FDR at the corresponding threshold.
  * Our goal is to set the FDR to a given threshold (by default, 0.01) and to define the PEP threshold that calls
- * as many tests as possible signficant while still maintaining this threshold. We apply the additional criterion that
+ * as many tests as possible significant while still maintaining this threshold. We apply the additional criterion that
  * if the PEP is larger than 0.25 the test is not called significant, regardless of the FDR.
  * @author Peter N Robinson
  */

diff --git a/isopret-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/RnaSeqResultLine.java b/isopret-core/src/main/java/org/jax/isopret/core/impl/rnaseqdata/RnaSeqResultLine.java
@@ -5,14 +5,14 @@
 
 
 /**
- * With the exception of the header line, the HBA-DEALS and the edgeR results file (from our script to
+ * Except for the header line, the HBA-DEALS and the edgeR results file (from our script to
  * analyze both DGE and DAS) are the same. This class is therefore used to parse lines from either
  * HBA-DEALS or edgeR. Note that the objects are only used for parsing and are not a part of the
  * analysis model. Each line will be transformed either to a {@link org.jax.isopret.model.GeneResult} or
  * a {@link org.jax.isopret.model.TranscriptResult}.
  * @param geneAccession ENSEMBL accession number for a gene, e.g., ENSG00000139618
  * @param isoform ENSEMBL accession number for a transcript (can be null), e.g., ENST00000560355.1
- * @param expFC expression fold change (from HBA-DEALS)
+ * @param expFC expression fold change (from HBA-DEALS). For expression: log2 FC; for splicing: FC.
  * @param raw_p raw p-value (from HBA-DEALS)
  * @author Peter N Robinson
  */

diff --git a/isopret-core/src/main/java/org/jax/isopret/model/AnnotatedGene.java b/isopret-core/src/main/java/org/jax/isopret/model/AnnotatedGene.java
@@ -1,6 +1,5 @@
 package org.jax.isopret.model;
 
-import org.jax.isopret.core.impl.rnaseqdata.TranscriptResultImpl;
 import org.jax.isopret.data.AccessionNumber;
 import org.jax.isopret.data.InterproEntry;
 import org.jax.isopret.data.Transcript;
@@ -53,7 +52,7 @@ public AnnotatedGene(List<Transcript> transcripts,
 
         this.hbaDealsResult = result;
         // use HBA Deals results to filter for transcripts that are actually expressed
-        Map<AccessionNumber, TranscriptResultImpl> transcriptMap = result.getTranscriptMap();
+        Map<AccessionNumber, TranscriptResult> transcriptMap = result.getTranscriptMap();
         expressedTranscripts = transcripts
                 .stream()
                 .filter(t -> transcriptMap.containsKey(t.accessionId()))