Skip to content

Commit

Permalink
Merge pull request #171 from TheJacksonLaboratory/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
pnrobinson authored Dec 20, 2023
2 parents 17ac1a7 + a5b76ce commit 54c3e8e
Show file tree
Hide file tree
Showing 56 changed files with 215 additions and 289 deletions.
2 changes: 1 addition & 1 deletion isopret-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>org.jax.isopret</groupId>
<artifactId>isopret</artifactId>
<version>1.1.19</version>
<version>1.2.0</version>
</parent>

<artifactId>isopret-cli</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion isopret-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>org.jax.isopret</groupId>
<artifactId>isopret</artifactId>
<version>1.1.19</version>
<version>1.2.0</version>
</parent>

<artifactId>isopret-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package org.jax.isopret.core.analysis;

import org.jax.isopret.core.impl.rnaseqdata.TranscriptResultImpl;
import org.jax.isopret.data.InterproEntry;
import org.jax.isopret.data.AccessionNumber;
import org.jax.isopret.model.AnnotatedGene;
import org.jax.isopret.model.TranscriptResult;
import org.monarchinitiative.phenol.analysis.stats.Hypergeometric;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -39,7 +39,7 @@ public InterproFisherExact(List<AnnotatedGene> annotatedGeneList, double splicin
for (var agene : annotatedGeneList) {
//Map<AccessionNumber, List<DisplayInterproAnnotation>> m = agene.getTranscriptToInterproHitMap();
Map<AccessionNumber, Set<InterproEntry>> uniqIntproSetMap = agene.getTranscriptToUniqueInterproMap();
Set<TranscriptResultImpl> results = agene.getHbaDealsResult().getTranscriptResults();
Set<TranscriptResult> results = agene.getHbaDealsResult().getTranscriptResults();
for (var res : results) {
AccessionNumber accession = res.getTranscriptId();
if (! uniqIntproSetMap.containsKey(accession)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.jax.isopret.core.analysis;

import org.jax.isopret.core.impl.rnaseqdata.RnaSeqAnalysisMethod;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.text.DateFormat;
Expand Down Expand Up @@ -41,6 +43,7 @@
* @author Peter N Robinson
*/
public class IsopretStats {
private final static Logger LOGGER = LoggerFactory.getLogger(IsopretStats.class);

private final Map<String, String> data;
/** errors and warnings encountered during the analysis */
Expand All @@ -60,7 +63,7 @@ public void display() {
try (Writer stdout = new BufferedWriter(new OutputStreamWriter(System.out))) {
write(stdout);
} catch (IOException e) {
e.printStackTrace();
LOGGER.error("Could not display statistics: {}", e.getMessage());
}
}
/** Output to TSV file. */
Expand All @@ -69,7 +72,7 @@ public void writeToFile(String filename) {
try (Writer bw = new BufferedWriter(new FileWriter(file))) {
write(bw);
} catch (IOException e) {
e.printStackTrace();
LOGGER.error("Could not write statistics: {}", e.getMessage());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public void display() {
try (Writer stdout = new BufferedWriter(new OutputStreamWriter(System.out))) {
write(stdout);
} catch (IOException e) {
e.printStackTrace();
LOGGER.error("Could not display TranscriptToGeneStats: {}", e.getMessage());
}
}

Expand All @@ -37,7 +37,7 @@ public void writeToFile(String filename) {
try (Writer bw = new BufferedWriter(new FileWriter(file))) {
write(bw);
} catch (IOException e) {
e.printStackTrace();
LOGGER.error("Could not write TranscriptToGeneStats: {}", e.getMessage());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,30 +31,6 @@ public class IsopretDataResolver {
private static final String ISOFORM_FUNCTION_BP_FILENAME = "isoform_function_list_bp.txt";
private static final String ISOFORM_FUNCTION_CC_URL = ZENODO_BASE_URL + "files/isoform_function_list_cc.txt?download=1";
private static final String ISOFORM_FUNCTION_CC_FILENAME = "isoform_function_list_cc.txt";
/*
private static final DownloadItem go = makeItem(GO_JSON_URL, GO_JSON);
private static final DownloadItem jannovarHg38 = makeItem(JANNOVAR_ZENODO_URL, JANNOVAR_FILENAME);
private static final DownloadItem hgnc = makeItem(HGNC_URL,HGNC_FILENAME);
private static final DownloadItem interproDomainDesc = makeItem(INTERPRO_DOMAIN_DESC_URL, INTERPRO_DOMAIN_DESC_FILENAME);
private static final DownloadItem interproDomains = makeItem(INTERPRO_DOMAINS_URL, INTERPRO_DOMAINS_FILENAME);
private static final DownloadItem isoformFunctionMf = makeItem(ISOFORM_FUNCTION_MF_URL, ISOFORM_FUNCTION_MF_FILENAME);
private static final DownloadItem isoformFunctionBp = makeItem(ISOFORM_FUNCTION_BP_URL, ISOFORM_FUNCTION_BP_FILENAME);
private static final DownloadItem isoformFunctionCc = makeItem(ISOFORM_FUNCTION_CC_URL, ISOFORM_FUNCTION_CC_FILENAME);
static DownloadItem makeItem(String urlString, String base) {
try {
URL url = new URL(urlString);
return new DownloadItem(url, base);
} catch (MalformedURLException e) {
// should never happen
throw new IsopretRuntimeException("Could not create URL from " + urlString);
}
}
private static final Set<DownloadItem> allDownloadItems = Set.of(go,
jannovarHg38, hgnc, interproDomainDesc, interproDomains,
isoformFunctionMf, isoformFunctionBp, isoformFunctionCc);
*/



private final Path dataDirectory;

Expand Down Expand Up @@ -106,19 +82,4 @@ public Path interproDomainsPath() {
return dataDirectory.resolve(INTERPRO_DOMAINS_FILENAME);
}


/** @return Download data for Gene Ontology (go.json)>
public static DownloadItem go() { return go; }
/** @return Download data for Gene Ontology (go.json)>
public static DownloadItem jannovarHg38() { return jannovarHg38; }
public static DownloadItem hgnc() { return hgnc; }
public static DownloadItem interproDomainDesc() { return interproDomainDesc; }
public static DownloadItem interproDomains() { return interproDomains; }
public static DownloadItem isoformFunctionMf() { return isoformFunctionMf; }
public static DownloadItem isoformFunctionBp() { return isoformFunctionBp; }
public static DownloadItem isoformFunctionCc() { return isoformFunctionCc; }
public static Set<DownloadItem> allDownloadItems() { return allDownloadItems; }
*/


}
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ public Map<TermId, List<TermId>> getOntologyTermToDomainItemsMap() {
public Set<TermId> getDomainItemsAnnotatedByGoTerm(TermId goTermId) {
Set<TermId> domainItemSet = new HashSet<>();
Set<TermId> descendentSet = OntologyAlgorithm.getDescendents(this.ontology, goTermId);
// descendentSet.add(goTermId);
for (Map.Entry<TermId, IsopretAnnotations> entry : associationMap.entrySet()) {
TermId gene = entry.getKey();
for (TermId ontologyTermId : entry.getValue().getAnnotatingTermIds()) {
Expand Down Expand Up @@ -108,13 +107,12 @@ public Map<TermId, DirectAndIndirectTermAnnotations> getAssociationMap(Set<TermI
/* In this step add the direct annotations only */
TermId ontologyTermId = ita.getTermId();
// check if the term is in the ontology (sometimes, obsoletes are used in the bla32 files)
Term term = this.ontology.getTermMap().get(ontologyTermId);
if (term == null) {
Optional<Term> termOpt = ontology.termForTermId(ontologyTermId);
if (termOpt.isEmpty()) {
ontology_term_not_found++;
LOGGER.warn("Unable to retrieve ontology term {} (omitted).", ontologyTermId.getValue());
continue;
}
// if necessary, replace with the latest primary term id
ontologyTermId = this.ontology.getPrimaryTermId(ontologyTermId);
directAnnotationMap.computeIfAbsent(domainTermId, k -> new HashSet<>()).add(ontologyTermId);
}
Expand All @@ -134,8 +132,9 @@ public Map<TermId, DirectAndIndirectTermAnnotations> getAssociationMap(Set<TermI
annotationMap.putIfAbsent(ontologyId, new DirectAndIndirectTermAnnotations(ontologyId));
annotationMap.get(ontologyId).addDirectAnnotatedItem(domainItemTermId);
// In addition to the direct annotation, the gene is also indirectly annotated
// to all of the GO Term's ancestors
Set<TermId> ancs = OntologyAlgorithm.getAncestorTerms(ontology, ontologyId, false);
// to all the GO Term's ancestors
//Set<TermId> ancs = OntologyAlgorithm.getAncestorTerms(ontology, ontology.getRootTermId(), ontologyId, false);
Iterable<TermId> ancs = ontology.graph().getAncestors(ontologyId, false);
for (TermId ancestor : ancs) {
annotationMap.putIfAbsent(ancestor, new DirectAndIndirectTermAnnotations(ancestor));
annotationMap.get(ancestor).addIndirectAnnotatedItem(domainItemTermId);
Expand All @@ -153,7 +152,7 @@ public Set<TermId> getAllAnnotatedGenes() {
/**
* TODO ADD TEST
* @param termId a Gene Ontology id
* @return List of genes/transcripts annotation by this GO term
* @return set of genes/transcripts annotated by this GO term
*/
@Override
public Set<TermId> getDomainItemsAnnotatedByOntologyTerm(TermId termId) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ private static Map<TermId, Set<TermId>> parseFunctionFile(File file, Ontology on
} catch (IOException e) {
throw new PhenolRuntimeException("Could not import isoform_function_list.txt :" + e.getMessage());
}
if (notFound.size() > 0) {
if (!notFound.isEmpty()) {
int n_missing = notFound.size(); // keep logger sane
LOGGER.warn("Could not find " + n_missing + " terms in Ontology");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.jax.isopret.model.GeneModel;
import org.jax.isopret.model.GeneResult;
import org.jax.isopret.model.GeneSymbolAccession;
import org.jax.isopret.model.TranscriptResult;

import java.util.*;
import java.util.stream.Collectors;
Expand All @@ -21,9 +22,9 @@ public class GeneResultImpl implements GeneResult, Comparable<GeneResultImpl> {
/** Accession number of the gene, e.g., ENSG00000001167. */
private final AccessionNumber geneAccession;
private final GeneModel geneModel;
private double expressionFoldChange;
private double expressionLog2FoldChange;
private double expressionP;
private final Map<AccessionNumber, TranscriptResultImpl> transcriptMap;
private final Map<AccessionNumber, TranscriptResult> transcriptMap;



Expand All @@ -34,12 +35,12 @@ public GeneResultImpl(AccessionNumber geneAccession, GeneModel sym) {
}
@Override
public void addExpressionResult(double fc, double p) {
this.expressionFoldChange = fc;
this.expressionLog2FoldChange = fc;
this.expressionP = p;
}
@Override
public void addTranscriptResult(AccessionNumber isoform, double expFC, double P) {
TranscriptResultImpl tresult = new TranscriptResultImpl(isoform, expFC, P);
TranscriptResult tresult = new TranscriptResultImpl(isoform, expFC, P);
transcriptMap.putIfAbsent(isoform, tresult);
}

Expand All @@ -61,10 +62,16 @@ public GeneModel getGeneModel() {
public GeneSymbolAccession getGeneSymbolAccession() {
return new GeneSymbolAccession(geneModel.geneSymbol(), geneAccession);
}
/**
 * Returns the stored expression fold change of this gene on the log2 scale.
 * The value is whatever was last passed as the first argument of
 * {@code addExpressionResult(double, double)}; no transformation is applied here.
 *
 * @return the log2 expression fold change held by this result
 */
@Override
public double getExpressionLog2FoldChange() {
return expressionLog2FoldChange;
}

/**
 * Returns the expression fold change of this gene on the linear scale.
 * <p>
 * The value is stored internally as a log2 fold change, so the linear fold
 * change is {@code 2^log2FC}. Note the argument order of {@link Math#pow(double, double)}:
 * the base (2.0) comes first and the exponent (the log2 value) second.
 * Swapping them would square the log2 value instead of back-transforming it.
 *
 * @return the linear-scale fold change, i.e. 2 raised to the stored log2 fold change
 */
@Override
public double getExpressionFoldChange() {
    // base 2, exponent = log2 fold change  ->  linear fold change
    return Math.pow(2.0, expressionLog2FoldChange);
}

@Override
public double getExpressionP() {
return expressionP;
Expand All @@ -74,12 +81,12 @@ public List<Double> getSplicingPlist() {
return this.transcriptMap
.values()
.stream()
.map(TranscriptResultImpl::getPvalue)
.map(TranscriptResult::getPvalue)
.collect(Collectors.toList());
}

@Override
public Map<AccessionNumber, TranscriptResultImpl> getTranscriptMap() {
public Map<AccessionNumber, TranscriptResult> getTranscriptMap() {
return Collections.unmodifiableMap(transcriptMap);
}

Expand Down Expand Up @@ -123,7 +130,7 @@ public double getSmallestSplicingP() {
return this.transcriptMap
.values()
.stream()
.map(TranscriptResultImpl::getPvalue)
.map(TranscriptResult::getPvalue)
.min(Double::compareTo)
.orElse(1.0);
}
Expand All @@ -132,7 +139,7 @@ private double minp() {
return Math.min(getExpressionP(), getSmallestSplicingP());
}
@Override
public Set<TranscriptResultImpl> getTranscriptResults() {
public Set<TranscriptResult> getTranscriptResults() {
return new HashSet<>(this.transcriptMap.values());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import org.jax.isopret.data.AccessionNumber;
import org.jax.isopret.model.GeneModel;
import org.jax.isopret.model.GeneResult;
import org.jax.isopret.model.TranscriptResult;

import org.monarchinitiative.phenol.ontology.data.TermId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -163,7 +165,7 @@ public Set<TermId> getAllTranscriptTermIds() {
.map(GeneResult::getTranscriptMap)
.map(Map::values)
.flatMap(Collection::stream)
.map(TranscriptResultImpl::getTranscriptId)
.map(TranscriptResult::getTranscriptId)
.map(AccessionNumber::toTermId)
.collect(Collectors.toSet());
}
Expand All @@ -184,7 +186,7 @@ public Set<TermId> dasIsoformTermIds() {
.map(Map::values)
.flatMap(Collection::stream)
.filter(r -> r.isSignificant(this.splicingThreshold ))
.map(TranscriptResultImpl::getTranscriptId)
.map(TranscriptResult::getTranscriptId)
.map(AccessionNumber::toTermId)
.collect(Collectors.toSet());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import org.jax.isopret.data.AccessionNumber;
import org.jax.isopret.model.GeneResult;
import org.jax.isopret.model.TranscriptResult;
import org.monarchinitiative.phenol.analysis.AssociationContainer;
import org.monarchinitiative.phenol.analysis.StudySet;
import org.monarchinitiative.phenol.ontology.data.TermId;
Expand Down Expand Up @@ -155,14 +156,14 @@ private IsoformSpecificThresholder(Map<AccessionNumber, GeneResult> results,
.stream()
.flatMap(r -> r.getTranscriptResults().stream())
.filter(tr -> tr.getPvalue() <= splicingPepThreshold)
.map(TranscriptResultImpl::getTranscriptId)
.map(TranscriptResult::getTranscriptId)
.map(AccessionNumber::toTermId)
.collect(Collectors.toSet());
Set<TermId> dasIsoformPopulation = results
.values()
.stream()
.flatMap(r -> r.getTranscriptResults().stream())
.map(TranscriptResultImpl::getTranscriptId)
.map(TranscriptResult::getTranscriptId)
.map(AccessionNumber::toTermId)
.collect(Collectors.toSet());
LOGGER.info("DAS: {} study set and {} population genes", dasIsoformStudy.size(), dasIsoformPopulation.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
* false-discovery rate (FDR) over the current dataset. Explanations can be found in
* Käll L, Storey JD, MacCoss MJ, Noble WS. Posterior error probabilities and false discovery rates:
* two sides of the same coin. J Proteome Res. 2008 Jan;7(1):40-4. PMID:18052118.
* In brief, the The q-value of a test measures the proportion of false positives incurred (called the false discovery
* In brief, the q-value of a test measures the proportion of false positives incurred (called the false discovery
* rate) when that particular test is called significant (thus, the q-value is the minimum FDR at which the test would be
* called significant in the context of the current dataset). The local FDR (another name for the PEP) measures the
* posterior probability the null hypothesis is true given the test's p-value. Whereas the FDR measures the error rate
* associated with a collection of tests, the PEP measures the probability of error for a single test.
* The mean value of all PEPs that are called significant corresponds to the FDR at the corresponding threshold.
* Our goal is to set the FDR to a given threshold (by default, 0.01) and to define the PEP threshold that calls
* as many tests as possible signficant while still maintaining this threshold. We apply the additional criterion that
* as many tests as possible significant while still maintaining this threshold. We apply the additional criterion that
* if the PEP is larger than 0.25 the test is not called significant, regardless of the FDR.
* @author Peter N Robinson
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@


/**
* With the exception of the header line, the HBA-DEALS and the edgeR results file (from our script to
* Except for the header line, the HBA-DEALS and the edgeR results file (from our script to
* analyze both DGE and DAS) are the same. This class is therefore used to parse lines from either
* HBA-DEALS or edgeR. Note that the objects are only used for parsing and are not a part of the
* analysis model. Each line will be transformed either to a {@link org.jax.isopret.model.GeneResult} or
* a {@link org.jax.isopret.model.TranscriptResult}.
* @param geneAccession ENSEMBL accession number for a gene, e.g., ENSG00000139618
* @param isoform ENSEMBL accession number for a transcript (can be null), e.g., ENST00000560355.1
* @param expFC expression fold change (from HBA-DEALS)
* @param expFC expression fold change (from HBA-DEALS). For expression: log2 FC; for splicing: FC.
* @param raw_p raw p-value (from HBA-DEALS)
* @author Peter N Robinson
*/
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.jax.isopret.model;

import org.jax.isopret.core.impl.rnaseqdata.TranscriptResultImpl;
import org.jax.isopret.data.AccessionNumber;
import org.jax.isopret.data.InterproEntry;
import org.jax.isopret.data.Transcript;
Expand Down Expand Up @@ -53,7 +52,7 @@ public AnnotatedGene(List<Transcript> transcripts,

this.hbaDealsResult = result;
// use HBA Deals results to filter for transcripts that are actually expressed
Map<AccessionNumber, TranscriptResultImpl> transcriptMap = result.getTranscriptMap();
Map<AccessionNumber, TranscriptResult> transcriptMap = result.getTranscriptMap();
expressedTranscripts = transcripts
.stream()
.filter(t -> transcriptMap.containsKey(t.accessionId()))
Expand Down
Loading

0 comments on commit 54c3e8e

Please sign in to comment.