-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Optimal Hard Thresholding to determine optimal encoding dimension (#69)
* changed estimateBestQ to use OHT
* edit estimateBestQ error messages and variable names
* add unit tests for OHT
* update OHT tests
* add denoiseR package to Suggests
* add xcolor package for vignette building
* added my name
* update vignette
* add if statement to transpose zScores if aspect ratio larger than 1
* replace small latent space error by warning
* fix estimateBestQ test error
* Use GHA checkout V4 & R 4.4 for mac and windows

---------

Co-authored-by: AtaJadidAhari <[email protected]>
- Loading branch information
1 parent
dc99588
commit 52581f5
Showing
11 changed files
with
257 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ Authors@R: c( | |
email="[email protected]", comment=c(ORCID="0000-0002-1091-205X")), | ||
person("Agne", "Matuseviciute", role=c("aut")), | ||
person(c("Michaela", "Fee"), "Müller", role=c("ctb")), | ||
person("Andrea", "Raithel", role=c("ctb")), | ||
person("Vicente", "Yepez", role=c("aut"), email="[email protected]"), | ||
person("Julien", "Gagneur", role=c("aut"), email="[email protected]")) | ||
Description: Identification of aberrant gene expression in RNA-seq data. | ||
|
@@ -27,7 +28,7 @@ biocViews: ImmunoOncology, RNASeq, Transcriptomics, Alignment, Sequencing, | |
License: MIT + file LICENSE | ||
NeedsCompilation: yes | ||
Encoding: UTF-8 | ||
RoxygenNote: 7.2.3 | ||
RoxygenNote: 7.3.1 | ||
Depends: | ||
R (>= 3.6), | ||
BiocParallel, | ||
|
@@ -59,7 +60,9 @@ Imports: | |
scales, | ||
splines, | ||
stats, | ||
utils | ||
utils, | ||
RMTstat, | ||
pracma | ||
Suggests: | ||
testthat, | ||
knitr, | ||
|
@@ -73,7 +76,8 @@ Suggests: | |
covr, | ||
GenomeInfoDb, | ||
ggbio, | ||
biovizBase | ||
biovizBase, | ||
denoiseR | ||
LinkingTo: | ||
Rcpp, | ||
RcppArmadillo | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
context("Testing the estimateBestQ function (Optimal Hard Thresholding)") | ||
|
||
library(denoiseR) | ||
|
||
test_that("Input validation handles NULL and non-matrix inputs", {
    # Calling without any usable input must fail with the same message,
    # whether the arguments are omitted or passed explicitly as NULL.
    missingInputMsg <- "Please provide an OutriderDataSet or a z-score matrix."
    expect_error(estimateBestQ(), missingInputMsg)
    expect_error(estimateBestQ(NULL, NULL), missingInputMsg)

    # Inputs of the wrong type each get their own error message.
    expect_error(estimateBestQ(zScores = "not a matrix"),
            "Provided zScores are not a matrix.")
    expect_error(estimateBestQ(ods = "not an ods"),
            "Please provide an OutriderDataSet.")

    # Build an OutriderDataSet from the example counts shipped with OUTRIDER;
    # non expressed genes are filtered out before estimating size factors.
    countsPath <- system.file('extdata', 'GTExSkinSmall.tsv',
            package='OUTRIDER')
    countsTable <- read.table(countsPath, check.names=FALSE)
    exampleOds <- OutriderDataSet(countData=countsTable)
    exampleOds <- filterExpression(exampleOds, minCounts=TRUE,
            filterGenes=TRUE)
    exampleOds <- estimateSizeFactors(exampleOds)

    # Supplying z-scores together with an ods is expected to raise a warning.
    dummyZScores <- matrix(c(1, 2, 3, 4, 5, 6), nrow = 3, ncol = 2)
    expect_warning(estimateBestQ(ods = exampleOds, zScores = dummyZScores),
            "Provided z-scores are ignored and recalculated from ods.")
})
|
||
test_that("User is notified about invalid matrix values", {
    # One infinite entry in the z-score matrix must trigger the error.
    zWithInf <- matrix(c(1, 2, 3, 4, 5, Inf), nrow = 3, ncol = 2)
    expect_error(estimateBestQ(zScores = zWithInf),
            "Z-score matrix contains infinite values.")
})
|
||
test_that("optimalSVHTCoef works correctly", {
    # Spot-check the coefficient at two inputs against reference values
    # rounded to two decimals.
    coefAtHalf <- optimalSVHTCoef(0.5)
    expect_equal(coefAtHalf, 1.98, tolerance = 0.01)

    coefAtTenth <- optimalSVHTCoef(0.1)
    expect_equal(coefAtTenth, 1.58, tolerance = 0.01)
})
|
||
test_that("medianMarchenkoPastur works correctly", {
    # Expected outputs are derived from Table IV in Gavish and Donoho (2014)
    coefAtHalf <- optimalSVHTCoef(0.5)
    medianAtHalf <- medianMarchenkoPastur(100, 200)
    expect_equal(coefAtHalf / sqrt(medianAtHalf),
            2.1711, tolerance = 0.0001)

    coefAtTenth <- optimalSVHTCoef(0.1)
    medianAtTenth <- medianMarchenkoPastur(100, 1000)
    expect_equal(coefAtTenth / sqrt(medianAtTenth),
            1.6089, tolerance = 0.0001)
})
|
||
test_that("Encoding dimensions are properly calculated for simulated z-scores", {
    # Case 1: simulate zScore matrix consisting of signal and noise.
    # The estimate is expected to equal the simulated latent dimension.
    set.seed(42)
    numGenes <- 10000
    numSamples <- 200
    latentDim <- 50
    signalNoiseRatio <- 5
    zTilde <- LRsim(numGenes, numSamples, latentDim, signalNoiseRatio)$X * 1000

    expect_equal(estimateBestQ(zScores = zTilde),
            latentDim)

    # Case 2: simulate zScore matrix with beta > 1
    # (fewer genes than samples in this setup).
    set.seed(42)
    numGenes <- 50
    numSamples <- 200
    latentDim <- 20
    signalNoiseRatio <- 5
    zTilde <- LRsim(numGenes, numSamples, latentDim, signalNoiseRatio)$X * 1000

    expect_equal(estimateBestQ(zScores = zTilde),
            latentDim)

    # Case 3: simulate zScore matrix consisting of noise only.
    # Dimensions are stated explicitly instead of being inherited from case 2.
    set.seed(42)
    numGenes <- 50
    numSamples <- 200
    zTilde <- matrix(rnorm(numGenes * numSamples),
            nrow = numGenes, ncol = numSamples)

    # The pieces of the expected warning are joined with single spaces
    # (paste's default separator) into one regular expression.
    expectedWarning <- paste(
            "Optimal latent space dimension is smaller than 2\\. Check your count matrix and",
            "verify that all samples have the expected number of counts",
            "\\(hist\\(colSums\\(counts\\(ods\\)\\)\\)\\)\\.",
            "For now\\, the latent space dimension is set to 2\\.")

    # With no signal the function is expected to warn and return 2.
    expect_warning(expect_equal(estimateBestQ(zScores = zTilde), 2),
            expectedWarning)
})
|
||
test_that("Encoding dimensions are properly calculated for real ODS", {
    # Build an OutriderDataSet from the example counts shipped with OUTRIDER.
    ctsFile <- system.file('extdata', 'GTExSkinSmall.tsv',
            package='OUTRIDER')
    ctsTable <- read.table(ctsFile, check.names=FALSE)
    ods <- OutriderDataSet(countData=ctsTable)
    ods <- filterExpression(ods, minCounts=TRUE, filterGenes=TRUE)
    ods <- estimateSizeFactors(ods)

    outsingleResult <- 5 # Expected value was calculated with OutSingle

    # expect_equal() treats `tolerance` as a relative difference, so
    # tolerance = 1 would accept estimates far away from the reference.
    # Check the absolute deviation instead: at most one dimension off.
    estimatedQ <- estimateBestQ(ods = ods)
    expect_lte(abs(estimatedQ - outsingleResult), 1)
})
Oops, something went wrong.