Skip to content

Commit

Permalink
Merge pull request #38 from EvolEcolGroup/subset_tests
Browse files Browse the repository at this point in the history
Subset tests
  • Loading branch information
dramanica authored May 17, 2024
2 parents 6b65c6f + 6275dfe commit 7a4bdf0
Show file tree
Hide file tree
Showing 30 changed files with 530 additions and 33 deletions.
1 change: 0 additions & 1 deletion R/filter_high_relatedness.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#' coefficients and returns the maximum set of individuals that contains no
#' relationships above the given threshold.
#'
#' TODO this function needs a test
#'
#' @param matrix a square symmetric matrix of individuals containing relationship coefficients
#' @param .x a [`gen_tibble`] object
Expand Down
7 changes: 6 additions & 1 deletion R/gen_tibble.R
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,12 @@ gen_tibble.matrix <- function(x, indiv_meta, loci, ...,
stop ("'0' can not be a valid allele (it is the default missing allele value!)")
}

# TODO check object types
if (!inherits(loci, "data.frame") || inherits(x, "tbl")){
stop("loci must be one of data.frame or tbl")
}
if (!inherits(indiv_meta, "data.frame") || inherits(x, "tbl") || is.list(x)){
stop("indiv_meta must be one of data.frame, tbl, or list")
}
if (!all(c("id", "population") %in% names(indiv_meta))){
stop("ind_meta does not include the compulsory columns 'id' and 'population")
}
Expand Down
2 changes: 1 addition & 1 deletion R/gt_has_imputed.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ gt_uses_imputed <- function (x){
x <- x$genotypes
}
if (!gt_has_imputed(x)){
stop("this dataset does not have any imputated values to use!")
stop("this dataset does not have any imputed values to use!")
}
if (identical(attr(x,"bigsnp")$genotypes$code256, bigsnpr::CODE_IMPUTE_PRED) |
identical(attr(x,"bigsnp")$genotypes$code256, bigsnpr::CODE_DOSAGE)){
Expand Down
2 changes: 1 addition & 1 deletion R/gt_impute_simple.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#'
#' @param x a [gen_tibble] with missing data
#' @param method one of
#' - 'median': the most frequent genotype
#' - 'mode': the most frequent genotype
#' - 'mean0': the mean rounded to the nearest integer
#' - 'mean2': the mean rounded to 2 decimal places
#' - 'random': randomly sample a genotype based on the observed allele frequencies
Expand Down
2 changes: 1 addition & 1 deletion R/gt_pca_tidiers.R
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ augment_loci.gt_pca <- function(x, data = NULL, k= NULL, ...) {
ret <- if (!missing(data) && !is.null(data)) {
#check that names of the two columns are in sync
# @TODO reinstate this check once we have rownames in the pca object for loadings
if (!all.equal(loci_names(data), rownames(as.data.frame(x$v)))){
if (!identical(loci_names(data), rownames(as.data.frame(x$v)))){
stop("the loci names in 'data' do not correspond to the loci in the pca object 'x'")
}
show_loci(data) <- show_loci(data) %>% tibble::add_column(loadings)
Expand Down
9 changes: 7 additions & 2 deletions R/loci_ld_clump.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,18 @@ loci_ld_clump.vctrs_bigSNP <- function(.x,
...)
{
rlang::check_dots_empty()

stopifnot_diploid(.x)

if (gt_has_imputed(.x) && gt_uses_imputed(.x)==FALSE){ #but not uses_imputed
gt_set_imputed(.x, set = TRUE)
on.exit(gt_set_imputed(.x, set = FALSE))
}

stopifnot_diploid(.x)
if(!identical(show_loci(.x),.x %>% show_loci() %>% arrange(show_loci(.x)$chromosome,show_loci(.x)$position))){
stop("Your loci are not sorted, try using: show_loci(.data) <- .data %>% show_loci() %>% arrange(chromosome,position)")

}

# get the FBM
geno_fbm <- attr(.x,"bigsnp")$genotypes
# rows (individuals) that we want to use
Expand Down
1 change: 1 addition & 0 deletions R/loci_transitions.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ loci_transitions <- function(.x, ...) {
loci_transitions.tbl_df <- function(.x, ...) {
#TODO this is a hack to deal with the class being dropped when going through group_map
stopifnot_gen_tibble(.x)
check_allele_alphabet(.x$genotypes)
loci_transitions(.x$genotypes, ...)
}

Expand Down
1 change: 1 addition & 0 deletions R/loci_transversions.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ loci_transversions <- function(.x, ...) {
loci_transversions.tbl_df <- function(.x, ...) {
#TODO this is a hack to deal with the class being dropped when going through group_map
stopifnot_gen_tibble(.x)
check_allele_alphabet(.x$genotypes)
loci_transversions(.x$genotypes, ...)
}

Expand Down
3 changes: 1 addition & 2 deletions R/qc_report_indiv.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ qc_report_indiv <- function(.x, kings_threshold = NULL, ...){
#' ready plots.
#'
#' @param object an object of class `qc_report_indiv`
#' @param type the type of plot (`scatter`)
#' @param type the type of plot (`scatter`,`relatedness`)
#' @param miss_threshold a threshold for the accepted rate of missingness within
#' individuals
#' @param kings_threshold an optional numeric, a threshold of relatedness for the sample
Expand Down Expand Up @@ -96,7 +96,6 @@ autoplot_qc_report_indiv <- function(object, miss_threshold = miss_threshold){

autoplot_qc_report_indiv_king <- function(object, kings_threshold = kings_threshold){

browser()
king <- as.data.frame(attr(object$to_keep, "king"))
num_samples <- nrow(king)
king$row <- colnames(king)
Expand Down
2 changes: 1 addition & 1 deletion R/select_loci_if.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ select_loci_if <-function(.data, .sel_logical){
# and now evaluate it, allowing it to see the data
loci_sel <- rlang::eval_tidy(sel_defused,data=.data)
if (!inherits(loci_sel,"logical")){
stop(".sel_logical should be a logical (boolean) vector")
stop(".sel_logical should be a logical boolean vector")
}
if (length(loci_sel) != ncol(show_genotypes(.data$genotypes))){
stop(".sel_logical should be the same length as the number of loci")
Expand Down
3 changes: 3 additions & 0 deletions inst/extdata/pop_b.ped
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pop_b SA073 0 0 2 -9 A A G G A A A A G G C C G G G G T T G G G G C C C C A A C T A G A A
pop_b SA1021 0 0 2 -9 G A A G A A G A G G C C T G A G 0 0 G G G G C C C C G A T T G G A A
pop_b SA1008 0 0 1 -9 A A A G A A G A G G C C T G G G T T G G G G C C C C A A C T G G A A
11 changes: 11 additions & 0 deletions inst/extdata/related/families_hwe.hwe
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
CHR SNP TEST A1 A2 GENO O(HET) E(HET) P
1 1 ALL(NP) 2 1 0/4/6 0.4 0.32 1
1 2 ALL(NP) 2 1 2/4/6 0.3333 0.4444 0.5176
1 3 ALL(NP) 2 1 0/5/7 0.4167 0.3299 1
1 4 ALL(NP) 1 2 2/4/5 0.3636 0.4628 0.5377
1 5 ALL(NP) 2 1 0/7/5 0.5833 0.4132 0.4874
1 6 ALL(NP) 2 1 1/6/3 0.6 0.48 1
1 7 ALL(NP) 2 1 1/5/5 0.4545 0.4339 1
1 8 ALL(NP) 2 1 0/8/4 0.6667 0.4444 0.216
1 9 ALL(NP) 2 1 0/6/5 0.5455 0.3967 0.5046
1 10 ALL(NP) 1 2 2/7/3 0.5833 0.4965 1
12 changes: 12 additions & 0 deletions inst/extdata/related/families_hwe.nosex
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10
11 11
12 12
11 changes: 11 additions & 0 deletions inst/extdata/related/families_hwe_midp.hwe
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
CHR SNP TEST A1 A2 GENO O(HET) E(HET) P
1 1 ALL(NP) 2 1 0/4/6 0.4 0.32 0.6533
1 2 ALL(NP) 2 1 2/4/6 0.3333 0.4444 0.3668
1 3 ALL(NP) 2 1 0/5/7 0.4167 0.3299 0.7019
1 4 ALL(NP) 1 2 2/4/5 0.3636 0.4628 0.3643
1 5 ALL(NP) 2 1 0/7/5 0.5833 0.4132 0.341
1 6 ALL(NP) 2 1 1/6/3 0.6 0.48 0.7866
1 7 ALL(NP) 2 1 1/5/5 0.4545 0.4339 0.7399
1 8 ALL(NP) 2 1 0/8/4 0.6667 0.4444 0.1299
1 9 ALL(NP) 2 1 0/6/5 0.5455 0.3967 0.3065
1 10 ALL(NP) 1 2 2/7/3 0.5833 0.4965 0.7969
12 changes: 12 additions & 0 deletions inst/extdata/related/families_hwe_midp.nosex
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10
11 11
12 12
2 changes: 1 addition & 1 deletion man/autoplot.qc_report_indiv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions man/filter_high_relatedness.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/gt_impute_simple.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 16 additions & 0 deletions tests/testthat/test_augment_loci.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
test_that("augment_loci adds to loci_table",{
bed_file <- system.file("extdata", "example-missing.bed", package = "bigsnpr")
missing_gt <- gen_tibble(bed_file, backingfile = tempfile("missing_"),quiet=TRUE)
missing_gt <- gt_impute_simple(missing_gt)
missing_pca <- missing_gt %>% gt_pca_partialSVD()

#Add loadings to loci table
missing_pca_load <- augment_loci(missing_pca, data = missing_gt)
expect_true(all(colnames(missing_pca_load) == c(colnames(show_loci(missing_gt)),paste0(".loadingPC", c(1:10)))))

#Try assigning loadings to object with fewer loci
missing_gt_rm <- missing_gt %>% select_loci(c(1:400))
expect_error(augment_loci(missing_pca, data = missing_gt_rm),"the loci names in 'data' do not correspond to the loci in the pca object ")


})
103 changes: 96 additions & 7 deletions tests/testthat/test_gen_tibble.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,19 @@ test_that("create gen_tibble from dfs",{

})

test_genotypes_c <- rbind(c("1","1","0","1","1","0"),
c("2","1","0","0","0","0"),
c("2","2","0","0","1","1"))


test_that("gen_tibble does not accept character matrix",{
expect_error(test_dfs_gt <- gen_tibble(test_genotypes_c, indiv_meta = test_indiv_meta,
loci = test_loci, quiet = TRUE),"'x' is not a matrix of integers")
# now create it directly from the dfs
test_that("create gen_tibble from dfs",{
test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = test_indiv_meta,
loci = test_loci, quiet = TRUE)
# because of the different backing file info, we cannot use identical on the whole object
expect_true(identical(show_genotypes(test_gt), show_genotypes(test_dfs_gt)))
expect_true(identical(show_loci(test_gt), show_loci(test_dfs_gt)))
expect_true(identical(test_gt %>% select(-genotypes),
test_dfs_gt %>% select(-genotypes)))
})


test_that("gen_tibble catches invalid alleles",{
test_loci_wrong <- test_loci
test_loci_wrong$allele_alt[1] <- "N"
Expand Down Expand Up @@ -70,6 +73,92 @@ test_that("gen_tibble catches invalid alleles",{

})


test_that("if order of loci is changed, order of genotypes also changes",{

pop_b <- gen_tibble(system.file("extdata/pop_b.bed", package="tidypopgen"),backingfile = tempfile(), quiet = TRUE)
#original genotypes
pop_b_gen <- show_genotypes(pop_b)

#now scramble the loci
set.seed(123)
random_order <- sample(1:17)
show_loci(pop_b) <- pop_b %>% select_loci(all_of(random_order)) %>% show_loci()

#reorder the original genotypes according to 'random_order'
pop_b_gen_reordered <- pop_b_gen[,random_order]

#check that genotypes are now reordered according to random order
expect_equal(pop_b_gen_reordered, show_genotypes(pop_b))


})

test_that("gen_tibble does not accept character matrix",{
test_genotypes_c <- rbind(c("1","1","0","1","1","0"),
c("2","1","0","0","0","0"),
c("2","2","0","0","1","1"))
expect_error(test_dfs_gt <- gen_tibble(test_genotypes_c, indiv_meta = test_indiv_meta,
loci = test_loci, quiet = TRUE),"'x' is not a matrix of integers")
})

test_that("gen_tibble wrong filetype error",{
expect_error(test_dfs_gt <- gen_tibble(system.file("extdata/related/test_king.kin0", package = "tidypopgen")),
"file_path should be pointing ")
})

test_that("gen_tibble loci is dataframe or tbl",{

test_loci <- data.frame(name=paste0("rs",1:6),
chromosome=paste0("chr",c(1,1,1,1,2,2)),
position=as.integer(c(3,5,65,343,23,456)),
genetic_dist = as.integer(rep(0,6)),
allele_ref = c("A","T","C","G","C","T"),
allele_alt = c("T","C", NA,"C","G","A"))
wrong_loci_matrix <- as.matrix(test_loci)

expect_error(test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = test_indiv_meta,
loci = wrong_loci_matrix, quiet = TRUE),"loci must be one of data.frame or tbl")
})

test_that("gen_tibble required id and population",{
wrong_indiv_meta <- data.frame (x =c("a","b","c"),
y = c("pop1","pop1","pop2"))
expect_error(test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = wrong_indiv_meta,
loci = test_loci, quiet = TRUE),"ind_meta does not include the compulsory columns")
})

test_that("gen_tibble indiv_meta is list, dataframe, or tbl",{
wrong_indiv_meta <- data.frame (id=c("a","b","c"),
population = c("pop1","pop1","pop2"))
wrong_indiv_meta_matrix <- as.matrix(wrong_indiv_meta)

expect_error(test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = wrong_indiv_meta_matrix,
loci = test_loci, quiet = TRUE),"indiv_meta must be one of data.frame, tbl, or list")
})

test_that("gen_tibble identifies wrong dimensions in genotypes",{
wrong_genotypes <- rbind(c(1,1,0,1,1,0),
c(2,1,0,0,0,0))
expect_error(test_dfs_gt <- gen_tibble(wrong_genotypes, indiv_meta = test_indiv_meta,
loci = test_loci, quiet = TRUE),
"there is a mismatch between the number of loci in the genotype table x and in the loci table")

})

test_that("gen_tibble identifies wrong loci table columns",{
wrong_loci <- data.frame(a=paste0("rs",1:6),
b=paste0("chr",c(1,1,1,1,2,2)),
c=as.integer(c(3,5,65,343,23,456)),
d = as.integer(rep(0,6)),
e = c("A","T","C","G","C","T"),
f = c("T","C", NA,"C","G","A"))
expect_error(test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = test_indiv_meta,
loci = wrong_loci, quiet = TRUE),
"loci does not include the compulsory columns")
})


test_that("gen_tibble from files",{
bed_path <- system.file("extdata/pop_a.bed", package = "tidypopgen")
pop_a_gt <- gen_tibble(bed_path, quiet=TRUE, backingfile = tempfile())
Expand Down
Loading

0 comments on commit 7a4bdf0

Please sign in to comment.