From ae11cf433811c0f16d555e0a33f444cd0d05143d Mon Sep 17 00:00:00 2001 From: Andrea Manica Date: Fri, 13 Sep 2024 17:49:40 +0100 Subject: [PATCH] clean up files after failed gen_tibble Fix #50 --- R/gen_tibble.R | 21 ++++++++++++++++----- R/gen_tibble_ped.R | 3 ++- tests/testthat/test_gen_tibble.R | 32 +++++++++++++++++++++++++++++--- 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/R/gen_tibble.R b/R/gen_tibble.R index 918ac495..6eab840c 100644 --- a/R/gen_tibble.R +++ b/R/gen_tibble.R @@ -198,8 +198,9 @@ gen_tibble_bed_rds <- function(x, ..., indiv_meta, class = "gen_tbl" ) - check_allele_alphabet (new_gen_tbl, valid_alleles = valid_alleles, - missing_alleles = missing_alleles) + check_allele_alphabet(new_gen_tbl, valid_alleles = valid_alleles, + missing_alleles = missing_alleles, + remove_on_fail = TRUE) show_loci(new_gen_tbl) <- harmonise_missing_values(show_loci(new_gen_tbl), missing_alleles = missing_alleles) return(new_gen_tbl) @@ -272,8 +273,9 @@ gen_tibble.matrix <- function(x, indiv_meta, loci, ..., indiv_meta, class = "gen_tbl" ) - check_allele_alphabet (new_gen_tbl, valid_alleles = valid_alleles, - missing_alleles = missing_alleles) + check_allele_alphabet(new_gen_tbl, valid_alleles = valid_alleles, + missing_alleles = missing_alleles, + remove_on_fail = TRUE) show_loci(new_gen_tbl) <- harmonise_missing_values(show_loci(new_gen_tbl), missing_alleles = missing_alleles) files_in_use <- gt_save(new_gen_tbl, quiet = quiet) return(new_gen_tbl) @@ -437,9 +439,18 @@ tbl_sum.gen_tbl <- function(x, ...) { # function to check the allele alphabet check_allele_alphabet <- function(x, valid_alleles = c("A", "T", "C", "G"), - missing_alleles = c("0",".")){ + missing_alleles = c("0","."), + remove_on_fail = FALSE){ if (any(!show_loci(x)$allele_ref %in% c(valid_alleles,missing_alleles,NA), !show_loci(x)$allele_alt %in% c(valid_alleles,missing_alleles,NA))){ + if (remove_on_fail){ # remove files if they were generated + if(file.exists(gt_get_file_names(x)[1])){ + file.remove(gt_get_file_names(x)[1]) + } + if(file.exists(gt_get_file_names(x)[2])){ + file.remove(gt_get_file_names(x)[2]) + } + } stop("valid alleles are ", paste(c(valid_alleles,missing_alleles), collapse=" ")," but ", paste(unique(c(show_loci(x)$allele_ref,show_loci(x)$allele_alt)), collapse=" "), " were found.") diff --git a/R/gen_tibble_ped.R b/R/gen_tibble_ped.R index b596a072..1e4ccea4 100644 --- a/R/gen_tibble_ped.R +++ b/R/gen_tibble_ped.R @@ -19,7 +19,8 @@ gen_tibble_ped <- function(x, ..., loci = res$map, backingfile = backingfile, quiet=quiet) check_allele_alphabet (new_gen_tbl, valid_alleles = valid_alleles, - missing_alleles = missing_alleles) + missing_alleles = missing_alleles, + remove_on_fail = TRUE) show_loci(new_gen_tbl) <- harmonise_missing_values(show_loci(new_gen_tbl), missing_alleles = missing_alleles) return(new_gen_tbl) diff --git a/tests/testthat/test_gen_tibble.R b/tests/testthat/test_gen_tibble.R index 20124450..2fc12911 100644 --- a/tests/testthat/test_gen_tibble.R +++ b/tests/testthat/test_gen_tibble.R @@ -56,7 +56,7 @@ test_that("gen_tibble catches invalid alleles",{ valid_alleles = c("A","C","T","G","N"), quiet = TRUE) expect_true("N" %in% show_loci(test_dfs_gt)$allele_alt) - # but if we add to missing values it shoudl be turned into a zero + # but if we add to missing values it should be turned into a zero test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = test_indiv_meta, loci = test_loci_wrong, missing_alleles = c("0",".","N"), @@ -174,7 +174,7 @@ test_that("gen_tibble from files",{ # PLINK PED files ######################## ped_path <- system.file("extdata/pop_a.ped", package = "tidypopgen") - pop_a_ped_gt <- gen_tibble(ped_path, quiet=TRUE,backingfile = tempfile()) + pop_a_ped_gt <- gen_tibble(ped_path, quiet=TRUE, backingfile = tempfile()) # because ref and alt are defined based on which occurs first in a ped, some alleles will be swapped equal_geno <- show_genotypes(pop_a_gt)==show_genotypes(pop_a_ped_gt) not_equal <- which(!apply(equal_geno,2,all)) @@ -417,4 +417,30 @@ test_that("check summary stats are the same for gen_tibbles read in different wa }) - +test_that("on error, we remove the old fils",{ + # create file + test_indiv_meta <- data.frame (id=c("a","b","c"), + population = c("pop1","pop1","pop2")) + test_genotypes <- rbind(c(1,1,0,1,1,0), + c(2,1,0,0,0,0), + c(2,2,0,0,1,1)) + test_loci <- data.frame(name=paste0("rs",1:6), + chromosome=paste0("chr",c(1,1,1,1,2,2)), + position=as.integer(c(3,5,65,343,23,456)), + genetic_dist = as.integer(rep(0,6)), + allele_ref = c("A","T","C","G","C","T"), + allele_alt = c("T","C", NA,"C","G","A")) + test_loci_wrong <- test_loci + test_loci_wrong$allele_alt[1] <- "N" + this_bkfile <- tempfile() + expect_error(test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = test_indiv_meta, + loci = test_loci_wrong, + backingfile = this_bkfile, + quiet = TRUE),"valid alleles are") + expect_false(file.exists(paste0(this_bkfile,".bk"))) + test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = test_indiv_meta, + loci = test_loci, + backingfile = this_bkfile, + quiet = TRUE) + expect_true(file.exists(paste0(this_bkfile,".bk"))) +})