Skip to content

Commit

Permalink
Merge pull request #68 from EvolEcolGroup/version_tests
Browse files Browse the repository at this point in the history
Version tests
  • Loading branch information
dramanica authored Dec 16, 2024
2 parents a7a68b2 + be821a6 commit c254875
Show file tree
Hide file tree
Showing 11 changed files with 341 additions and 166 deletions.
56 changes: 19 additions & 37 deletions R/gen_tibble.R
Original file line number Diff line number Diff line change
Expand Up @@ -288,9 +288,6 @@ gen_tibble.matrix <- function(x, indiv_meta, loci, ...,
backingfile <- change_duplicated_file_name(backingfile)
}

# use code for NA in FBM.256
# x[is.na(x)]<-3

bigsnp_obj <- gt_write_bigsnp_from_dfs(genotypes = x,
indiv_meta = indiv_meta,
loci = loci,
Expand Down Expand Up @@ -383,7 +380,6 @@ gt_write_bigsnp_from_dfs <- function(genotypes, indiv_meta, loci,
sex = 0,
affection = 0,
ploidy = ploidy)

map <- tibble(chromosome = loci$chromosome,
marker.ID = loci$name,
genetic.dist = as.double(loci$genetic_dist), ## make sure that genetic.dist is double
Expand Down Expand Up @@ -519,54 +515,38 @@ change_duplicated_file_name <- function(file){
bk <- paste0(file, ".bk")
rds <- paste0(file, ".rds")

if(file.exists(bk) && !file.exists(rds)){

if(file.exists(bk) | file.exists(rds)){
version <- 2

base_name <- basename(file)

version_pattern <- paste0(base_name, "_v(\\d+)\\.bk$")

# read existing files to check for existing versions
existing_files <- list.files(dirname(bk), pattern = paste0("^", base_name, "_v\\d+\\.bk$"))


if (length(existing_files) > 0) {
versions <- sub(version_pattern, "\\1", existing_files)
versions <- as.numeric(versions)
if (!any(is.na(versions))) {
version <- max(versions) + 1
}
# extract the base name and version number
base_name_pattern <- "^(.*)_v(\\d+)$"
# check for any matches
matches <- regmatches(basename(file), regexec(base_name_pattern, basename(file)))

if (length(matches[[1]]) > 0) {
# Extract base name without version and current version number
base_name <- matches[[1]][2] # Part before "_v<number>"
current_version <- as.numeric(matches[[1]][3]) # extract current version number
} else {
base_name <- basename(file) # Use the full name if there's no "_v" suffix
current_version <- 1
}

new_file <- paste0(file,"_v",version)

return(new_file)
} else if (file.exists(bk) && file.exists(rds)){

version <- 2

base_name <- basename(file)

version_pattern <- paste0(base_name, "_v(\\d+)\\.bk$")

# read existing files to check for existing versions
# read files to check for existing versions
existing_files <- list.files(dirname(bk), pattern = paste0("^", base_name, "_v\\d+\\.bk$"))


if (length(existing_files) > 0) {
versions <- sub(version_pattern, "\\1", existing_files)
versions <- as.numeric(versions)
if (!any(is.na(versions))) {
version <- max(versions) + 1
version <- max(versions) + 1 # add 1 to the version number
}
}

new_file <- paste0(file,"_v",version)
# create new file path
new_file <- paste0(dirname(file), "/", base_name, "_v", version)

return(new_file)
}

return(file)

}
Expand All @@ -579,6 +559,8 @@ cast_chromosome_to_int <- function(chromosome){
}
# if chromosome is a character, then cast it to integer
if (is.character(chromosome)){
# attempt to strip chr from the chromosome
chromosome <- gsub("^(chromosome_|chr_|chromosome|chr)", "", chromosome, ignore.case = TRUE)
chromosome <- tryCatch(as.integer(chromosome), warning = function(e) {as.integer(as.factor(chromosome))})
}
if (is.numeric(chromosome)){
Expand Down
9 changes: 6 additions & 3 deletions R/gt_order_loci.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,23 @@
#' reordered; if TRUE, then the current loci table, which might have been reordered
#' manually, will be used, but only if the positions within each chromosome are
#' sequential
#' @param ignore_genetic_dist boolean to ignore the genetic distance when checking. Note
#' that, if `gentic_dist` are being ignored and they are not sorted, the function will
#' set them to zero to avoid problems with other software.
#' @param quiet boolean to suppress information about the files
#' @param ... other arguments
#' @return A [gen_tibble]
#' @export

gt_order_loci <- function(.x, use_current_table = FALSE, quiet = FALSE, ...){
gt_order_loci <- function(.x, use_current_table = FALSE, ignore_genetic_dist = TRUE, quiet = FALSE, ...){
if (use_current_table){
new_table <- show_loci(.x)
} else {
new_table <- show_loci(.x) %>% dplyr::arrange(.data$chr_int, .data$position)
show_loci(.x) <- new_table
}
# if asked to use the current table, check that it is ordered
is_loci_table_ordered(.x, error_on_false = TRUE)
gt_update_backingfile(.x, quiet=quiet, ...)
is_loci_table_ordered(.x, error_on_false = TRUE, ignore_genetic_dist = ignore_genetic_dist)
gt_update_backingfile(.x, quiet=quiet, ignore_genetic_dist = ignore_genetic_dist, ...)

}
17 changes: 16 additions & 1 deletion R/gt_update_backingfile.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
#' for backing files used to store the data (they will be given a .bk
#' and .RDS automatically). If left to NULL (the default), the file name
#' will be based on the name f the current backing file.
#' @param ignore_genetic_dist boolean to ignore the genetic distance when checking. Note
#' that, if `gentic_dist` are being ignored and they are not sorted, the function will
#' set them to zero to avoid problems with other software.
#' @param chunk_size the number of loci to process at once
#' @param quiet boolean to suppress information about the files
#' @returns a [`gen_tibble`] with a backing file (i.e. a new File Backed Matrix)
#' @export

gt_update_backingfile <- function (.x, backingfile = NULL, chunk_size = NULL,
quiet = FALSE){
ignore_genetic_dist = TRUE, quiet = FALSE){
# if the backingfile is null, create a name based on the current backing file
if (is.null(backingfile)){
backingfile <- change_duplicated_file_name(gt_get_file_names(.x)[2])
Expand Down Expand Up @@ -69,6 +72,18 @@ gt_update_backingfile <- function (.x, backingfile = NULL, chunk_size = NULL,
# same for map
map <- attr(.x$genotypes,"bigsnp")$map
map <- map[.gt_bigsnp_cols(.x),]
# if we ignore genetic distance, set it to zero if it is not sorted
if (ignore_genetic_dist){
if (any(unlist(show_loci(.x) %>%
group_by(.data$chr_int) %>%
group_map(~ is.unsorted(.x$genetic_dist))))){
if (!quiet){
message("Genetic distances are not sorted, setting them to zero")
}
show_loci(.x)$genetic_dist <- 0
}
}


# Create the bigSNP object
bigsnp_obj <- structure(list(genotypes = new_bk_matrix, fam = fam, map = map),
Expand Down
24 changes: 20 additions & 4 deletions R/is_loci_table_ordered.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,28 @@
#' or a [`gen_tibble`].
#' @param error_on_false logical, if `TRUE` an error is thrown if the loci
#' are not ordered.
#' @param ignore_genetic_dist logical, if `TRUE` the physical position is not checked.
#' @param ... other arguments passed to specific methods.
#' @returns a logical vector defining which loci are transversions
#' @rdname is_loci_table_ordered
#' @export
is_loci_table_ordered <- function(.x, error_on_false = FALSE, ...) {
is_loci_table_ordered <- function(.x, error_on_false = FALSE, ignore_genetic_dist = TRUE, ...) {
UseMethod("is_loci_table_ordered", .x)
}

#' @export
#' @rdname is_loci_table_ordered
is_loci_table_ordered.tbl_df <- function(.x, error_on_false = FALSE, ...) {
is_loci_table_ordered.tbl_df <- function(.x, error_on_false = FALSE, ignore_genetic_dist = TRUE, ...) {
#TODO this is a hack to deal with the class being dropped when going through group_map
stopifnot_gen_tibble(.x)
is_loci_table_ordered(.x$genotypes, error_on_false, ...)
is_loci_table_ordered(.x$genotypes, error_on_false = error_on_false,
ignore_genetic_dist = ignore_genetic_dist, ...)
}


#' @export
#' @rdname is_loci_table_ordered
is_loci_table_ordered.vctrs_bigSNP <- function(.x, error_on_false = FALSE, ...) {
is_loci_table_ordered.vctrs_bigSNP <- function(.x, error_on_false = FALSE, ignore_genetic_dist = TRUE, ...) {
rlang::check_dots_empty()

# check that within each chromosome positions are sorted
Expand All @@ -49,6 +51,20 @@ is_loci_table_ordered.vctrs_bigSNP <- function(.x, error_on_false = FALSE, ...)
return(FALSE)
}
}

# check genetic distance
if (!ignore_genetic_dist){
if (any(unlist(show_loci(.x) %>%
group_by(.data$chr_int) %>%
group_map(~ is.unsorted(.x$genetic_dist))))){
if (error_on_false){
stop("Your genetic distances are not sorted within chromosomes")
} else {
return(FALSE)
}
}
}

return(TRUE)
}

12 changes: 11 additions & 1 deletion man/gt_order_loci.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion man/gt_update_backingfile.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 20 additions & 3 deletions man/is_loci_table_ordered.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit c254875

Please sign in to comment.