Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Version tests #68

Merged
merged 17 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 19 additions & 37 deletions R/gen_tibble.R
Original file line number Diff line number Diff line change
Expand Up @@ -288,9 +288,6 @@ gen_tibble.matrix <- function(x, indiv_meta, loci, ...,
backingfile <- change_duplicated_file_name(backingfile)
}

# use code for NA in FBM.256
# x[is.na(x)]<-3

bigsnp_obj <- gt_write_bigsnp_from_dfs(genotypes = x,
indiv_meta = indiv_meta,
loci = loci,
Expand Down Expand Up @@ -383,7 +380,6 @@ gt_write_bigsnp_from_dfs <- function(genotypes, indiv_meta, loci,
sex = 0,
affection = 0,
ploidy = ploidy)

map <- tibble(chromosome = loci$chromosome,
marker.ID = loci$name,
genetic.dist = as.double(loci$genetic_dist), ## make sure that genetic.dist is double
Expand Down Expand Up @@ -519,54 +515,38 @@ change_duplicated_file_name <- function(file){
bk <- paste0(file, ".bk")
rds <- paste0(file, ".rds")

if(file.exists(bk) && !file.exists(rds)){

if(file.exists(bk) | file.exists(rds)){
version <- 2

base_name <- basename(file)

version_pattern <- paste0(base_name, "_v(\\d+)\\.bk$")

# read existing files to check for existing versions
existing_files <- list.files(dirname(bk), pattern = paste0("^", base_name, "_v\\d+\\.bk$"))


if (length(existing_files) > 0) {
versions <- sub(version_pattern, "\\1", existing_files)
versions <- as.numeric(versions)
if (!any(is.na(versions))) {
version <- max(versions) + 1
}
# extract the base name and version number
base_name_pattern <- "^(.*)_v(\\d+)$"
# check for any matches
matches <- regmatches(basename(file), regexec(base_name_pattern, basename(file)))

if (length(matches[[1]]) > 0) {
# Extract base name without version and current version number
base_name <- matches[[1]][2] # Part before "_v<number>"
current_version <- as.numeric(matches[[1]][3]) # extract current version number
} else {
base_name <- basename(file) # Use the full name if there's no "_v" suffix
current_version <- 1
}

new_file <- paste0(file,"_v",version)

return(new_file)
} else if (file.exists(bk) && file.exists(rds)){

version <- 2

base_name <- basename(file)

version_pattern <- paste0(base_name, "_v(\\d+)\\.bk$")

# read existing files to check for existing versions
# read files to check for existing versions
existing_files <- list.files(dirname(bk), pattern = paste0("^", base_name, "_v\\d+\\.bk$"))


if (length(existing_files) > 0) {
versions <- sub(version_pattern, "\\1", existing_files)
versions <- as.numeric(versions)
if (!any(is.na(versions))) {
version <- max(versions) + 1
version <- max(versions) + 1 # add 1 to the version number
}
}

new_file <- paste0(file,"_v",version)
# create new file path
new_file <- paste0(dirname(file), "/", base_name, "_v", version)

return(new_file)
}

return(file)

}
Expand All @@ -579,6 +559,8 @@ cast_chromosome_to_int <- function(chromosome){
}
# if chromosome is a character, then cast it to integer
if (is.character(chromosome)){
# attempt to strip chr from the chromosome
chromosome <- gsub("^(chromosome_|chr_|chromosome|chr)", "", chromosome, ignore.case = TRUE)
chromosome <- tryCatch(as.integer(chromosome), warning = function(e) {as.integer(as.factor(chromosome))})
}
if (is.numeric(chromosome)){
Expand Down
9 changes: 6 additions & 3 deletions R/gt_order_loci.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,23 @@
#' reordered; if TRUE, then the current loci table, which might have been reordered
#' manually, will be used, but only if the positions within each chromosome are
#' sequential
#' @param ignore_genetic_dist boolean to ignore the genetic distance when checking. Note
#' that, if `gentic_dist` are being ignored and they are not sorted, the function will
#' set them to zero to avoid problems with other software.
#' @param quiet boolean to suppress information about the files
#' @param ... other arguments
#' @return A [gen_tibble]
#' @export

gt_order_loci <- function(.x, use_current_table = FALSE, quiet = FALSE, ...){
gt_order_loci <- function(.x, use_current_table = FALSE, ignore_genetic_dist = TRUE, quiet = FALSE, ...){
if (use_current_table){
new_table <- show_loci(.x)
} else {
new_table <- show_loci(.x) %>% dplyr::arrange(.data$chr_int, .data$position)
show_loci(.x) <- new_table
}
# if asked to use the current table, check that it is ordered
is_loci_table_ordered(.x, error_on_false = TRUE)
gt_update_backingfile(.x, quiet=quiet, ...)
is_loci_table_ordered(.x, error_on_false = TRUE, ignore_genetic_dist = ignore_genetic_dist)
gt_update_backingfile(.x, quiet=quiet, ignore_genetic_dist = ignore_genetic_dist, ...)

}
17 changes: 16 additions & 1 deletion R/gt_update_backingfile.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@
#' for backing files used to store the data (they will be given a .bk
#' and .RDS automatically). If left to NULL (the default), the file name
#' will be based on the name f the current backing file.
#' @param ignore_genetic_dist boolean to ignore the genetic distance when checking. Note
#' that, if `gentic_dist` are being ignored and they are not sorted, the function will
#' set them to zero to avoid problems with other software.
#' @param chunk_size the number of loci to process at once
#' @param quiet boolean to suppress information about the files
#' @returns a [`gen_tibble`] with a backing file (i.e. a new File Backed Matrix)
#' @export

gt_update_backingfile <- function (.x, backingfile = NULL, chunk_size = NULL,
quiet = FALSE){
ignore_genetic_dist = TRUE, quiet = FALSE){
# if the backingfile is null, create a name based on the current backing file
if (is.null(backingfile)){
backingfile <- change_duplicated_file_name(gt_get_file_names(.x)[2])
Expand Down Expand Up @@ -69,6 +72,18 @@
# same for map
map <- attr(.x$genotypes,"bigsnp")$map
map <- map[.gt_bigsnp_cols(.x),]
# if we ignore genetic distance, set it to zero if it is not sorted
if (ignore_genetic_dist){
if (any(unlist(show_loci(.x) %>%
group_by(.data$chr_int) %>%
group_map(~ is.unsorted(.x$genetic_dist))))){
if (!quiet){
message("Genetic distances are not sorted, setting them to zero")

Check warning on line 81 in R/gt_update_backingfile.R

View check run for this annotation

Codecov / codecov/patch

R/gt_update_backingfile.R#L81

Added line #L81 was not covered by tests
}
show_loci(.x)$genetic_dist <- 0
}
}


# Create the bigSNP object
bigsnp_obj <- structure(list(genotypes = new_bk_matrix, fam = fam, map = map),
Expand Down
24 changes: 20 additions & 4 deletions R/is_loci_table_ordered.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,28 @@
#' or a [`gen_tibble`].
#' @param error_on_false logical, if `TRUE` an error is thrown if the loci
#' are not ordered.
#' @param ignore_genetic_dist logical, if `TRUE` the physical position is not checked.
#' @param ... other arguments passed to specific methods.
#' @returns a logical vector defining which loci are transversions
#' @rdname is_loci_table_ordered
#' @export
is_loci_table_ordered <- function(.x, error_on_false = FALSE, ...) {
is_loci_table_ordered <- function(.x, error_on_false = FALSE, ignore_genetic_dist = TRUE, ...) {
UseMethod("is_loci_table_ordered", .x)
}

#' @export
#' @rdname is_loci_table_ordered
is_loci_table_ordered.tbl_df <- function(.x, error_on_false = FALSE, ...) {
is_loci_table_ordered.tbl_df <- function(.x, error_on_false = FALSE, ignore_genetic_dist = TRUE, ...) {
#TODO this is a hack to deal with the class being dropped when going through group_map
stopifnot_gen_tibble(.x)
is_loci_table_ordered(.x$genotypes, error_on_false, ...)
is_loci_table_ordered(.x$genotypes, error_on_false = error_on_false,
ignore_genetic_dist = ignore_genetic_dist, ...)
}


#' @export
#' @rdname is_loci_table_ordered
is_loci_table_ordered.vctrs_bigSNP <- function(.x, error_on_false = FALSE, ...) {
is_loci_table_ordered.vctrs_bigSNP <- function(.x, error_on_false = FALSE, ignore_genetic_dist = TRUE, ...) {
rlang::check_dots_empty()

# check that within each chromosome positions are sorted
Expand All @@ -49,6 +51,20 @@
return(FALSE)
}
}

# check genetic distance
if (!ignore_genetic_dist){
if (any(unlist(show_loci(.x) %>%
group_by(.data$chr_int) %>%
group_map(~ is.unsorted(.x$genetic_dist))))){
if (error_on_false){
stop("Your genetic distances are not sorted within chromosomes")

Check warning on line 61 in R/is_loci_table_ordered.R

View check run for this annotation

Codecov / codecov/patch

R/is_loci_table_ordered.R#L61

Added line #L61 was not covered by tests
} else {
return(FALSE)
}
}
}

return(TRUE)
}

12 changes: 11 additions & 1 deletion man/gt_order_loci.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion man/gt_update_backingfile.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 20 additions & 3 deletions man/is_loci_table_ordered.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading