Skip to content

Commit

Permalink
Merge pull request #10 from mattssca/test_branch
Browse files Browse the repository at this point in the history
Updates to purify_regions to take chromosomal ranges into considerat…
  • Loading branch information
mattssca authored Jan 10, 2024
2 parents 7e6ce43 + 263ba46 commit 8f7715c
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 8 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export(get_gene_info)
export(purify_chr)
export(purify_regions)
export(region_ranger)
export(sanity_check_regions)
import(data.table, except = c("last", "first", "between", "transpose"))
import(dplyr)
import(stats, except = c("lag", "filter"))
Expand Down
26 changes: 19 additions & 7 deletions R/purify_regions.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
#' `qchrom` (string), `qstart` (string, or integer), and `qend` (string or integer).
#' These parameters can also accept a vector of characters for multiple regions.
#' The function also handles chromosome prefixes in the returned object,
#' based on the selected `projection`.
#' based on the selected `projection`. In addition, this function checks that
#' the start coordinate of each region is smaller than its end coordinate (an
#' error is raised if start is greater than or equal to end). It also ensures
#' that the specified ranges are within the actual chromosomal range.
#'
#' @param these_regions The region(s) to be queried. Can be a data frame with
#' regions with the following columns; chrom, start, end.
Expand Down Expand Up @@ -61,9 +64,6 @@ purify_regions <- function(these_regions = NULL,
`these_regions` or a combination of `qchrom`, `qstart`, and `qend`")
}

#TODO: Checks if the specified region is outside the actual chromosomal ranges.
#TODO: Check if end is greater than start.

#wrangle the regions provided
if(!is.null(these_regions)){
if(is.data.frame(these_regions)){
Expand Down Expand Up @@ -102,15 +102,27 @@ purify_regions <- function(these_regions = NULL,
or individually specify the chromosome, start and end positions with;
`qchrom`, `qstart`, and `qend`")
}


#enforce data types
region_table = region_table %>%
dplyr::mutate(chrom = as.character(chrom),
start = as.integer(start),
end = as.integer(end))

#run helper function to deal with prefixes
region_table = purify_chr(projection = projection,
incoming_table = region_table)
region_table = BioMaesteR::purify_chr(projection = projection,
incoming_table = region_table)

#check if regions make sense
BioMaesteR::sanity_check_regions(incoming_regions = region_table,
projection = projection)

#enforce data types
region_table$chrom = as.character(region_table$chrom)
region_table$start = as.integer(region_table$start)
region_table$end = as.integer(region_table$end)

region_table = as.data.table(region_table)

return(region_table)
}
Expand Down
57 changes: 57 additions & 0 deletions R/sanity_check_regions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#' @title Sanity Check Regions.
#'
#' @description Helper function ensuring the provided regions are valid.
#'
#' @details This function raises an error if, for any region, the start
#' coordinate is greater than or equal to the end coordinate. It also raises an
#' error if a start or end coordinate lies beyond the end of the q arm of the
#' region's chromosome, as listed in the chromosome arm table of the selected
#' projection. Regions on chromosomes that are not listed in that table are not
#' range-checked. A table with zero rows passes all checks.
#'
#' @param incoming_regions A data frame with regions with the following columns;
#' chrom, start, end.
#' @param projection The projection, either "hg38" or "grch37".
#'
#' @return Nothing (invisible `NULL`). The function is called for its side
#' effect of stopping with an error when a region is invalid.
#'
#' @import dplyr
#'
#' @export
#'
#' @examples
#' #Example 1 - Check a data frame with two regions
#' my_regions = data.frame(chrom = c("chr1", "chr2"), start = c(100, 100), end = c(200, 200))
#' sanity_check_regions(incoming_regions = my_regions, projection = "hg38")
#'
sanity_check_regions <- function(incoming_regions = NULL,
                                 projection = NULL){

  if(is.null(incoming_regions)){
    stop("No regions provided")
  }

  #validate the projection before comparing it; the default (NULL) would
  #otherwise fail inside `if()` with "argument is of length zero"
  projection <- as.character(projection)
  if(length(projection) != 1L || is.na(projection)){
    stop("Invalid projection specified")
  }

  if(projection == "hg38"){
    chromosome_arms <- chromosome_arms_hg38
  }else if(projection == "grch37"){
    chromosome_arms <- chromosome_arms_grch37
  }else{
    stop("Invalid projection specified")
  }

  #make sure the columns used below are present
  missing_columns <- setdiff(c("chrom", "start", "end"), names(incoming_regions))
  if(length(missing_columns) > 0){
    stop(paste("Missing required column(s) in incoming_regions:",
               paste(missing_columns, collapse = ", ")))
  }

  region_chrom <- as.character(incoming_regions[["chrom"]])
  region_start <- incoming_regions[["start"]]
  region_end <- incoming_regions[["end"]]

  #missing coordinates cannot be compared, report them explicitly
  if(anyNA(region_start) || anyNA(region_end)){
    stop("Missing (NA) start or end coordinates in the provided regions")
  }

  #check if start is greater than (or equal to) end; the comparison is
  #vectorised, so a table with zero rows simply yields no offending rows
  invalid_rows <- which(region_start >= region_end)
  if(length(invalid_rows) > 0){
    stop(paste("Row", invalid_rows[1], "does not meet the condition: start is greater than or equal to end"))
  }

  #check if start or end is outside the actual chromosomal range, using the
  #end of the q arm as the upper bound for each chromosome
  q_arms <- chromosome_arms[which(chromosome_arms$arm == "q"), ]
  chrom_end <- q_arms$end[match(region_chrom, as.character(q_arms$chrom))]

  #chromosomes without an entry in the reference give NA and are skipped
  outside_range <- !is.na(chrom_end) &
    (region_start > chrom_end | region_end > chrom_end)

  if(any(outside_range)){
    stop("Specified start or end coordinates fall outside the actual chromosomal range")
  }

  invisible(NULL)
}
5 changes: 4 additions & 1 deletion man/purify_regions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions man/sanity_check_regions.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 8f7715c

Please sign in to comment.