diff --git a/.gitignore b/.gitignore
index 671b0d8..23a20d4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 .Rproj.user
 .DS_Store
+my_bed.bed
diff --git a/R/bin_splitter.R b/R/bin_splitter.R
index 70b18f3..8acd7d0 100644
--- a/R/bin_splitter.R
+++ b/R/bin_splitter.R
@@ -4,7 +4,7 @@
 #' specified size.
 #'
 #' @details This function internally calls the purify_regions function to
-#' properly format the incoming regions.Thus, thuis function can accept either a
+#' properly format the incoming regions. Thus, this function can accept either a
 #' data frame of regions or individual region coordinates.
 #'
 #' @param these_regions The region(s) to be queried. Can be a data frame with
@@ -39,7 +39,7 @@ bin_splitter = function(these_regions = NULL,
                         qend = NULL,
                         bin_size = 1000){
 
-  #call helperto format regions
+  #call helper to format regions
   my_regions = purify_regions(these_regions = these_regions,
                               qchrom = qchrom,
                               qstart = qstart,
diff --git a/README.md b/README.md
index 4286b52..a79e8b1 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-# BioMaesteR
-A package with convenience functions for generic genomic analysis within R.
-Standardized, generic functions as well as bundled data objects that can be
-called in various settings in the scope of biological analysis in R.
-This package is specialized to provide bioinformatics infrastructure that can be
-used in a variety of analysis.
\ No newline at end of file
+#
+# biomaesteR
+This is biomaesteR, an R package with convenience functions commonly requested in various types of genomic analysis within R. This package is specialized to provide bioinformatics infrastructure that can be
+used in a variety of genomic analyses.
+
+This repo is also an ongoing project intended for demonstrating and following best practices in the development of R packages. This includes adequate package documentation (functions, bundled data objects, etc.), reproducible unit tests and vignettes. Upon pull requests, this repo also executes a GitHub Actions workflow that installs the package in various environments and thoroughly tests the complete code base.
\ No newline at end of file
diff --git a/biomaesteR.png b/biomaesteR.png
new file mode 100644
index 0000000..e5e2039
Binary files /dev/null and b/biomaesteR.png differ
diff --git a/man/bin_splitter.Rd b/man/bin_splitter.Rd
index 5cde8f4..a4c9134 100644
--- a/man/bin_splitter.Rd
+++ b/man/bin_splitter.Rd
@@ -35,7 +35,7 @@ specified size.
 }
 \details{
 This function internally calls the purify_regions function to
-properly format the incoming regions.Thus, thuis function can accept either a
+properly format the incoming regions. Thus, this function can accept either a
 data frame of regions or individual region coordinates.
 }
 \examples{
diff --git a/tests/testthat/test-bin_splitter.R b/tests/testthat/test-bin_splitter.R
new file mode 100644
index 0000000..f584065
--- /dev/null
+++ b/tests/testthat/test-bin_splitter.R
@@ -0,0 +1,29 @@
+#load packages
+library(testthat)
+
+test_that("bin_splitter returns correct number of bins", {
+  result <- bin_splitter(qchrom = c("chr1"), qstart = c(1), qend = c(1001), bin_size = 100)
+  expect_equal(nrow(result), 10)
+})
+
+test_that("bin_splitter returns correct bin size", {
+  result <- bin_splitter(qchrom = c("chr1"), qstart = c(1), qend = c(1000), bin_size = 100)
+  expect_equal(result$bin_end[1] - result$bin_start[1], 100)
+})
+
+test_that("bin_splitter handles multiple regions", {
+  result <- bin_splitter(qchrom = c("chr1", "chr2"), qstart = c(1, 1001), qend = c(1001, 2001), bin_size = 100)
+  expect_equal(nrow(result), 20)
+})
+
+test_that("bin_splitter handles regions data frame", {
+  my_regions <- data.frame(chrom = c("chr1", "chr2"), start = c(1, 1000), end = c(1001, 2001))
+  result <- bin_splitter(these_regions = my_regions, bin_size = 100)
+  expect_equal(nrow(result), 20)
+})
+
+test_that("bin_splitter returns correct chromosome names", {
+  result <- bin_splitter(qchrom = c("chr1", "chr2"), qstart = c(1, 1001), qend = c(1000, 2000), bin_size = 100)
+  expect_equal(unique(result$bin_chr), c("chr1", "chr2"))
+})
+
diff --git a/tests/testthat/test-gene_ranger.R b/tests/testthat/test-gene_ranger.R
new file mode 100644
index 0000000..e617796
--- /dev/null
+++ b/tests/testthat/test-gene_ranger.R
@@ -0,0 +1,23 @@
+library(testthat)
+
+test_that("gene_ranger throws error with no genes", {
+  expect_error(gene_ranger())
+})
+
+test_that("gene_ranger throws error with invalid projection", {
+  expect_error(gene_ranger(these_genes = "MYC", projection = "invalid"))
+})
+
+test_that("gene_ranger throws error with invalid return format", {
+  expect_error(gene_ranger(these_genes = "MYC", return_as = "invalid"))
+})
+
+test_that("gene_ranger throws error when write_to_bed is TRUE but bed_path, track_name, or track_description is not provided", {
+  expect_error(gene_ranger(these_genes = "MYC", write_to_bed = TRUE), "Provide a path for output bed file")
+  expect_error(gene_ranger(these_genes = "MYC", write_to_bed = TRUE, bed_path = "my_bed"), "Provide a track name for output bed file")
+  expect_error(gene_ranger(these_genes = "MYC", write_to_bed = TRUE, bed_path = "my_bed", track_name = "MYC"), "Provide a track description for output bed file")
+})
+
+test_that("gene_ranger runs successfully with valid input", {
+  expect_silent(gene_ranger(these_genes = "MYC"))
+})
diff --git a/tests/testthat/test-get_gene_info.R b/tests/testthat/test-get_gene_info.R
index 5736524..4abcfec 100644
--- a/tests/testthat/test-get_gene_info.R
+++ b/tests/testthat/test-get_gene_info.R
@@ -1,4 +1,4 @@
-#load pacakges
+#load packages
 library(testthat)
 
 
diff --git a/tests/testthat/test-purify_chr.R b/tests/testthat/test-purify_chr.R
new file mode 100644
index 0000000..31fcb3d
--- /dev/null
+++ b/tests/testthat/test-purify_chr.R
@@ -0,0 +1,25 @@
+library(testthat)
+
+test_that("purify_chr throws error with no projection", {
+  expect_error(purify_chr(), "You must provide a valid projection")
+})
+
+test_that("purify_chr throws error with no incoming_table", {
+  expect_error(purify_chr(projection = "hg38"), "You must provide a data table with `incoming_table`")
+})
+
+test_that("purify_chr throws error with invalid projection", {
+  expect_error(purify_chr(projection = "invalid", incoming_table = data.frame(chrom = c("1", "2", "3"))), "This function supports the following projections")
+})
+
+test_that("purify_chr adds 'chr' prefix for hg38 projection", {
+  df <- data.frame(chrom = c("1", "2", "3"))
+  result <- purify_chr(projection = "hg38", incoming_table = df)
+  expect_equal(result$chrom, c("chr1", "chr2", "chr3"))
+})
+
+test_that("purify_chr removes 'chr' prefix for grch37 projection", {
+  df <- data.frame(chrom = c("chr1", "chr2", "chr3"))
+  result <- purify_chr(projection = "grch37", incoming_table = df)
+  expect_equal(result$chrom, c("1", "2", "3"))
+})
diff --git a/tests/testthat/test-purify_regions.R b/tests/testthat/test-purify_regions.R
index e69de29..ccd66e3 100644
--- a/tests/testthat/test-purify_regions.R
+++ b/tests/testthat/test-purify_regions.R
@@ -0,0 +1,47 @@
+library(testthat)
+
+test_that("purify_regions handles single region string", {
+  result <- purify_regions(these_regions = "chr1:100-500")
+  expect_equal(nrow(result), 1)
+  expect_equal(result$chrom, "chr1")
+  expect_equal(result$start, 100)
+  expect_equal(result$end, 500)
+})
+
+test_that("purify_regions handles multiple region strings", {
+  result <- purify_regions(these_regions = c("chr1:100-500", "chr2:100-500"))
+  expect_equal(nrow(result), 2)
+  expect_equal(result$chrom, c("chr1", "chr2"))
+  expect_equal(result$start, c(100, 100))
+  expect_equal(result$end, c(500, 500))
+})
+
+test_that("purify_regions handles individual region parameters", {
+  result <- purify_regions(qchrom = "chr1", qstart = 100, qend = 500)
+  expect_equal(nrow(result), 1)
+  expect_equal(result$chrom, "chr1")
+  expect_equal(result$start, 100)
+  expect_equal(result$end, 500)
+})
+
+test_that("purify_regions handles multiple individual region parameters", {
+  result <- purify_regions(qchrom = c("chr1", "chr2"), qstart = c(100, 200), qend = c(500, 600))
+  expect_equal(nrow(result), 2)
+  expect_equal(result$chrom, c("chr1", "chr2"))
+  expect_equal(result$start, c(100, 200))
+  expect_equal(result$end, c(500, 600))
+})
+
+test_that("purify_regions handles data frame input", {
+  my_regions <- data.frame(chrom = c("chr1", "chr2"), start = c(100, 200), end = c(500, 600))
+  result <- purify_regions(these_regions = my_regions)
+  expect_equal(nrow(result), 2)
+  expect_equal(result$chrom, c("chr1", "chr2"))
+  expect_equal(result$start, c(100, 200))
+  expect_equal(result$end, c(500, 600))
+})
+
+test_that("purify_regions throws error with incorrect input", {
+  expect_error(purify_regions(qchrom = "chr1", qstart = 100))
+  expect_error(purify_regions(these_regions = data.frame(chrom = c("chr1", "chr2"), start = c(100, 200), end = c(50, 100))))
+})
diff --git a/tests/testthat/test-region_ranger.R b/tests/testthat/test-region_ranger.R
new file mode 100644
index 0000000..8dce8ee
--- /dev/null
+++ b/tests/testthat/test-region_ranger.R
@@ -0,0 +1,23 @@
+library(testthat)
+
+test_that("region_ranger throws error with no regions", {
+  expect_error(region_ranger())
+})
+
+test_that("region_ranger throws error with invalid projection", {
+  expect_error(region_ranger(these_regions = "chr8:127735434-127742951", projection = "invalid"), "This function supports the following projections")
+})
+
+test_that("region_ranger returns correct number of columns when raw is FALSE", {
+  result <- region_ranger(these_regions = "chr8:127735434-127742951")
+  expect_equal(ncol(result), 11)
+})
+
+test_that("region_ranger returns all columns when raw is TRUE", {
+  result <- region_ranger(these_regions = "chr8:127735434-127742951", raw = TRUE)
+  expect_true(ncol(result) > 11)
+})
+
+test_that("region_ranger returns warning when no genes found", {
+  expect_warning(region_ranger(these_regions = "chr1:1-10"))
+})
diff --git a/tests/testthat/test-sanity_check_regions.R b/tests/testthat/test-sanity_check_regions.R
new file mode 100644
index 0000000..9d55699
--- /dev/null
+++ b/tests/testthat/test-sanity_check_regions.R
@@ -0,0 +1,31 @@
+library(testthat)
+
+test_that("sanity_check_regions throws error with no regions", {
+  expect_error(sanity_check_regions(), "No regions provided")
+})
+
+test_that("sanity_check_regions throws error with invalid projection", {
+  my_regions <- data.frame(chrom = c("chr1"), start = c(100), end = c(200))
+  expect_error(sanity_check_regions(incoming_regions = my_regions, projection = "invalid"), "Invalid projection specified")
+})
+
+test_that("sanity_check_regions throws error when start is greater than end", {
+  my_regions <- data.frame(chrom = c("chr1"), start = c(200), end = c(100))
+  expect_error(sanity_check_regions(incoming_regions = my_regions, projection = "hg38"), "start is greater than or equal to end")
+})
+
+test_that("sanity_check_regions throws error when start or end is outside chromosomal range", {
+  my_regions <- data.frame(chrom = c("chr1"), start = c(100), end = c(300000000))
+  expect_error(sanity_check_regions(incoming_regions = my_regions, projection = "hg38"), "Specified start or end coordinates fall outside the actual chromosomal range")
+})
+
+test_that("sanity_check_regions runs successfully with valid input", {
+  my_regions <- data.frame(chrom = c("chr1"), start = c(100), end = c(200))
+  expect_silent(sanity_check_regions(incoming_regions = my_regions, projection = "hg38"))
+})
+
+test_that("sanity_check_regions runs successfully with valid grch37 input", {
+  my_regions <- data.frame(chrom = c("1"), start = c(100), end = c(200))
+  expect_silent(sanity_check_regions(incoming_regions = my_regions, projection = "grch37"))
+})
+