diff --git a/R/gen_tibble.R b/R/gen_tibble.R index 38eec8e7..85402def 100644 --- a/R/gen_tibble.R +++ b/R/gen_tibble.R @@ -116,6 +116,8 @@ gen_tibble_bed_rds <- function(x, ..., indiv_meta <- list(id = bigsnp_obj$fam$sample.ID, population = bigsnp_obj$fam$family.ID) + # TODO check if other elements are informative; if they are, bring them over + indiv_meta$genotypes <- new_vctrs_bigsnp(bigsnp_obj, bigsnp_file = bigsnp_path, diff --git a/R/gen_tibble_ped.R b/R/gen_tibble_ped.R index 3401a625..5b6a1ecf 100644 --- a/R/gen_tibble_ped.R +++ b/R/gen_tibble_ped.R @@ -40,9 +40,9 @@ read.pedfile <- function(file, n, snps, which, split="\t| +", sep=".", # r3 <- as.raw(3) r0 <- 3 - r1 <- 2 + r1 <- 0 r2 <- 1 - r3 <- 0 + r3 <- 2 ## Input file con <- gzfile(file) diff --git a/R/gt_as_plink.R b/R/gt_as_plink.R index d124eff9..709eef0b 100644 --- a/R/gt_as_plink.R +++ b/R/gt_as_plink.R @@ -32,7 +32,7 @@ gt_as_plink <- function(x, file = NULL, type = c("bed","ped","raw"), } else if (type=="ped"){ all_files <- c(file, gsub(".ped",".fam",file)) - } else if (type=="map"){ + } else if (type=="raw"){ all_files <- file } diff --git a/inst/extdata/pop_a.bk b/inst/extdata/pop_a.bk new file mode 100644 index 00000000..57b68c9f Binary files /dev/null and b/inst/extdata/pop_a.bk differ diff --git a/inst/extdata/pop_a_ped_plink.ped b/inst/extdata/pop_a.ped similarity index 100% rename from inst/extdata/pop_a_ped_plink.ped rename to inst/extdata/pop_a.ped diff --git a/inst/extdata/pop_a.rds b/inst/extdata/pop_a.rds new file mode 100644 index 00000000..2710662d Binary files /dev/null and b/inst/extdata/pop_a.rds differ diff --git a/inst/extdata/pop_a_ped_plink.log b/inst/extdata/pop_a_ped_plink.log deleted file mode 100644 index 8195b484..00000000 --- a/inst/extdata/pop_a_ped_plink.log +++ /dev/null @@ -1,24 +0,0 @@ -PLINK v1.90b7.2 64-bit (11 Dec 2023) -Options in effect: - --bfile C:/Users/eviec/Git/tidypopgen/inst/extdata/pop_a - --out C:/Users/eviec/Git/tidypopgen/inst/extdata/pop_a_ped_plink - --recode - -Hostname: LAPTOP-S7R1KB1T -Working directory: C:\Users\eviec\OneDrive\Desktop\PhD\tidypopgen\ped_check -Start time: Fri May 10 06:29:41 2024 - -Random number seed: 1715318981 -8080 MB RAM detected; reserving 4040 MB for main workspace. -16 variants loaded from .bim file. -5 people (5 males, 0 females) loaded from .fam. -Using 1 thread (no multithreaded calculations invoked). -Before main variant filters, 5 founders and 0 nonfounders present. -Calculating allele frequencies... done. -Total genotyping rate is exactly 1. -16 variants and 5 people pass filters and QC. -Note: No phenotypes present. ---recode ped to C:/Users/eviec/Git/tidypopgen/inst/extdata/pop_a_ped_plink.ped -+ C:/Users/eviec/Git/tidypopgen/inst/extdata/pop_a_ped_plink.map ... done. - -End time: Fri May 10 06:29:41 2024 diff --git a/inst/extdata/pop_a_ped_plink.map b/inst/extdata/pop_a_ped_plink.map deleted file mode 100644 index 5d4cd2d3..00000000 --- a/inst/extdata/pop_a_ped_plink.map +++ /dev/null @@ -1,16 +0,0 @@ -1 rs4477212 0 82154 -1 rs3094315 0 752566 -1 rs3131972 0 752721 -1 rs12124819 0 776546 -1 rs11240777 0 798959 -1 rs1110052 0 873558 -1 rs9697457 0 934345 -1 rs6657048 0 957640 -1 rs2488991 0 994391 -1 rs307354 0 1264539 -1 rs1240719 0 1346905 -2 rs2862633 0 61974443 -2 rs28569024 0 139008811 -2 rs10106770 0 235832763 -3 rs11942835 0 155913651 -23 rs5945676 0 51433071 diff --git a/inst/extdata/pop_a_ped_tidypopgen.map b/inst/extdata/pop_a_ped_tidypopgen.map deleted file mode 100644 index dbe5526e..00000000 --- a/inst/extdata/pop_a_ped_tidypopgen.map +++ /dev/null @@ -1,16 +0,0 @@ -1 rs4477212 0 82154 -1 rs3094315 0 752566 -1 rs3131972 0 752721 -1 rs12124819 0 776546 -1 rs11240777 0 798959 -1 rs1110052 0 873558 -1 rs9697457 0 934345 -1 rs6657048 0 957640 -1 rs2488991 0 994391 -1 rs307354 0 1264539 -1 rs1240719 0 1346905 -2 rs2862633 0 61974443 -2 rs28569024 0 139008811 -2 rs10106770 0 235832763 -3 rs11942835 0 155913651 -23 rs5945676 0 51433071 diff --git a/inst/extdata/pop_a_ped_tidypopgen.ped b/inst/extdata/pop_a_ped_tidypopgen.ped deleted file mode 100644 index 54d6586e..00000000 --- a/inst/extdata/pop_a_ped_tidypopgen.ped +++ /dev/null @@ -1,5 +0,0 @@ -pop_a GRC14300079 0 0 0 -9 A A A A G G A A G G G T G G C C T T C C T T G G T T G A T T T T -pop_a GRC14300142 0 0 0 -9 A A A A G G A A G G G G G G C C T T C G T A G G T T G G T C T T -pop_a GRC14300159 0 0 0 -9 A A A G G A A A A A G T G G C C T T C C T T G G T T G A T T T T -pop_a GRC14300286 0 0 0 -9 A A A A G G A A G G G G G G C C T T C G T T G G T T G A T C T T -pop_a GRC14300305 0 0 0 -9 A A A A G A A A G A T T G G C C T T C C T T G G T T G A T T T T diff --git a/src/snp_as.cpp b/src/snp_as.cpp index 18d7c861..2ae71a7e 100644 --- a/src/snp_as.cpp +++ b/src/snp_as.cpp @@ -41,8 +41,8 @@ inline arma::mat FBM_RW2arma(Rcpp::Environment BM) { size_t n = macc.nrow(); size_t m = macc.ncol(); - for (int j = 0; j < m; j++) { - for (int i = 0; i < n; i++) { + for (unsigned int j = 0; j < m; j++) { + for (unsigned int i = 0; i < n; i++) { int value = (macc(i,j)); if (value <3){ dos_mat(i, j) = value-1; // note that we want the matrix of (dos-1) @@ -53,10 +53,10 @@ inline arma::mat FBM_RW2arma(Rcpp::Environment BM) { }} // fill the rest with 0s (should be 1 column max) - int m2 = dos_mat.n_cols; + size_t m2 = dos_mat.n_cols; if (m2 > m) { myassert(m2 == (m + 1), ERROR_BUG); - for (int i = 0; i < n; i++) { + for (unsigned int i = 0; i < n; i++) { dos_mat(i, m) = 1; na_mat(i, m) = 0; } diff --git a/src/snp_ibs.cpp b/src/snp_ibs.cpp index 606fca43..d741a2ce 100644 --- a/src/snp_ibs.cpp +++ b/src/snp_ibs.cpp @@ -42,8 +42,8 @@ inline arma::mat FBM_RW2arma(Rcpp::Environment BM) { size_t n = macc.nrow(); size_t m = macc.ncol(); - for (int j = 0; j < m; j++) { - for (int i = 0; i < n; i++) { + for (unsigned int j = 0; j < m; j++) { + for (unsigned int i = 0; i < n; i++) { int value = (macc(i,j)); if (value == 0){ genotype0(i, j) = 1; @@ -55,10 +55,10 @@ inline arma::mat FBM_RW2arma(Rcpp::Environment BM) { }} // fill the rest with 0s (should be 1 column max) - int m2 = genotype0.n_cols; + size_t m2 = genotype0.n_cols; if (m2 > m) { myassert(m2 == (m + 1), ERROR_BUG); - for (int i = 0; i < n; i++) { + for (unsigned int i = 0; i < n; i++) { genotype0(i, m) = 0; genotype1(i, m) = 0; genotype2(i, m) = 0; diff --git a/src/snp_king.cpp b/src/snp_king.cpp index 1d51418a..2b8d542f 100644 --- a/src/snp_king.cpp +++ b/src/snp_king.cpp @@ -42,8 +42,8 @@ inline arma::mat FBM_RW2arma(Rcpp::Environment BM) { size_t n = macc.nrow(); size_t m = macc.ncol(); - for (int j = 0; j < m; j++) { - for (int i = 0; i < n; i++) { + for (unsigned int j = 0; j < m; j++) { + for (unsigned int i = 0; i < n; i++) { int value = (macc(i,j)); if (value == 0){ genotype0(i, j) = 1; @@ -58,10 +58,10 @@ inline arma::mat FBM_RW2arma(Rcpp::Environment BM) { }} // fill the rest with 0s (should be 1 column max) - int m2 = genotype0.n_cols; + size_t m2 = genotype0.n_cols; if (m2 > m) { myassert(m2 == (m + 1), ERROR_BUG); - for (int i = 0; i < n; i++) { + for (unsigned int i = 0; i < n; i++) { genotype0(i, m) = 0; genotype1(i, m) = 0; genotype2(i, m) = 0; diff --git a/tests/testthat/test_gen_tibble.R b/tests/testthat/test_gen_tibble.R index 2cf81aa5..52d69aa7 100644 --- a/tests/testthat/test_gen_tibble.R +++ b/tests/testthat/test_gen_tibble.R @@ -14,7 +14,7 @@ test_loci <- data.frame(name=paste0("rs",1:6), test_gt <- gen_tibble(x = test_genotypes, loci = test_loci, indiv_meta = test_indiv_meta, quiet = TRUE) # this also tests show_genotypes and show_loci -test_that("create gen_tibble from bed",{ +test_that("create gen_tibble from dfs",{ expect_true(inherits(test_gt,"gen_tbl")) # we can extract the genotypes correctly extracted_genotypes <- test_gt %>% show_genotypes() @@ -32,17 +32,6 @@ test_that("create gen_tibble from bed",{ }) -# now create it directly from the dfs -test_that("create gen_tibble from dfs",{ - test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = test_indiv_meta, - loci = test_loci, quiet = TRUE) - # because of the different backing file info, we cannot use identical on the whole object - expect_true(identical(show_genotypes(test_gt), show_genotypes(test_dfs_gt))) - expect_true(identical(show_loci(test_gt), show_loci(test_dfs_gt))) - expect_true(identical(test_gt %>% select(-genotypes), - test_dfs_gt %>% select(-genotypes))) -}) - test_genotypes_c <- rbind(c("1","1","0","1","1","0"), c("2","1","0","0","0","0"), c("2","2","0","0","1","1")) @@ -81,4 +70,17 @@ test_that("gen_tibble catches invalid alleles",{ }) +test_that("gen_tibble from a bed file",{ + bed_path <- system.file("extdata/pop_a.bed", package = "tidypopgen") + pop_a_gt <- gen_tibble(bed_path, quiet=TRUE) + # now read the dosages created by plink when saving in raw format + raw_file_pop_a <- read.table(system.file("extdata/pop_a.raw", package = "tidypopgen"), header= TRUE) + mat <- as.matrix(raw_file_pop_a[,7:ncol(raw_file_pop_a)]) + mat <- unname(mat) + expect_true(all.equal(mat,show_genotypes(pop_a_gt))) + # now read in the ped file + ped_path <- system.file("extdata/pop_a.ped", package = "tidypopgen") + pop_a_ped_gt <- gen_tibble(ped_path, quiet=TRUE,backingfile = tempfile()) + +}) diff --git a/tests/testthat/test_gt_as_plink.R b/tests/testthat/test_gt_as_plink.R index 8e4bcf91..6baf6df7 100644 --- a/tests/testthat/test_gt_as_plink.R +++ b/tests/testthat/test_gt_as_plink.R @@ -29,6 +29,7 @@ test_that("write a bed file",{ #check gt_as_plink converts the NA missing allele to 0 expect_true(is.na(show_loci(test_gt2)$allele_alt[3])) + # now write it as a ped ped_path <- gt_as_plink(test_gt, file = paste0(tempfile(),".ped"), type = "ped") test_gt3 <- gen_tibble(ped_path, quiet=TRUE) @@ -37,5 +38,11 @@ test_that("write a bed file",{ expect_true(all.equal(show_loci(test_gt3),show_loci(test_gt2))) expect_true(all.equal(show_genotypes(test_gt3),show_genotypes(test_gt2))) + # write it as raw + raw_path <- gt_as_plink(pop_a_gt,file=tempfile(),type="raw") + # check why sex has been lost (it was not read in properly?!?) + # look at line 119 of gen_tibble + #tools::md5sum(raw_path)==tools::md5sum(system.file("extdata","pop_a.raw",package="tidypopgen")) + })