Skip to content

Commit

Permalink
various ped fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
dramanica committed May 10, 2024
1 parent fd11bf9 commit 02cc057
Show file tree
Hide file tree
Showing 15 changed files with 38 additions and 88 deletions.
2 changes: 2 additions & 0 deletions R/gen_tibble.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ gen_tibble_bed_rds <- function(x, ...,

indiv_meta <- list(id = bigsnp_obj$fam$sample.ID,
population = bigsnp_obj$fam$family.ID)
# TODO check if other elements are informative; if they are, bring them over


indiv_meta$genotypes <- new_vctrs_bigsnp(bigsnp_obj,
bigsnp_file = bigsnp_path,
Expand Down
4 changes: 2 additions & 2 deletions R/gen_tibble_ped.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ read.pedfile <- function(file, n, snps, which, split="\t| +", sep=".",
# r3 <- as.raw(3)

r0 <- 3
r1 <- 2
r1 <- 0
r2 <- 1
r3 <- 0
r3 <- 2

## Input file
con <- gzfile(file)
Expand Down
2 changes: 1 addition & 1 deletion R/gt_as_plink.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ gt_as_plink <- function(x, file = NULL, type = c("bed","ped","raw"),
} else if (type=="ped"){
all_files <- c(file,
gsub(".ped",".fam",file))
} else if (type=="map"){
} else if (type=="raw"){
all_files <- file
}

Expand Down
Binary file added inst/extdata/pop_a.bk
Binary file not shown.
File renamed without changes.
Binary file added inst/extdata/pop_a.rds
Binary file not shown.
24 changes: 0 additions & 24 deletions inst/extdata/pop_a_ped_plink.log

This file was deleted.

16 changes: 0 additions & 16 deletions inst/extdata/pop_a_ped_plink.map

This file was deleted.

16 changes: 0 additions & 16 deletions inst/extdata/pop_a_ped_tidypopgen.map

This file was deleted.

5 changes: 0 additions & 5 deletions inst/extdata/pop_a_ped_tidypopgen.ped

This file was deleted.

8 changes: 4 additions & 4 deletions src/snp_as.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ inline arma::mat FBM_RW2arma(Rcpp::Environment BM) {
size_t n = macc.nrow();
size_t m = macc.ncol();

for (int j = 0; j < m; j++) {
for (int i = 0; i < n; i++) {
for (unsigned int j = 0; j < m; j++) {
for (unsigned int i = 0; i < n; i++) {
int value = (macc(i,j));
if (value <3){
dos_mat(i, j) = value-1; // note that we want the matrix of (dos-1)
Expand All @@ -53,10 +53,10 @@ inline arma::mat FBM_RW2arma(Rcpp::Environment BM) {
}}

// fill the rest with 0s (should be 1 column max)
int m2 = dos_mat.n_cols;
size_t m2 = dos_mat.n_cols;
if (m2 > m) {
myassert(m2 == (m + 1), ERROR_BUG);
for (int i = 0; i < n; i++) {
for (unsigned int i = 0; i < n; i++) {
dos_mat(i, m) = 1;
na_mat(i, m) = 0;
}
Expand Down
8 changes: 4 additions & 4 deletions src/snp_ibs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ inline arma::mat FBM_RW2arma(Rcpp::Environment BM) {
size_t n = macc.nrow();
size_t m = macc.ncol();

for (int j = 0; j < m; j++) {
for (int i = 0; i < n; i++) {
for (unsigned int j = 0; j < m; j++) {
for (unsigned int i = 0; i < n; i++) {
int value = (macc(i,j));
if (value == 0){
genotype0(i, j) = 1;
Expand All @@ -55,10 +55,10 @@ inline arma::mat FBM_RW2arma(Rcpp::Environment BM) {
}}

// fill the rest with 0s (should be 1 column max)
int m2 = genotype0.n_cols;
size_t m2 = genotype0.n_cols;
if (m2 > m) {
myassert(m2 == (m + 1), ERROR_BUG);
for (int i = 0; i < n; i++) {
for (unsigned int i = 0; i < n; i++) {
genotype0(i, m) = 0;
genotype1(i, m) = 0;
genotype2(i, m) = 0;
Expand Down
8 changes: 4 additions & 4 deletions src/snp_king.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ inline arma::mat FBM_RW2arma(Rcpp::Environment BM) {
size_t n = macc.nrow();
size_t m = macc.ncol();

for (int j = 0; j < m; j++) {
for (int i = 0; i < n; i++) {
for (unsigned int j = 0; j < m; j++) {
for (unsigned int i = 0; i < n; i++) {
int value = (macc(i,j));
if (value == 0){
genotype0(i, j) = 1;
Expand All @@ -58,10 +58,10 @@ inline arma::mat FBM_RW2arma(Rcpp::Environment BM) {
}}

// fill the rest with 0s (should be 1 column max)
int m2 = genotype0.n_cols;
size_t m2 = genotype0.n_cols;
if (m2 > m) {
myassert(m2 == (m + 1), ERROR_BUG);
for (int i = 0; i < n; i++) {
for (unsigned int i = 0; i < n; i++) {
genotype0(i, m) = 0;
genotype1(i, m) = 0;
genotype2(i, m) = 0;
Expand Down
26 changes: 14 additions & 12 deletions tests/testthat/test_gen_tibble.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ test_loci <- data.frame(name=paste0("rs",1:6),
test_gt <- gen_tibble(x = test_genotypes, loci = test_loci, indiv_meta = test_indiv_meta, quiet = TRUE)

# this also tests show_genotypes and show_loci
test_that("create gen_tibble from bed",{
test_that("create gen_tibble from dfs",{
expect_true(inherits(test_gt,"gen_tbl"))
# we can extract the genotypes correctly
extracted_genotypes <- test_gt %>% show_genotypes()
Expand All @@ -32,17 +32,6 @@ test_that("create gen_tibble from bed",{

})

# now create it directly from the dfs
test_that("create gen_tibble from dfs",{
test_dfs_gt <- gen_tibble(test_genotypes, indiv_meta = test_indiv_meta,
loci = test_loci, quiet = TRUE)
# because of the different backing file info, we cannot use identical on the whole object
expect_true(identical(show_genotypes(test_gt), show_genotypes(test_dfs_gt)))
expect_true(identical(show_loci(test_gt), show_loci(test_dfs_gt)))
expect_true(identical(test_gt %>% select(-genotypes),
test_dfs_gt %>% select(-genotypes)))
})

test_genotypes_c <- rbind(c("1","1","0","1","1","0"),
c("2","1","0","0","0","0"),
c("2","2","0","0","1","1"))
Expand Down Expand Up @@ -81,4 +70,17 @@ test_that("gen_tibble catches invalid alleles",{

})

test_that("gen_tibble from a bed file",{
bed_path <- system.file("extdata/pop_a.bed", package = "tidypopgen")
pop_a_gt <- gen_tibble(bed_path, quiet=TRUE)
# now read the dosages created by plink when saving in raw format
raw_file_pop_a <- read.table(system.file("extdata/pop_a.raw", package = "tidypopgen"), header= TRUE)
mat <- as.matrix(raw_file_pop_a[,7:ncol(raw_file_pop_a)])
mat <- unname(mat)
expect_true(all.equal(mat,show_genotypes(pop_a_gt)))
# now read in the ped file
ped_path <- system.file("extdata/pop_a.ped", package = "tidypopgen")
pop_a_ped_gt <- gen_tibble(ped_path, quiet=TRUE,backingfile = tempfile())


})
7 changes: 7 additions & 0 deletions tests/testthat/test_gt_as_plink.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ test_that("write a bed file",{
#check gt_as_plink converts the NA missing allele to 0
expect_true(is.na(show_loci(test_gt2)$allele_alt[3]))


# now write it as a ped
ped_path <- gt_as_plink(test_gt, file = paste0(tempfile(),".ped"), type = "ped")
test_gt3 <- gen_tibble(ped_path, quiet=TRUE)
Expand All @@ -37,5 +38,11 @@ test_that("write a bed file",{
expect_true(all.equal(show_loci(test_gt3),show_loci(test_gt2)))
expect_true(all.equal(show_genotypes(test_gt3),show_genotypes(test_gt2)))

# write it as raw
raw_path <- gt_as_plink(pop_a_gt,file=tempfile(),type="raw")
# check why sex has been lost (it was not read in properly?!?)
# look at line 119 of gen_tibble
#tools::md5sum(raw_path)==tools::md5sum(system.file("extdata","pop_a.raw",package="tidypopgen"))

})

0 comments on commit 02cc057

Please sign in to comment.