diff --git a/.Rbuildignore b/.Rbuildignore index 121042f3..2b74d10e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,3 +5,4 @@ ^\.github$ pkgdown/ _pkgdown.yml +admixture/ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index fc57a73c..c239cda5 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -2,7 +2,7 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main] + branches: [main, run_admixture] pull_request: branches: [main] @@ -46,6 +46,17 @@ jobs: extra-packages: any::rcmdcheck needs: check + - name: Get ADMIXTURE for linux + if: matrix.config.os == 'ubuntu-latest' + run: | + mkdir admixture + cd admixture + wget https://dalexander.github.io/admixture/binaries/admixture_linux-1.3.0.tar.gz + tar -xvzf admixture_linux-1.3.0.tar.gz + cd .. + echo "$PWD/admixture/dist/admixture_linux-1.3.0" >> "$GITHUB_PATH" + + - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index cadb5b92..126befc9 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -29,6 +29,16 @@ jobs: extra-packages: any::covr, any::xml2 needs: coverage + - name: Get ADMIXTURE for linux + run: | + mkdir admixture + cd admixture + wget https://dalexander.github.io/admixture/binaries/admixture_linux-1.3.0.tar.gz + tar -xvzf admixture_linux-1.3.0.tar.gz + cd .. + echo "$PWD/admixture/dist/admixture_linux-1.3.0" >> "$GITHUB_PATH" + + - name: Test coverage run: | cov <- covr::package_coverage( diff --git a/DESCRIPTION b/DESCRIPTION index 9978d66b..b9e73464 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -50,6 +50,7 @@ Suggests: LEA, rmarkdown, readr, + reticulate, testthat (>= 3.0.0), vcfR Remotes: diff --git a/NAMESPACE b/NAMESPACE index a0fa07d7..5d782834 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ S3method(augment,gt_dapc) S3method(augment,gt_pca) S3method(augment,q_matrix) S3method(augment_loci,gt_pca) +S3method(autoplot,gt_admix) S3method(autoplot,gt_cluster_pca) S3method(autoplot,gt_dapc) S3method(autoplot,gt_pca) @@ -15,6 +16,7 @@ S3method(autoplot,gt_pcadapt) S3method(autoplot,q_matrix) S3method(autoplot,qc_report_indiv) S3method(autoplot,qc_report_loci) +S3method(c,gt_admix) S3method(count_loci,tbl_df) S3method(count_loci,vctrs_bigSNP) S3method(dplyr_col_modify,grouped_gen_tbl) @@ -72,7 +74,7 @@ S3method(show_loci,tbl_df) S3method(show_loci,vctrs_bigSNP) S3method(show_ploidy,tbl_df) S3method(show_ploidy,vctrs_bigSNP) -S3method(summary,q_matrix_list) +S3method(summary,gt_admix) S3method(summary,rbind_report) S3method(summary,vctrs_bigSNP) S3method(tbl_sum,gen_tbl) @@ -90,6 +92,7 @@ export(filter_high_relatedness) export(gen_tibble) export(get_p_matrix) export(get_q_matrix) +export(gt_admixture) export(gt_as_genind) export(gt_as_genlight) export(gt_as_geno_lea) @@ -140,6 +143,7 @@ export(q_matrix) export(qc_report_indiv) export(qc_report_loci) export(rbind_dry_run) +export(read_q_files) export(scale_fill_distruct) export(select_loci) export(select_loci_if) diff --git a/R/autoplot_gt_admix.R b/R/autoplot_gt_admix.R new file mode 100644 index 00000000..6b4452a5 --- /dev/null +++ b/R/autoplot_gt_admix.R @@ -0,0 +1,48 @@ +#' Autoplots for `gt_admix` objects +#' +#' For `gt_admix`, the following types of plots are available: +#' - `cv`: the cross-validation error for each value of `k` +#' - `barplot` a standard barplot of the admixture proportions +#' +#' `autoplot` produces simple plots to quickly inspect an object. They are +#' not customisable; we recommend that you use `ggplot2` to produce publication +#' ready plots. +#' +#' @param object an object of class `gt_admixture` +#' @param type the type of plot (one of "cv", and "boxplot") +#' @param k the value of `k` to plot (for `barplot` type only) +#' param repeat the repeat to plot (for `barplot` type only) +#' @param run the run to plot (for `barplot` type only) +#' @param ... not used. +#' @returns a `ggplot2` object +#' @name autoplot_gt_admix +#' @export +autoplot.gt_admix <- function(object, + type=c("cv", "barplot"), + k = NULL, + run = NULL, + ...) +{ + type <- match.arg(type) + if (type== "cv") { + if (is.null(object$cv)){ + stop("No cross validation error available") + } + ggplot2::ggplot(data.frame(k=object$k, cv=object$cv), ggplot2::aes(x=.data$k, y=.data$cv)) + + ggplot2::geom_point() + + ggplot2::geom_line() + + ggplot2::labs(x="k", y="Cross validation error") + } else if (type == "barplot") { + # check that k is specified + if (is.null(k)){ + stop("You must specify a value for k") + } + # check that run is specified + if (is.null(run)){ + stop("You must specify a value for repeat") + } + # get the Q matrix for the specified k and repeat + Q <- get_q_matrix(object, k = k, run = run) + autoplot(Q) + } +} diff --git a/R/autoplot_gt_pcadapt.R b/R/autoplot_gt_pcadapt.R index 357472fe..f710681c 100644 --- a/R/autoplot_gt_pcadapt.R +++ b/R/autoplot_gt_pcadapt.R @@ -1,9 +1,9 @@ #' Autoplots for `gt_pcadapt` objects #' #' For `gt_pcadapt`, the following types of plots are available: -#' - `qq`: a qunatile-quantile plot of the p-values from pcadapt +#' - `qq`: a qunatile-quantile plot of the p-values from `pcadapt` #' (wrapping [bigsnpr::snp_qq()]) -#' - `manhattan` a manhattan plot of the p-values from pcadapt +#' - `manhattan` a manhattan plot of the p-values from `pcadapt` #' (wrapping [bigsnpr::snp_manhattan()]) #' #' `autoplot` produces simple plots to quickly inspect an object. They are diff --git a/R/gt_admix_methods.R b/R/gt_admix_methods.R new file mode 100644 index 00000000..5f765bda --- /dev/null +++ b/R/gt_admix_methods.R @@ -0,0 +1,80 @@ +#' Combine method for gt_admix objects +#' +#' @param ... A list of `gt_admix` objects +#' @return A `gt_admix` object with the combined data +#' @export + +c.gt_admix <- function(...) { + # check that all the objects are of class gt_admix + if (!all(sapply(list(...), function(x) inherits(x, "gt_admix")))) { + stop("All the objects must be of class gt_admix") + } + + combined_obj <- list() + # combine all the elements from each list + combined_obj$k <- sapply(list(...), function(x) x$k) + combined_obj$Q <- sapply(list(...), function(x) x$Q) + # if we have a P element in any of the objects, combine it + if (all(sapply(list(...), function(x) !is.null(x$P)))) { + combined_obj$P <- sapply(list(...), function(x) x$P) + } + # if we have a log_lik element in any of the objects, combine it + if (all(sapply(list(...), function(x) !is.null(x$loglik)))) { + combined_obj$loglik <- unlist(sapply(list(...), function(x) x$loglik)) + } + + # if we have a log element in any of the objects, combine it + if (all(sapply(list(...), function(x) !is.null(x$log)))) { + combined_obj$log <- sapply(list(...), function(x) x$log) + } + # if we have a cv element in any of the objects, combine it + if (all(sapply(list(...), function(x) !is.null(x$cv)))) { + combined_obj$cv <- unlist(sapply(list(...), function(x) x$cv)) + } + # if the first object has an id element, use it in the combined object + if (!is.null(list(...)[[1]]$id)) { + combined_obj$id <- list(...)[[1]]$id + } + # if the first object has a group element, use it in the combined object + if (!is.null(list(...)[[1]]$group)) { + combined_obj$group <- list(...)[[1]]$group + } + # set the class of the object + class(combined_obj) <- c("gt_admix", "list") + return(combined_obj) +} + +#' Summary method for gt_admix objects +#' +#' @param object a `gt_admix` object +#' @param ... unused (necessary for compatibility with generic function) +#' @return A summary of the `gt_admix` object +#' @export +summary.gt_admix <- function(object, ...) { + cat("Admixture results") + # if we only have one element, give the k + if (length(object$k) == 1) { + cat(" for k = ", object$k, "\n") + } else { + tab_sum <- table(object$k) + tab_sum <- rbind(as.numeric(names(tab_sum)),tab_sum) + rownames(tab_sum) <- c("k", "n") + colnames(tab_sum) <- rep("", ncol(tab_sum)) + cat(" for multiple runs:") + print(tab_sum) + } + cat("with slots:\n") + cat("$Q for Q matrices\n") + # if there is a lot P in the object, print it + if ("P" %in% names(object)){ + cat("$P for matrices\n") + } + # if there is a slot log in the object, print it + if ("log" %in% names(object)){ + cat("$log for logs from the algorithm\n") + } + # if there is a slot cv in the object, print it + if ("cv" %in% names(object)){ + cat("$cv for cross validation error\n") + } +} diff --git a/R/gt_admixture.R b/R/gt_admixture.R new file mode 100644 index 00000000..3d82899a --- /dev/null +++ b/R/gt_admixture.R @@ -0,0 +1,176 @@ +#' Run ADMIXTURE from R +#' +#' This function runs ADMIXTURE, taking either a `gen_tibble` or a file as an input. +#' This is a wrapper that runs ADMIXTURE from the command line, and reads the output +#' into R. It can run multiple values of `k` and multiple repeats for each `k`. +#' +#' @details This is a wrapper for the command line program ADMIXTURE. It can either +#' use a binary present in the main environment, or use a copy installed in a conda +#' environment. +#' @param x a `gen_tibble` or a character giving the path of the input PLINK bed file +#' @param k an integer giving the number of clusters +#' @param n_runs the number of runs for each k value (defaults to 1) +#' @param crossval boolean, should cross validation be used to assess the fit (defaults ot FALSE) +#' @param n_cores number of cores (defaults to 1) +#' @param seed the seed for the random number generator (defaults to NULL) +#' @param conda_env the name of the conda environment to use. "none" forces the use of a local copy, whilst any other string will +#' direct the function to use a custom conda environment. +#' @return an object of class `gt_admix` consisting of a list with the following elements: +#' - `k` the number of clusters +#' - `Q` a matrix with the admixture proportions +#' - `P` a matrix with the allele frequencies +#' - `log` a log of the output generated by ADMIXTURE (usually printed on the screen when running from the command line) +#' - `cv` the cross validation error (if `crossval` is TRUE) +#' - `loglik` the log likelihood of the model +#' - `id` the id column of the input `gen_tibble` (if applicable) +#' - `group` the group column of the input `gen_tibble` (if applicable) +#' @export + + +# If the package `fastmixturer` is installed, and its conda environment +# has been set up with `ADMIXTURE` in it (the default), it will automatically +# use that version unless you change `conda_env` to "none". +# If set to "auto", the default, +# a copy from `fastmixturer` will be preferred if available, otherwise a local copy will +# be used. + +gt_admixture <- function (x, k, n_runs = 1, crossval = FALSE, n_cores = 1, seed = NULL, + conda_env="none"){ + # check that we have the right number of repeats + if (length(seed)!= n_runs){ + stop("'seeds' should be a vector of length 'n_runs'") + } + + # if x is a character, check that it is file that exists + if (is.character(x)) { + if (!file.exists(x)) { + stop("The file ", x, " does not exist") + } + input_file <- x + } else { # if x is a gen_tibble + if (!inherits(x, "gen_tbl")) { + stop("x must be a gen_tibble or a character") + } + # write the gen_tibble to a temp file + input_file <- gt_as_plink(x, file = tempfile(), chromosomes_as_int=TRUE) + } + + # expand path to file to be full path + input_file <- normalizePath(input_file) + + # cast k as an integer + k <- as.integer(k) + + + # set and check the conda environment + # if there is no reticulate + if (!requireNamespace("reticulate", quietly = TRUE)) { + # but we have not set conda_env to none + if (conda_env!="none"){ + # if we have auto, set to none + if (conda_env == "auto"){ + conda_env <- "none" + } else { + stop("The package reticulate is needed to use a conda environemnt in R.") + } + } + } else { # if reticulate is available + # if we have "auto" and the conda environment rfastmixture does not exist, or + if (conda_env =="auto") { + if (("rfastmixture" %in% reticulate::conda_list()[["name"]])){ + conda_env <- "rfastmixture" + } else { + conda_env <- "none" + } + # check that the conda environment does exist + if ((conda_env !="none") && (!conda_env %in% reticulate::conda_list()[["name"]])){ + stop("The conda environment ", conda_env, " does not exist.") + } + } + } + + # if conda is "none", check that we have admixture installed + if (conda_env == "none") { + if (system2("which", args = "admixture", stdout = NULL) != 0) { + stop("ADMIXTURE is not installed in this path") + } + } + + # set the working path to the tempdir, where admixture will dump the text files + out <- tempdir() + # store working directory + wd <- getwd() + # change to the directory of the input file + setwd(out) + on.exit(setwd(wd)) + + + # initialise list to store results + adm_list <- list( + k = integer(), + Q = list(), + P = list(), + log = list(), + loglik = numeric() + ) + if (crossval) { + adm_list$cv <- numeric() + } + class(adm_list) <- c("gt_admix","list") + + # loop over values of k and number of repeats + index <- 1 + for (this_k in as.integer(k)) { + for (this_rep in seq_len(n_runs)) { + # set the arguments for admixture + admixture_args <- paste(input_file, this_k) + if (crossval) { + admixture_args <- paste(admixture_args, "--cv") + } + if (n_cores > 1) { + admixture_args <- paste(admixture_args, paste0("-j", n_cores)) + } + if (!is.null(seed)) { + admixture_args <- paste(admixture_args, paste0("-s ", seed[index])) + } + + # if we conda_env is none + if (conda_env == "none") { + adm_out <- system2("admixture", args = admixture_args, stdout = TRUE) + # change back to the original working directory + } else { + reticulate::conda_run2("admixture", args = admixture_args, + conda = conda_env) + } + + # read the output + output_prefix <- file.path(out, gsub(".bed", "", basename(input_file))) + adm_list$k[index] <- this_k + adm_list$Q[[index]] <- q_matrix(utils::read.table(paste(output_prefix,this_k,"Q",sep="."), header = FALSE)) + adm_list$P[[index]] <- utils::read.table(paste(output_prefix,this_k,"P",sep="."), header = FALSE) + adm_list$loglik[index] <- as.numeric(strsplit(grep("^Loglikelihood", adm_out, value = TRUE),":")[[1]][2]) + adm_list$log[[index]] <- adm_out + + + if (crossval) { + # extract value from line with CV error (number after :) + adm_list$cv[index] <- as.numeric(strsplit(grep("CV error", adm_out, value = TRUE),":")[[1]][2]) + } + index <- index + 1 + } + } + + # add metadata if x is a gen_tibble + if (inherits(x, "gen_tbl")) { + adm_list$id <- x$id + # if it is grouped, add the group + if (inherits(x, "grouped_gen_tbl")) { + adm_list$group <- x[[dplyr::group_vars(x)]] + } + } + + return(adm_list) + +} + + diff --git a/R/gt_as_plink.R b/R/gt_as_plink.R index 0a4da7ab..0068dd9d 100644 --- a/R/gt_as_plink.R +++ b/R/gt_as_plink.R @@ -12,12 +12,22 @@ #' the output file will have the same path and prefix of the backingfile. #' @param type one of "bed", "ped" or "raw" #' @param overwrite boolean whether to overwrite the file. +#' @param chromosomes_as_int boolean whether to use the integer representation of the chromosomes #' @returns the path of the saved file #' @export gt_as_plink <- function(x, file = NULL, type = c("bed","ped","raw"), - overwrite = TRUE){ + overwrite = TRUE, chromosomes_as_int = FALSE){ + # check that x is a gen_tibble + if (!methods::is(x, "gen_tbl")){ + stop("x must be a gen_tibble") + } + + if (is.null(chromosomes_as_int)){ + chromosomes_as_int = FALSE + } + type <- match.arg(type) if (is.null(file)){ @@ -48,21 +58,49 @@ gt_as_plink <- function(x, file = NULL, type = c("bed","ped","raw"), } if (type == "bed"){ - gt_write_bed(x, file) + gt_write_bed(x, file, chromosomes_as_int) } else { gt_write_ped_raw(x,file,type) } } -gt_write_bed <- function(x, file) { +gt_write_bed <- function(x, file, chromosomes_as_int) { bed_path <- bigsnpr::snp_writeBed(attr(x$genotypes,"bigsnp"), bedfile = file, ind.row = vctrs::vec_data(x$genotypes), ind.col = show_loci(x)$big_index) - # the bim and fam file only contain the original information in the bigSNP object - # TODO we should update them with the info from the gentibble - bed_path + # the bim and fam files written by bigsnpr contain the information of the original bigsnpr object + # we now update that information with the info from the gen_tibble + if(!chromosomes_as_int){ + bim_path <- bigsnpr::sub_bed(bed_path, ".bim") + bim_table <- show_loci(x) %>% + dplyr::select(dplyr::all_of(c("chromosome", "name", "genetic_dist", "position", "allele_alt","allele_ref"))) + colnames(bim_table) <- c("chromosome", "name", "genetic_dist", "position", "allele_ref","allele_alt") + bim_table$allele_alt[is.na(bim_table$allele_alt)]<-"0" + bim_table$allele_ref[is.na(bim_table$allele_ref)]<-"0" + utils::write.table(bim_table,bim_path, row.names = FALSE, col.names = FALSE, quote = FALSE) + } else { + bim_path <- bigsnpr::sub_bed(bed_path, ".bim") + bim_table <- show_loci(x) %>% + dplyr::select(dplyr::all_of(c("chr_int", "name", "genetic_dist", "position", "allele_alt","allele_ref"))) + colnames(bim_table) <- c("chr_int", "name", "genetic_dist", "position", "allele_ref","allele_alt") + bim_table$allele_alt[is.na(bim_table$allele_alt)]<-"0" + bim_table$allele_ref[is.na(bim_table$allele_ref)]<-"0" + utils::write.table(bim_table,bim_path, row.names = FALSE, col.names = FALSE, quote = FALSE) + } + fam_path <- bigsnpr::sub_bed(bed_path, ".fam") + fam_table <- utils::read.table(fam_path) + fam_table$V2 <- x$id + # if this is a group tibble, use the grouping variable + if(inherits(x,"grouped_gen_tbl")){ + #fam_table$V1 <- x[group_vars(x)] + fam_table$V1 <- x %>% select(dplyr::group_vars(x)) %>% dplyr::pull(1) + } + utils::write.table(fam_table, fam_path, row.names = FALSE, col.names = FALSE, quote = FALSE) + + # return the path to the file + return(bed_path) } diff --git a/R/q_matrix.R b/R/q_matrix.R index a8a0accb..2625613e 100644 --- a/R/q_matrix.R +++ b/R/q_matrix.R @@ -1,48 +1,18 @@ -#' Read and structure .Q files or existing matrices as `q_matrix` or `q_matrix_list` objects. +#' Read and structure .Q files or existing matrices as `q_matrix` or `gt_admix` objects. #' #' This function reads .Q matrix files generated by external clustering -#' algorithms (such as ADMIXTURE) and transforms them into `q_matrix` or -#' `q_matrix_list` objects for plotting. +#' algorithms (such as ADMIXTURE) and transforms them into `gt_admix` objects. #' #' @param x can be: #' - a path to a directory containing .Q files -#' - a path to a single .Q file -#' - a matrix -#' - a dataframe -#' - a list of dataframes or matrices -#' @return either: -#' - a single `q_matrix` object -#' - a `q_matrix_list` object containing a list of Q matrices and a list of +#' @return +#' - a `gt_admix` object containing a list of Q matrices and a list of #' indices for each Q matrix separated by K #' @export -#' @aliases q_matrix_list -q_matrix <- function(x) { - # Check if input is a single data frame or matrix - if (inherits(x, "data.frame") || inherits(x, "matrix")) { - x <- as_q_matrix(x) - return(x) - - # Check if input is a flat list of q-matrices - } else if (is.list(x) && all(sapply(x, function(element) inherits(element, c("data.frame", "matrix"))))) { - # Convert each element to a q-matrix - matrix_list <- lapply(x, as_q_matrix) - return(cast_list_to_q_matrix(matrix_list)) - - # Check if input is an invalid path - } else if (!dir.exists(x) && !file.exists(x)){ - stop("Input is not a valid dataframe, file, directory, or list of q-matrices") - - # Check if input is a single file path - } else if (!dir.exists(x) && file.exists(x)) { - if (!grepl("\\.Q$", x)) { - stop("Input file does not end in '.Q'") - } - # Read file, convert to q-matrix, and process - x <- utils::read.table(x, header = FALSE) - x <- as_q_matrix(x) - return(x) +read_q_files <- function(x) { - # Check if input is a directory of q-files + if (!dir.exists(x)){ + stop("Input is not a valid directory") } else if (dir.exists(x)) { # List all .Q files in the directory files <- list.files(x, pattern = "\\.Q$", full.names = TRUE) @@ -53,8 +23,15 @@ q_matrix <- function(x) { } # Read all .Q files into a list and convert each to a q-matrix - matrix_list <- lapply(files, function(file) as_q_matrix(utils::read.table(file, header = FALSE))) - return(cast_list_to_q_matrix(matrix_list)) + matrix_list <- lapply(files, function(file) q_matrix(utils::read.table(file, header = FALSE))) + + # Get the number of columns for each Q matrix (which corresponds to K) + k_values <- sapply(matrix_list, ncol) + # Create main list containing k_list and matrix list + gt_admix <- list(k = k_values, Q = matrix_list) + # Set class and return the final list + class(gt_admix) <- c("gt_admix", class(gt_admix)) + return(gt_admix) } } @@ -65,8 +42,8 @@ q_matrix <- function(x) { #' #' @param x A matrix or a data frame #' @return A `q_matrix` object -#' @keywords internal -as_q_matrix <- function(x){ +#' @export +q_matrix <- function(x){ if (inherits(x,"data.frame")){ x <- as.matrix(x) } @@ -75,56 +52,35 @@ as_q_matrix <- function(x){ return(x) } -#' Creates a structured list of Q matrices and their K indices -#' -#' This function takes two or more `q_matrix` objects in a flat list format and -#' returns them in a structured list `q_matrix_list` where the first element -#' k_indices contains the index number for each Q matrix grouped by K, and the -#' second element q_matrices contains the Q matrices themselves. -#' -#' @param matrix_list a flat list of `q_matrix` objects -#' @returns a `q_matrix_list` object containing the Q matrices and K indices -#' @keywords internal -cast_list_to_q_matrix <- function(matrix_list) { - # Get the number of columns for each Q matrix (which corresponds to K) - k_values <- sapply(matrix_list, ncol) - # Create a list of lists, grouping matrix indices by k - k_list_of_lists <- split(seq_along(matrix_list), k_values) - # Rename the list so that each K is prefixed with "k" - names(k_list_of_lists) <- paste0("k", names(k_list_of_lists)) - # Create main list containing k_list and matrix list - q_matrix_list <- list(k_indices = k_list_of_lists, q_matrices = matrix_list) - # Set class and return the final list - class(q_matrix_list) <- c("q_matrix_list", class(q_matrix_list)) - return(q_matrix_list) -} - -#' Return a single Q matrix from a `q_matrix_list` object +#' Return a single Q matrix from a `gt_admix` object #' -#' This function retrieves a single Q matrix from a `q_matrix_list` object +#' This function retrieves a single Q matrix from a `gt_admix` object #' based on the specified k value and run number. #' -#' @param x A `q_matrix_list` object containing multiple Q matrices +#' @param x A `gt_admix` object containing multiple Q matrices #' @param ... Not used #' @param k The k value of the desired Q matrix #' @param run The run number of the desired Q matrix -#' @return A single Q matrix from the `q_matrix_list` object +#' @return A single Q matrix from the `gt_admix` object #' @export get_q_matrix <- function(x, ..., k, run) { - # Check if 'x' is a valid q_matrix_list object - if (!inherits(x, "q_matrix_list")) { - stop("Input must be a 'q_matrix_list' object") + # Check if 'x' is a valid gt_admix object + if (!inherits(x, "gt_admix")) { + stop("Input must be a 'gt_admix' object") } # Check if the requested k exists in k_indices - k_name <- paste0("k", k) - if (!k_name %in% names(x$k_indices)) { - stop("Specified k value not found in q_matrix_list") + if (!k %in% x$k) { + stop("Specified k value not found in gt_admix") } + k_values <- split(seq_along(x$k), x$k) + + k <- which(names(k_values)==k) + # Get the indices for the specified k - k_indices <- x$k_indices[[k_name]] + k_indices <- k_values[[k]] # Check if the specified run is valid within the chosen k if (run < 1 || run > length(k_indices)) { @@ -135,40 +91,48 @@ get_q_matrix <- function(x, ..., k, run) { matrix_index <- k_indices[run] # Retrieve and return the q-matrix - q_matrix <- x$q_matrices[[matrix_index]] + q_matrix <- x$Q[[matrix_index]] + # if a grouping varible exist, add it as an attribute + if ("group" %in% names(x)){ + attr(q_matrix, "group") <- x$group + } + class(q_matrix) <- c("q_matrix",class(q_matrix)) return(q_matrix) } -#' Return a single P matrix from a `q_matrix_list` object +#' Return a single P matrix from a `gt_admix` object #' -#' This function retrieves a single P matrix from a `q_matrix_list` object +#' This function retrieves a single P matrix from a `gt_admix` object #' based on the specified k value and run number. #' -#' @param x A `q_matrix_list` object containing P matrices +#' @param x A `gt_admix` object containing P matrices #' @param ... Not used #' @param k The k value of the desired P matrix #' @param run The run number of the desired P matrix -#' @return A single P matrix from the `q_matrix_list` object +#' @return A single P matrix from the `gt_admix` object #' @export get_p_matrix <- function(x, ..., k, run) { - # Check if 'x' is a valid q_matrix_list object - if (!inherits(x, "q_matrix_list")) { - stop("Input must be a 'q_matrix_list' object") + # Check if 'x' is a valid gt_admix object + if (!inherits(x, "gt_admix")) { + stop("Input must be a 'gt_admix' object") } # Check if p_matrices exists in x - if (!"p_matrices" %in% names(x)) { + if (!"P" %in% names(x)) { stop("Input object does not contain any P matrices") } # Check if the requested k exists in k_indices - k_name <- paste0("k", k) - if (!k_name %in% names(x$k_indices)) { - stop("Specified k value not found in q_matrix_list") + if (!k %in% x$k) { + stop("Specified k value not found in gt_admix") } + k_values <- split(seq_along(x$k), x$k) + + k <- which(names(k_values)==k) + # Get the indices for the specified k - k_indices <- x$k_indices[[k_name]] + k_indices <- k_values[[k]] # Check if the specified run is valid within the chosen k if (run < 1 || run > length(k_indices)) { @@ -179,7 +143,7 @@ get_p_matrix <- function(x, ..., k, run) { matrix_index <- k_indices[run] # Retrieve and return the p-matrix - p_matrix <- x$p_matrices[[matrix_index]] + p_matrix <- x$P[[matrix_index]] return(p_matrix) } @@ -213,11 +177,11 @@ tidy.q_matrix <- function(x, data, ...){ q_tbl <- q_tbl %>% dplyr::mutate(id = data$id, group = colu) - } else if ("population" %in% names(data)){ - q_tbl <- x %>% - tibble::as_tibble() %>% - dplyr::mutate(id = data$id, - group = data$population) + # } else if ("population" %in% names(data)){ + # q_tbl <- x %>% + # tibble::as_tibble() %>% + # dplyr::mutate(id = data$id, + # group = data$population) } else { q_tbl <- x %>% tibble::as_tibble() %>% @@ -288,34 +252,24 @@ autoplot.q_matrix <- function(object, data = NULL, annotate_group = TRUE, ...){ rlang::check_dots_empty() K <- ncol(object) + # create dataset if we don't have a gen_tibble if (is.null(data)) { q_tbl <- as.data.frame(object) q_tbl$id <- 1:nrow(q_tbl) - q_tbl <- q_tbl %>% tidyr::pivot_longer(cols = dplyr::starts_with(".Q"), - names_to = "q", values_to = "percentage") - plt <- ggplot2::ggplot(q_tbl, - ggplot2::aes(x = .data$id, - y = .data$percentage, - fill = .data$q)) + - ggplot2::geom_col(width = 1, - position = ggplot2::position_stack(reverse = TRUE))+ - ggplot2::labs(y = paste("K = ", K))+ - theme_distruct() + - scale_fill_distruct() - - plt - - # thin vertical lines show when plot is saved as .pdf and opened with certain viewers, - # this is a product of the specific viewer (seen on unix and mac), knitting to - # html instead fixes, or choosing a different output (not pdf) - - } else { + # if the q_matrix has a group attribute, add it to the data + if ("group" %in% names(attributes(object))){ + q_tbl$group <- rep(attr(object, "group"), each=nrow(q_tbl)/length(attr(object, "group"))) + } + } else { # if we have the info from the gen_tibble q_tbl <- tidy(object, data) - + } q_tbl <- q_tbl %>% tidyr::pivot_longer(cols = dplyr::starts_with(".Q"), names_to = "q", values_to = "percentage") %>% dplyr::mutate(percentage = as.numeric(.data$percentage)) + # resort data if we have a grouping variable and we plan to use it + if (("group" %in% names(q_tbl))&&annotate_group){ + q_tbl <- q_tbl %>% dplyr::group_by(.data$group, .data$id) %>% dplyr::arrange(.data$group, .data$id) %>% @@ -336,6 +290,7 @@ autoplot.q_matrix <- function(object, data = NULL, annotate_group = TRUE, ...){ q_tbl <- q_tbl %>% dplyr::mutate(id = factor(.data$id, levels = levels_q)) + } plt <- ggplot2::ggplot(q_tbl, ggplot2::aes(x = .data$id, @@ -348,37 +303,16 @@ autoplot.q_matrix <- function(object, data = NULL, annotate_group = TRUE, ...){ scale_fill_distruct() if (annotate_group){ - if (is.null(data)){ - warning("no annotation possible if 'gen_tbl' is NULL") + if (!"group" %in% names(q_tbl)) { + warning("no annotation possible if 'gen_tbl' is NULL and q_matrix does not contain group information") } else { plt <- plt + annotate_group_info(q_tbl) } } plt - } } +# thin vertical lines show when plot is saved as .pdf and opened with certain viewers, +# this is a product of the specific viewer (seen on unix and mac), knitting to +# html instead fixes, or choosing a different output (not pdf) -#' Summarise a Q matrix list -#' -#' Takes a `q_matrix_list` object and returns a summary of the Q files in the -#' object based on the number of repeats for each K value. -#' -#' @param object A `q_matrix_list` object. -#' @param ... not currently used -#' @return A summary of the number of repeats for each K value. -#' @export -summary.q_matrix_list <- function(object, ...) { - # Check if 'x' is a valid q_matrix_list object - if (!inherits(object, "q_matrix_list")) { - stop("Input must be a 'q_matrix_list' object") - } - k_values <- names(object$k_indices) - summary_df <- data.frame(K = integer(), Repeats = integer()) - for (k in k_values) { - k_numeric <- as.numeric(sub("k", "", k)) - num_repeats <- length(object$k_indices[[k]]) - summary_df <- rbind(summary_df, data.frame(K = k_numeric, Repeats = num_repeats)) - } - return(summary_df) -} diff --git a/_pkgdown.yml b/_pkgdown.yml index 3336c2b0..57babc06 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -13,6 +13,8 @@ reference: - gt_save - gt_load - gt_get_file_names + - gt_order_loci + - gt_update_backingfile - title: "Show" desc: "Functions to view loci and genotype information stored in a `gen_tibble`." contents: @@ -22,6 +24,7 @@ reference: - show_ploidy - count_loci - augment_loci + - is_loci_table_ordered - title: "Selecting loci" desc: "Functions to select loci from a `gen_tibble`, either based on loci information, or genotype information." contents: @@ -93,16 +96,20 @@ reference: - augment.gt_dapc - autoplot.gt_dapc - autoplot.gt_cluster_pca - - title: "K-clustering (e.g. ADMXITURE)" - desc: "Functions for creating, tidying, and visualising `q_matrix` objects." + - title: "K-clustering and ADMXITURE" + desc: "Functions for creating, tidying, and visualising `gt_admix` and `q_matrix` objects." contents: + - gt_admixture + - autoplot.gt_admix + - summary.gt_admix + - c.gt_admix - q_matrix + - read_q_files - get_q_matrix - get_p_matrix - tidy.q_matrix - augment.q_matrix - autoplot.q_matrix - - summary.q_matrix_list - distruct_colours - scale_fill_distruct - theme_distruct diff --git a/data-raw/gt_as_vcf.R b/data-raw/gt_as_vcf.R deleted file mode 100644 index 05d31361..00000000 --- a/data-raw/gt_as_vcf.R +++ /dev/null @@ -1,83 +0,0 @@ -#' Convert a `gen_tibble` to a VCF -#' -#' This function write a VCF from a `gen_tibble`. -#' -#' @param x a [`gen_tibble`], with population coded as 'population' -#' @param file the .vcf filename with a path, or NULL (the default) to use the -#' location of the backing files. -#' @param chunk_size the number of loci -#' processed at a time. Autmatically set if left to NULL -#' @returns the path of the .vcf file -#' @export -#' - -gt_as_vcf <- function(x, file = NULL, chunk_size = NULL, overwrite = TRUE){ - - if (is.null(file)){ - file <- bigstatsr::sub_bk(attr(x$genotypes,"bigsnp")$genotypes$backingfile,paste0(".vcf")) - } - if (file_ext(file)!="vcf"){ - file <- paste0(file,".vcf") - } - if (!overwrite && file.exists(file)){ - stop("file ", file," already exists; use 'overwrite=TRUE' to overwrite it") - } - - # if chunk is null, get the best guess of an efficient approach - if (is.null(chunk_size)){ - chunk_size <- bigstatsr::block_size(nrow(x)) - } - - # set up chunks - chunks_vec <- c( - rep(chunk_size, floor(count_loci(x) / chunk_size)), - count_loci(x) %% chunk_size - ) - chunks_vec_index <- c(1,cumsum(chunks_vec)) - - # generate the header - vcf_header <- c("##fileformat=VCFv4.3", - paste0("##fileDate=",format(Sys.time(), "%Y%m%e")), - paste0("##source=tidypopgen_v",packageVersion("tidypopgen"))) - # create copy of loci table - loci_tbl <- show_loci(x) - # reorder chromosomes levels in the order in which they appear - loci_tbl$chromosome <- forcats::fct_inorder(loci_tbl$chromosome) - # get max position for each chromosome - chromosome_summary <- loci_tbl %>% group_by(chromosome) %>% summarise(max = max(.data$position)) - for (chrom_i in 1:nrow(chromosome_summary)){ - vcf_header <- c(vcf_header, - paste0("##contig=")) - } - vcf_header <- c(vcf_header, '##INFO=') - vcf_header <- c(vcf_header, '##FORMAT=') - vcf_header <- c(vcf_header, paste0("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t", - paste(x$id, collapse="\t"))) - # write it to file - write.table(vcf_header, file = file, quote = FALSE, row.names = FALSE, - col.names = F) - # iterate over genotypes in chunks and append to the vcf body - for (chunk_i in seq_along(chunks_vec)) { - genotypes_matrix <- t(show_genotypes(x, - loci_indices = - chunks_vec_index[chunk_i]:chunks_vec_index[chunk_i+1])) - genotypes_matrix[genotypes_matrix==0] <- "0/0" - genotypes_matrix[genotypes_matrix==1] <- "0/1" - genotypes_matrix[genotypes_matrix==2] <- "1/1" - genotypes_matrix[is.na(genotypes_matrix)] <- "./." - # subset loci to this chunk - loci_sub <- show_loci(x)[chunks_vec_index[chunk_i]:chunks_vec_index[chunk_i+1],] - # add the other columns needed for the - loci_cols <- c("chromosome", "position", "name", "allele_ref", "allele_alt") - loci_sub <- loci_sub %>% select(any_of(loci_cols)) %>% - mutate(qual=".",filter=".", info="PR", format = "GT") - loci_sub[is.na(loci_sub)] <- "." - genotypes_matrix <- cbind(loci_sub, genotypes_matrix) - # append table to previous chunk - write.table(genotypes_matrix, file = file, quote = FALSE, append = TRUE, - col.names = FALSE, row.names = FALSE) - } - -} diff --git a/data-raw/new_reference_order b/data-raw/new_reference_order deleted file mode 100644 index e4d4020f..00000000 --- a/data-raw/new_reference_order +++ /dev/null @@ -1,79 +0,0 @@ -reference: - - title: "Construct and manipulate a gen_tibble" - desc: "Functions for load data into gen_tibbles, inspect them, and save them." - contents: - - gen_tibble - - show_genotypes - - show_loci - - show_ploidy - - count_loci - - rbind - - rbind_dry_run - - summary.rbind_report - - gt_save - - gt_load -reference: - - title: "Impute missing data" - desc: "Functions for load data into gen_tibbles and save them." - contents: - - gt_impute_simple - - gt_has_imputed - - gt_uses_imputed - - gt_set_imputed -reference: - - title: "Verbs to interact with loci" - desc: "Functions to compute summaries of loci (one value per locus)." - contents: - - select_loci - - select_loci_if - - loci_chromosomes - - loci_hwe - - loci_ld_clump - - loci_missingness - - loci_names - - loci_transitions - - loci_transversions -reference: - - title: "Verbs to interact with individuals" - desc: "Functions compute summaries for individuals (one value per individual)." - contents: - - select_loci -reference: - - title: "Verbs to interact with populations" - desc: "Functions compute summaries for populations (one value per populations in a grouped gen_tibble)." - contents: - - select_loci -reference: - - title: "Quality control" - desc: "Functions for QC of loci and individuals." - contents: - - qc_report_indiv - - qc_report_loci - - filter_high_relatedness -reference: - - title: "Population genetic analyses: PCA" - desc: "Functions for perform PCA, and manipualte and plot the results." - contents: - - gt_pca - - gt_pca_autoSVD - - gt_pca_partialSVD - - gt_pca_randomSVD - - predict_gt_pca - - tidy_gt_pca - - augement.gt_pca - - autoplot.gt_pca -reference: - - title: "Population genetic analyses: DAPC" - desc: "Functions for perform DAPC, and manipualte and plot the results." - contents: - - gt_dapc -reference: - - title: "A gen_tibble to other formats" - desc: "Functions for export gen_tibbles to other formats (in R, or to file)." - contents: - - gt_as genind - - gt_as genlight - - gt_as_geno_lea - - gt_as_hierfstat - - gt_as_plink - diff --git a/data-raw/run_admixture.R b/data-raw/run_admixture.R new file mode 100644 index 00000000..bc7135bd --- /dev/null +++ b/data-raw/run_admixture.R @@ -0,0 +1,93 @@ +#' Run ADMIXTURE from R +#' +#' This function runs ADMIXTURE, taking either a `gen_tibble` or a file as an input. +#' +#' @details This is a wrapper for the command line program ADMIXTURE. It can either +#' use a binary present in the main environemnt, or use a copy installed in a conda +#' environment. If the package `tidygenclust` is installed, and its conda environemnt +#' has been set up with `ADMIXTURE` in it (the default), it will automatically +#' use that version unless you change `conda_env` to none. +#' @param x a `gen_tibble` or a character giving the path of the input file +#' @param k an integer giving the number of clusters +#' @param crossval boolean, should cross validation be used to assess the fit (defaults ot FALSE) +#' @param n_cors number of cores (defaults to 1) +#' @param conda_env the name of the conda environment to use. If set to "auto", the default, +#' a copy from `tidygenclust` will be preferred if available, otherwise a local copy will +#' be used. "none" forces the use of a local copy, whilst any other string will +#' direct the function to use a custom conda environment. +#' @return a list with the following elements: +#' - `Q` a matrix with the admixture proportions +#' - `loglik` the log likelihood of the model +#' - `cv` the cross validation error (if `crossval` is TRUE) +#' @export + + +gt_admixture <- function (x, k, crossval = FALSE, n_cores = 1, conda_env="auto"){ + # if x is a character, check that it is file that exists + if (is.character(x)) { + if (!file.exists(x)) { + stop("The file ", x, " does not exist") + } + input_file <- x + } else { # if x is a gen_tibble + if (!is(x, "gen_tibble")) { + stop("x must be a gen_tibble or a character") + } + # write the gen_tibble to a temp file + input_file <- gt_as_plink(x, file = tempfile(fileext = ".")) + } + + # cast k as an integer + k <- as.integer(k) + + # set the arguments for admixture + admixture_args <- paste(input_file, k) + if (crossval) { + admixture_args <- paste(admixture_args, "--cv") + } + if (n_cores > 1) { + admixture_args <- paste(admixture_args, paste0("-j", n_cores)) + } + + # check that k is smaller than the number of samples + if (k > nrow(x)) { + stop("k must be smaller than the number of samples") + } + # if there is no reticulate + if (!requireNamespace("reticulate", quietly = TRUE)) { + if (any(conda_env %in% c("auto", "none"))){ + # use the system version of admixture (if it is installed) + if (system2("which", args = "admixture") == 0) { + system2("admixture", args = admixture_args) + } else { + stop("admixture is not installed in this path") + } + system2("admixture", args = admixture_args) + } else { + stop("The package reticulate is needed to use a conda environemnt in R.") + } + } else { # if reticulate is available + # if we have "auto" and the conda environment rtidygenclust does not exist, or + # if we have "none" + if (conda_env == "none" || (conda_env =="auto" && + !("rtidygenclust" %in% reticulate::conda_list()[["name"]]))) { + # we use the system version of admixture + # if admixture is installed + if (system2("which", args = "admixture") == 0) { + system2("admixture", args = admixture_args) + } else { + stop("admixture is not installed in this path") + } + } else { + if (conda_env =="auto" && + ("rtidygenclust" %in% reticulate::conda_list()[["name"]])){ + conda_env <- "rtidygenclust" + } + reticulate::conda_run2("admixture", args = admixture_args, + conda = conda_env) + } + } + +} + + diff --git a/inst/extdata/ploidy_test.vcf.gz b/inst/extdata/ploidy/ploidy_test.vcf.gz similarity index 100% rename from inst/extdata/ploidy_test.vcf.gz rename to inst/extdata/ploidy/ploidy_test.vcf.gz diff --git a/man/as_q_matrix.Rd b/man/as_q_matrix.Rd deleted file mode 100644 index fcf05437..00000000 --- a/man/as_q_matrix.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/q_matrix.R -\name{as_q_matrix} -\alias{as_q_matrix} -\title{Convert a standard matrix to a \code{q_matrix} object} -\usage{ -as_q_matrix(x) -} -\arguments{ -\item{x}{A matrix or a data frame} -} -\value{ -A \code{q_matrix} object -} -\description{ -Takes a single Q matrix that exists as either a matrix or a data frame and -returns a \code{q_matrix} object. -} -\keyword{internal} diff --git a/man/autoplot_gt_admix.Rd b/man/autoplot_gt_admix.Rd new file mode 100644 index 00000000..4fa9120a --- /dev/null +++ b/man/autoplot_gt_admix.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/autoplot_gt_admix.R +\name{autoplot_gt_admix} +\alias{autoplot_gt_admix} +\alias{autoplot.gt_admix} +\title{Autoplots for \code{gt_admix} objects} +\usage{ +\method{autoplot}{gt_admix}(object, type = c("cv", "barplot"), k = NULL, run = NULL, ...) +} +\arguments{ +\item{object}{an object of class \code{gt_admixture}} + +\item{type}{the type of plot (one of "cv", and "boxplot")} + +\item{k}{the value of \code{k} to plot (for \code{barplot} type only) +param repeat the repeat to plot (for \code{barplot} type only)} + +\item{run}{the run to plot (for \code{barplot} type only)} + +\item{...}{not used.} +} +\value{ +a \code{ggplot2} object +} +\description{ +For \code{gt_admix}, the following types of plots are available: +\itemize{ +\item \code{cv}: the cross-validation error for each value of \code{k} +\item \code{barplot} a standard barplot of the admixture proportions +} +} +\details{ +\code{autoplot} produces simple plots to quickly inspect an object. They are +not customisable; we recommend that you use \code{ggplot2} to produce publication +ready plots. +} diff --git a/man/autoplot_gt_pcadapt.Rd b/man/autoplot_gt_pcadapt.Rd index 64944b31..c6612af1 100644 --- a/man/autoplot_gt_pcadapt.Rd +++ b/man/autoplot_gt_pcadapt.Rd @@ -21,9 +21,9 @@ a \code{ggplot2} object \description{ For \code{gt_pcadapt}, the following types of plots are available: \itemize{ -\item \code{qq}: a qunatile-quantile plot of the p-values from pcadapt +\item \code{qq}: a qunatile-quantile plot of the p-values from \code{pcadapt} (wrapping \code{\link[bigsnpr:snp_qq]{bigsnpr::snp_qq()}}) -\item \code{manhattan} a manhattan plot of the p-values from pcadapt +\item \code{manhattan} a manhattan plot of the p-values from \code{pcadapt} (wrapping \code{\link[bigsnpr:snp_manhattan]{bigsnpr::snp_manhattan()}}) } } diff --git a/man/c.gt_admix.Rd b/man/c.gt_admix.Rd new file mode 100644 index 00000000..8028e6ec --- /dev/null +++ b/man/c.gt_admix.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gt_admix_methods.R +\name{c.gt_admix} +\alias{c.gt_admix} +\title{Combine method for gt_admix objects} +\usage{ +\method{c}{gt_admix}(...) +} +\arguments{ +\item{...}{A list of \code{gt_admix} objects} +} +\value{ +A \code{gt_admix} object with the combined data +} +\description{ +Combine method for gt_admix objects +} diff --git a/man/cast_list_to_q_matrix.Rd b/man/cast_list_to_q_matrix.Rd deleted file mode 100644 index 3625b841..00000000 --- a/man/cast_list_to_q_matrix.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/q_matrix.R -\name{cast_list_to_q_matrix} -\alias{cast_list_to_q_matrix} -\title{Creates a structured list of Q matrices and their K indices} -\usage{ -cast_list_to_q_matrix(matrix_list) -} -\arguments{ -\item{matrix_list}{a flat list of \code{q_matrix} objects} -} -\value{ -a \code{q_matrix_list} object containing the Q matrices and K indices -} -\description{ -This function takes two or more \code{q_matrix} objects in a flat list format and -returns them in a structured list \code{q_matrix_list} where the first element -k_indices contains the index number for each Q matrix grouped by K, and the -second element q_matrices contains the Q matrices themselves. -} -\keyword{internal} diff --git a/man/get_p_matrix.Rd b/man/get_p_matrix.Rd index d659c657..bf252122 100644 --- a/man/get_p_matrix.Rd +++ b/man/get_p_matrix.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/q_matrix.R \name{get_p_matrix} \alias{get_p_matrix} -\title{Return a single P matrix from a \code{q_matrix_list} object} +\title{Return a single P matrix from a \code{gt_admix} object} \usage{ get_p_matrix(x, ..., k, run) } \arguments{ -\item{x}{A \code{q_matrix_list} object containing P matrices} +\item{x}{A \code{gt_admix} object containing P matrices} \item{...}{Not used} @@ -16,9 +16,9 @@ get_p_matrix(x, ..., k, run) \item{run}{The run number of the desired P matrix} } \value{ -A single P matrix from the \code{q_matrix_list} object +A single P matrix from the \code{gt_admix} object } \description{ -This function retrieves a single P matrix from a \code{q_matrix_list} object +This function retrieves a single P matrix from a \code{gt_admix} object based on the specified k value and run number. } diff --git a/man/get_q_matrix.Rd b/man/get_q_matrix.Rd index 56197801..30ace29c 100644 --- a/man/get_q_matrix.Rd +++ b/man/get_q_matrix.Rd @@ -2,12 +2,12 @@ % Please edit documentation in R/q_matrix.R \name{get_q_matrix} \alias{get_q_matrix} -\title{Return a single Q matrix from a \code{q_matrix_list} object} +\title{Return a single Q matrix from a \code{gt_admix} object} \usage{ get_q_matrix(x, ..., k, run) } \arguments{ -\item{x}{A \code{q_matrix_list} object containing multiple Q matrices} +\item{x}{A \code{gt_admix} object containing multiple Q matrices} \item{...}{Not used} @@ -16,9 +16,9 @@ get_q_matrix(x, ..., k, run) \item{run}{The run number of the desired Q matrix} } \value{ -A single Q matrix from the \code{q_matrix_list} object +A single Q matrix from the \code{gt_admix} object } \description{ -This function retrieves a single Q matrix from a \code{q_matrix_list} object +This function retrieves a single Q matrix from a \code{gt_admix} object based on the specified k value and run number. } diff --git a/man/gt_admixture.Rd b/man/gt_admixture.Rd new file mode 100644 index 00000000..fb9d6857 --- /dev/null +++ b/man/gt_admixture.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gt_admixture.R +\name{gt_admixture} +\alias{gt_admixture} +\title{Run ADMIXTURE from R} +\usage{ +gt_admixture( + x, + k, + n_runs = 1, + crossval = FALSE, + n_cores = 1, + seed = NULL, + conda_env = "none" +) +} +\arguments{ +\item{x}{a \code{gen_tibble} or a character giving the path of the input PLINK bed file} + +\item{k}{an integer giving the number of clusters} + +\item{n_runs}{the number of runs for each k value (defaults to 1)} + +\item{crossval}{boolean, should cross validation be used to assess the fit (defaults ot FALSE)} + +\item{n_cores}{number of cores (defaults to 1)} + +\item{seed}{the seed for the random number generator (defaults to NULL)} + +\item{conda_env}{the name of the conda environment to use. "none" forces the use of a local copy, whilst any other string will +direct the function to use a custom conda environment.} +} +\value{ +an object of class \code{gt_admix} consisting of a list with the following elements: +\itemize{ +\item \code{k} the number of clusters +\item \code{Q} a matrix with the admixture proportions +\item \code{P} a matrix with the allele frequencies +\item \code{log} a log of the output generated by ADMIXTURE (usually printed on the screen when running from the command line) +\item \code{cv} the cross validation error (if \code{crossval} is TRUE) +\item \code{loglik} the log likelihood of the model +\item \code{id} the id column of the input \code{gen_tibble} (if applicable) +\item \code{group} the group column of the input \code{gen_tibble} (if applicable) +} +} +\description{ +This function runs ADMIXTURE, taking either a \code{gen_tibble} or a file as an input. +This is a wrapper that runs ADMIXTURE from the command line, and reads the output +into R. It can run multiple values of \code{k} and multiple repeats for each \code{k}. +} +\details{ +This is a wrapper for the command line program ADMIXTURE. It can either +use a binary present in the main environment, or use a copy installed in a conda +environment. +} diff --git a/man/gt_as_plink.Rd b/man/gt_as_plink.Rd index 40d8948c..382d331d 100644 --- a/man/gt_as_plink.Rd +++ b/man/gt_as_plink.Rd @@ -4,7 +4,13 @@ \alias{gt_as_plink} \title{Export a \code{gen_tibble} object to PLINK bed format} \usage{ -gt_as_plink(x, file = NULL, type = c("bed", "ped", "raw"), overwrite = TRUE) +gt_as_plink( + x, + file = NULL, + type = c("bed", "ped", "raw"), + overwrite = TRUE, + chromosomes_as_int = FALSE +) } \arguments{ \item{x}{a \code{\link{gen_tibble}} object} @@ -15,6 +21,8 @@ the output file will have the same path and prefix of the backingfile.} \item{type}{one of "bed", "ped" or "raw"} \item{overwrite}{boolean whether to overwrite the file.} + +\item{chromosomes_as_int}{boolean whether to use the integer representation of the chromosomes} } \value{ the path of the saved file diff --git a/man/q_matrix.Rd b/man/q_matrix.Rd index 8c6ee34f..4690074d 100644 --- a/man/q_matrix.Rd +++ b/man/q_matrix.Rd @@ -2,31 +2,17 @@ % Please edit documentation in R/q_matrix.R \name{q_matrix} \alias{q_matrix} -\alias{q_matrix_list} -\title{Read and structure .Q files or existing matrices as \code{q_matrix} or \code{q_matrix_list} objects.} +\title{Convert a standard matrix to a \code{q_matrix} object} \usage{ q_matrix(x) } \arguments{ -\item{x}{can be: -\itemize{ -\item a path to a directory containing .Q files -\item a path to a single .Q file -\item a matrix -\item a dataframe -\item a list of dataframes or matrices -}} +\item{x}{A matrix or a data frame} } \value{ -either: -\itemize{ -\item a single \code{q_matrix} object -\item a \code{q_matrix_list} object containing a list of Q matrices and a list of -indices for each Q matrix separated by K -} +A \code{q_matrix} object } \description{ -This function reads .Q matrix files generated by external clustering -algorithms (such as ADMIXTURE) and transforms them into \code{q_matrix} or -\code{q_matrix_list} objects for plotting. +Takes a single Q matrix that exists as either a matrix or a data frame and +returns a \code{q_matrix} object. } diff --git a/man/read_q_files.Rd b/man/read_q_files.Rd new file mode 100644 index 00000000..aff51efa --- /dev/null +++ b/man/read_q_files.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/q_matrix.R +\name{read_q_files} +\alias{read_q_files} +\title{Read and structure .Q files or existing matrices as \code{q_matrix} or \code{gt_admix} objects.} +\usage{ +read_q_files(x) +} +\arguments{ +\item{x}{can be: +\itemize{ +\item a path to a directory containing .Q files +}} +} +\value{ +\itemize{ +\item a \code{gt_admix} object containing a list of Q matrices and a list of +indices for each Q matrix separated by K +} +} +\description{ +This function reads .Q matrix files generated by external clustering +algorithms (such as ADMIXTURE) and transforms them into \code{gt_admix} objects. +} diff --git a/man/summary.gt_admix.Rd b/man/summary.gt_admix.Rd new file mode 100644 index 00000000..c876df26 --- /dev/null +++ b/man/summary.gt_admix.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gt_admix_methods.R +\name{summary.gt_admix} +\alias{summary.gt_admix} +\title{Summary method for gt_admix objects} +\usage{ +\method{summary}{gt_admix}(object, ...) +} +\arguments{ +\item{object}{a \code{gt_admix} object} + +\item{...}{unused (necessary for compatibility with generic function)} +} +\value{ +A summary of the \code{gt_admix} object +} +\description{ +Summary method for gt_admix objects +} diff --git a/man/summary.q_matrix_list.Rd b/man/summary.q_matrix_list.Rd deleted file mode 100644 index 61e2cf6f..00000000 --- a/man/summary.q_matrix_list.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/q_matrix.R -\name{summary.q_matrix_list} -\alias{summary.q_matrix_list} -\title{Summarise a Q matrix list} -\usage{ -\method{summary}{q_matrix_list}(object, ...) -} -\arguments{ -\item{object}{A \code{q_matrix_list} object.} - -\item{...}{not currently used} -} -\value{ -A summary of the number of repeats for each K value. -} -\description{ -Takes a \code{q_matrix_list} object and returns a summary of the Q files in the -object based on the number of repeats for each K value. -} diff --git a/tests/testthat/bim_path b/tests/testthat/bim_path new file mode 100644 index 00000000..8560f2ef --- /dev/null +++ b/tests/testthat/bim_path @@ -0,0 +1,6 @@ +chr1 rs1 0 3 A T +chr1 rs2 0 5 T C +chr1 rs3 0 65 C NA +chr1 rs4 0 343 G C +chr2 rs5 0 23 C G +chr2 rs6 0 456 T A diff --git a/tests/testthat/test_count_vcf_individuals.R b/tests/testthat/test_count_vcf_individuals.R index 49789a54..44f8648b 100644 --- a/tests/testthat/test_count_vcf_individuals.R +++ b/tests/testthat/test_count_vcf_individuals.R @@ -1,5 +1,5 @@ test_that("count_vcf_individuals with gzfile",{ - vcf_path <- system.file("/extdata/ploidy_test.vcf.gz", + vcf_path <- system.file("/extdata/ploidy/ploidy_test.vcf.gz", package = "tidypopgen") n_individuals <- count_vcf_individuals(vcf_path) vcfr_obj <- vcfR::read.vcfR(vcf_path, verbose = FALSE) diff --git a/tests/testthat/test_count_vcf_variants.R b/tests/testthat/test_count_vcf_variants.R index e56232bb..7b7bb305 100644 --- a/tests/testthat/test_count_vcf_variants.R +++ b/tests/testthat/test_count_vcf_variants.R @@ -1,5 +1,5 @@ test_that("count_vcf_variants with gzfile",{ - vcf_path <- system.file("/extdata/ploidy_test.vcf.gz", + vcf_path <- system.file("/extdata/ploidy/ploidy_test.vcf.gz", package = "tidypopgen") n_variants <- count_vcf_variants(vcf_path) vcfr_obj <- vcfR::read.vcfR(vcf_path, verbose = FALSE) diff --git a/tests/testthat/test_gt_admixture.R b/tests/testthat/test_gt_admixture.R new file mode 100644 index 00000000..9d2e4dab --- /dev/null +++ b/tests/testthat/test_gt_admixture.R @@ -0,0 +1,63 @@ + +# skip if admixture is not installed +skip_if((system2("which", args = "admixture", stdout = NULL) != 0) && !requireNamespace("fastmixturer", quietly = TRUE)) +# set the input file +vcf_path <- system.file("/extdata/anolis/punctatus_t70_s10_n46_filtered.recode.vcf.gz", + package = "tidypopgen") +anole_gt <- gen_tibble(vcf_path, quiet = TRUE, backingfile = tempfile("anolis_")) +pops_path <- system.file("/extdata/anolis/plot_order_punctatus_n46.csv", + package = "tidypopgen") +pops <- readr::read_csv(pops_path, show_col_types = FALSE) +anole_gt <- anole_gt %>% mutate(id = gsub('punc_',"",.data$id,)) +anole_gt <- anole_gt %>% mutate(population = pops$pop[match(pops$ID,.data$id)]) + +test_that("run admixture as single run", { + # we create a plink file to test the function + anole_plink <- gt_as_plink(anole_gt, file = tempfile(), chromosomes_as_int=TRUE) + # run admixture + anole_adm <- gt_admixture(anole_plink, k = 3, crossval = FALSE, n_cores = 1, seed = 123, conda_env = "none") + # check the output + expect_true(nrow(anole_adm$Q[[1]])==nrow(anole_gt)) + expect_true(ncol(anole_adm$Q[[1]])==3) + expect_true(is.null(anole_adm$cv)) + # no create another run and combine them + anole_adm2 <- gt_admixture(anole_plink, k = 2, crossval = FALSE, n_cores = 1, seed = 345, conda_env = "none") + anole_adm_comb <- c(anole_adm, anole_adm2) + expect_true(nrow(anole_adm_comb$Q[[1]])==nrow(anole_gt)) + expect_true(ncol(anole_adm_comb$Q[[1]])==3) + expect_true(ncol(anole_adm_comb$Q[[2]])==2) + expect_true(all(anole_adm_comb$k==c(3,2))) + # run admixture with crossval + anole_adm3 <- gt_admixture(anole_gt, k = 3, crossval = TRUE, n_cores = 2, seed = 345, conda_env = "none") + expect_false(is.null(anole_adm3$cv)) + anole_adm4 <- gt_admixture(anole_gt, k = 3, crossval = TRUE, n_cores = 2, seed = 123, conda_env = "none") + anole_adm_comb2 <- c(anole_adm3, anole_adm4) + # TODO write some check for the object above + +}) + +test_that("run admixture as multiple runs", { + anole_gt <- anole_gt %>% dplyr::group_by(population) + anole_adm_cv <- gt_admixture(anole_gt, k = 2:4, n_runs =2, crossval = TRUE, n_cores = 2, seed = c(123,234), conda_env = "none") + expect_true(length(anole_adm_cv$k)==6) + expect_true(length(anole_adm_cv$Q)==6) + expect_true(length(anole_adm_cv$cv)==6) + # if we created the object from a grouped gen_tibble, we should have the id and group columns + expect_true(length(anole_adm_cv$id)==nrow(anole_gt)) + expect_true(length(anole_adm_cv$group)==nrow(anole_gt)) + # error if we have the wrong number of seeds + expect_error(gt_admixture(anole_gt, k = 2:4, n_runs =2, crossval = TRUE, n_cores = 2, seed = c(123), conda_env = "none"), + "'seeds' should be a vector of ") + # plot the crossval + cross_plot <- autoplot(anole_adm_cv) + # check that this is indeed a ggplot object + expect_true(inherits(cross_plot, "ggplot")) + # plot the admixture results + expect_error(autoplot(anole_adm_cv, type = "barplot"), + "^You must specify a value for k") + expect_error(autoplot(anole_adm_cv, type = "barplot", k=2), + "^You must specify a value for repeat") + bar_plot <- autoplot(anole_adm_cv, type = "barplot", k=2,run = 1) + # check that this is indeed a ggplot object + expect_true(inherits(bar_plot, "ggplot")) +}) diff --git a/tests/testthat/test_gt_as_plink.R b/tests/testthat/test_gt_as_plink.R index 6605e09c..35ebfe35 100644 --- a/tests/testthat/test_gt_as_plink.R +++ b/tests/testthat/test_gt_as_plink.R @@ -89,3 +89,51 @@ test_that("test for overwriting files",{ }) +test_that("gt_as_plink uses loci and indiv information from the gen_tibble",{ + + test_gt$population <- c("population1","population1","population2") + test_gt$id <- c("indiv1","indiv2","indiv3") + + loci <- show_loci(test_gt) + loci$name <- paste0("rs_id",1:6) + show_loci(test_gt) <- loci + + # save using gt_as_plink + bed_path <- gt_as_plink(test_gt, file = paste0(tempfile(),".bed")) + + # read the bed file + test_gt2 <- gen_tibble(bed_path, quiet=TRUE) + + expect_equal(show_loci(test_gt),show_loci(test_gt2)) + expect_equal(test_gt$id, test_gt2$id) + + # now check the same with a grouped gen_tibble + test_gt_grouped <- test_gt %>% group_by(population) + + # write the bed + bed_path2 <- gt_as_plink(test_gt_grouped, file = paste0(tempfile(),".bed")) + + # read the bed + test_gt3 <- gen_tibble(bed_path2, quiet=TRUE) + + expect_equal(show_loci(test_gt),show_loci(test_gt3)) + expect_equal(test_gt$id, test_gt3$id) + expect_equal(test_gt$population, test_gt3$population) + +}) + +test_that("gt_as_plink can use chr_int",{ + + # save using gt_as_plink with chromosomes_as_int TRUE + bed_path <- gt_as_plink(test_gt, file = paste0(tempfile(),".bed"), chromosomes_as_int = TRUE) + + # read the bed file + test_gt_chr_int <- gen_tibble(bed_path, quiet=TRUE) + + # take original bed, remove the chr and transform to an integer + result <- as.integer(sub("^chr", "", show_loci(test_gt)$chromosome)) + + # compare + expect_equal(result, show_loci(test_gt_chr_int)$chromosome) +}) + diff --git a/tests/testthat/test_gt_extract_f2.R b/tests/testthat/test_gt_extract_f2.R index 2ef26596..3999a269 100644 --- a/tests/testthat/test_gt_extract_f2.R +++ b/tests/testthat/test_gt_extract_f2.R @@ -24,6 +24,8 @@ test_that("extract f2 correctly",{ # process the data with admixtools (note that we get some warnings) bed_file <- gt_as_plink(test_gt, file = tempfile("test_bed")) + test_gt2 <- gen_tibble(bed_file, quiet=TRUE) + # test af table # without adjusting pseudohaploids adm_aftable <- admixtools:::anygeno_to_aftable(bigsnpr::sub_bed(bed_file), diff --git a/tests/testthat/test_gt_pca.R b/tests/testthat/test_gt_pca.R index b2a8a09a..c2706981 100644 --- a/tests/testthat/test_gt_pca.R +++ b/tests/testthat/test_gt_pca.R @@ -64,7 +64,7 @@ test_that("adjusting roll_size fixes gt_pca_autoSVD problem ",{ test <- gen_tibble(x = example_genotypes, loci = example_loci, indiv_meta = example_indiv_meta, quiet = TRUE) # gen_tibble with no missingness runs - test_pca <- test %>% gt_pca_autoSVD() + test_pca <- test %>% gt_pca_autoSVD(verbose = FALSE) # Now try with imputed data library(bigsnpr) @@ -78,9 +78,9 @@ test_that("adjusting roll_size fixes gt_pca_autoSVD problem ",{ expect_error(missing_pca <- missing_gt %>% gt_pca_autoSVD(verbose = FALSE), "Parameter 'size' is too large.") # adjusting roll_size fixes the error - test_pca_roll0 <- missing_gt %>% gt_pca_autoSVD(roll_size = 0) + test_pca_roll0 <- missing_gt %>% gt_pca_autoSVD(roll_size = 0, verbose = FALSE) expect_s3_class(test_pca_roll0, "gt_pca") - test_pca_roll7 <- missing_gt %>% gt_pca_autoSVD(roll_size = 7) + test_pca_roll7 <- missing_gt %>% gt_pca_autoSVD(roll_size = 7, verbose = FALSE) expect_s3_class(test_pca_roll7, "gt_pca") @@ -95,7 +95,7 @@ test_that("adjusting roll_size fixes gt_pca_autoSVD problem ",{ expect_error(missing_pca <- families %>% gt_pca_autoSVD(verbose = FALSE), "Parameter 'size' is too large.") # adjusting roll_size fixes the error - test_pca_families_roll7 <- families %>% gt_pca_autoSVD(roll_size = 7) + test_pca_families_roll7 <- families %>% gt_pca_autoSVD(roll_size = 7, verbose = FALSE) expect_s3_class(test_pca_families_roll7, "gt_pca") }) diff --git a/tests/testthat/test_ploidy_vcf.R b/tests/testthat/test_ploidy_vcf.R index b8cc1bd7..2fbbb5fd 100644 --- a/tests/testthat/test_ploidy_vcf.R +++ b/tests/testthat/test_ploidy_vcf.R @@ -1,5 +1,5 @@ test_that("import a vcf with multiple ploidy",{ - vcf_path <- system.file("/extdata/ploidy_test.vcf.gz", + vcf_path <- system.file("/extdata/ploidy/ploidy_test.vcf.gz", package = "tidypopgen") test_gt <- gen_tibble(vcf_path, backingfile = tempfile(), quiet = TRUE) # it's mixed ploidy diff --git a/tests/testthat/test_q_matrix.R b/tests/testthat/test_q_matrix.R index 8a247dac..ad655695 100644 --- a/tests/testthat/test_q_matrix.R +++ b/tests/testthat/test_q_matrix.R @@ -1,8 +1,6 @@ test_that("q_matrix reads q_files",{ #read a single .Q Q_path <- system.file("/extdata/anolis/anolis_ld_run1.3.Q", package = "tidypopgen") - anolis_q_k3 <- q_matrix(Q_path) - expect_true(inherits(anolis_q_k3,"q_matrix")) #read from df q_mat <- read.table(Q_path) @@ -14,32 +12,24 @@ test_that("q_matrix reads q_files",{ anolis_q_k3_mat <- q_matrix(q_mat) expect_true(inherits(anolis_q_k3_mat,"q_matrix")) - #read from list of .Qs - q_folder <- system.file("/extdata/anolis", package = "tidypopgen") - q_list_no_structure <- list(as.matrix(read.table(system.file("/extdata/anolis/anolis_ld_run1.3.Q", package = "tidypopgen"), header = TRUE)), - as.matrix(read.table(system.file("/extdata/anolis/anolis_ld_run1.4.Q", package = "tidypopgen"), header = TRUE))) - anolis_q <- q_matrix(q_list_no_structure) - expect_true(inherits(anolis_q,"q_matrix_list")) - expect_true(inherits(anolis_q$q_matrices[[1]],"q_matrix")) - #read multiple .Q - anolis_q <- q_matrix(q_folder) - expect_true(inherits(anolis_q,"q_matrix_list")) - expect_true(inherits(anolis_q$q_matrices[[1]],"q_matrix")) + q_folder <- system.file("/extdata/anolis", package = "tidypopgen") + anolis_q <- read_q_files(q_folder) + expect_true(inherits(anolis_q,"gt_admix")) + expect_true(inherits(anolis_q$Q[[1]],"q_matrix")) #check errors P_path <- system.file("/extdata/anolis/anolis_ld_run1.3.P", package = "tidypopgen") - expect_error(q_matrix(P_path),"Input file does not end in '.Q'") non_path <- "an/invalid/path" - expect_error(q_matrix(non_path),"Input is not a valid dataframe, file, directory, or list of q-matrices") + expect_error(read_q_files(non_path),"Input is not a valid directory") path_no_q <- system.file("/extdata/", package = "tidypopgen") - expect_error(q_matrix(path_no_q),"No .Q files found in the directory") + expect_error(read_q_files(path_no_q),"No .Q files found in the directory") }) test_that("get_q_matrix returns correct q-matrix",{ #read multiple q into a list q_folder <- system.file("/extdata/anolis", package = "tidypopgen") - q_list <- q_matrix(q_folder) + q_list <- read_q_files(q_folder) #check returns a single q-matrix object anolis_q_k3_r1 <- get_q_matrix(q_list, k=3, run=1) @@ -51,20 +41,20 @@ test_that("get_q_matrix returns correct q-matrix",{ expect_equal(ncol(anolis_q_k4_r1),4) #check errors if outside of k or run range - expect_error(get_q_matrix(q_list, k=5, run=1),"Specified k value not found in q_matrix_list") - expect_error(get_q_matrix(q_list, k=3, run=2),"Specified run is out of range for the given k value") + expect_error(get_q_matrix(q_list, k=5, run=1),"Specified k value not found in gt_admix") - #check errors if give non 'q_matrix_list' object + #check errors if give non 'gt_admix' object q_list_no_structure <- list(as.matrix(read.table(system.file("/extdata/anolis/anolis_ld_run1.3.Q", package = "tidypopgen"), header = TRUE)), as.matrix(read.table(system.file("/extdata/anolis/anolis_ld_run1.4.Q", package = "tidypopgen"), header = TRUE))) - expect_error(get_q_matrix(q_list_no_structure, k=3, run=1),"Input must be a 'q_matrix_list' object") - expect_error(get_q_matrix(q_list$q_matrices[[1]], k=3, run=1),"Input must be a 'q_matrix_list' object") + expect_error(get_q_matrix(q_list_no_structure, k=3, run=1),"Input must be a 'gt_admix' object") + expect_error(get_q_matrix(q_list$Q[[1]], k=3, run=1),"Input must be a 'gt_admix' object") + expect_error(get_q_matrix(q_list, k=3, run=3),"Specified run is out of range") }) test_that("get_p_matrix returns correct p-matrix",{ #read multiple q into a list q_folder <- system.file("/extdata/anolis", package = "tidypopgen") - q_list <- q_matrix(q_folder) + q_list <- read_q_files(q_folder) #check error if 'q_matrix_list' object doesn't contain p-files expect_error(get_p_matrix(q_list, k=5, run=1),"Input object does not contain any P matrices") @@ -72,11 +62,11 @@ test_that("get_p_matrix returns correct p-matrix",{ #check errors if give non 'q_matrix_list' object q_list_no_structure <- list(as.matrix(read.table(system.file("/extdata/anolis/anolis_ld_run1.3.Q", package = "tidypopgen"), header = TRUE)), as.matrix(read.table(system.file("/extdata/anolis/anolis_ld_run1.4.Q", package = "tidypopgen"), header = TRUE))) - expect_error(get_p_matrix(q_list_no_structure, k=3, run=1),"Input must be a 'q_matrix_list' object") - expect_error(get_p_matrix(q_list$q_matrices[[1]], k=3, run=1),"Input must be a 'q_matrix_list' object") + expect_error(get_p_matrix(q_list_no_structure, k=3, run=1),"Input must be a 'gt_admix' object") + expect_error(get_p_matrix(q_list$Q[[1]], k=3, run=1),"Input must be a 'gt_admix' object") #add P files to q_list - q_list$p_matrices <- list(as.matrix(read.table(system.file("/extdata/anolis/anolis_ld_run1.3.P", package = "tidypopgen"), header = TRUE)), + q_list$P <- list(as.matrix(read.table(system.file("/extdata/anolis/anolis_ld_run1.3.P", package = "tidypopgen"), header = TRUE)), as.matrix(read.table(system.file("/extdata/anolis/anolis_ld_run1.4.P", package = "tidypopgen"), header = TRUE))) #check returns a single q-matrix object @@ -90,28 +80,7 @@ test_that("get_p_matrix returns correct p-matrix",{ expect_equal(ncol(anolis_p_k4_r1),4) #check errors if outside of k or run range - expect_error(get_p_matrix(q_list, k=5, run=1),"Specified k value not found in q_matrix_list") - expect_error(get_p_matrix(q_list, k=3, run=2),"Specified run is out of range for the given k value") -}) - -test_that("summary of q_matrix",{ - #read multiple q into a list - q_folder <- system.file("/extdata/anolis", package = "tidypopgen") - q_list <- q_matrix(q_folder) - sum <- summary.q_matrix_list(q_list) - - #make expected - expected_summary <- data.frame( - K = c(3, 4), - Repeats = c(1, 1)) - - #check summary of q_matrix_list - expect_equal(sum, expected_summary) - - #check errors if give non 'q_matrix_list' object - q_list_no_structure <- list(as.matrix(read.table(system.file("/extdata/anolis/anolis_ld_run1.3.Q", package = "tidypopgen"), header = TRUE)), - as.matrix(read.table(system.file("/extdata/anolis/anolis_ld_run1.4.Q", package = "tidypopgen"), header = TRUE))) - expect_error(summary.q_matrix_list(q_list_no_structure),"Input must be a 'q_matrix_list' object") - + expect_error(get_p_matrix(q_list, k=5, run=1),"Specified k value not found in gt_admix") + expect_error(get_p_matrix(q_list, k=3, run=3),"Specified run is out of range") }) diff --git a/vignettes/a03_example_clustering_and_dapc.Rmd b/vignettes/a03_example_clustering_and_dapc.Rmd index 6c0285a4..4eb4e994 100644 --- a/vignettes/a03_example_clustering_and_dapc.Rmd +++ b/vignettes/a03_example_clustering_and_dapc.Rmd @@ -415,7 +415,7 @@ for(x in k_values){ And read them back into a `q_matrix_list`: ```{r} -q_list <- q_matrix(dir) +q_list <- read_q_files(dir) summary(q_list) ```