diff --git a/DESCRIPTION b/DESCRIPTION index b9e73464..ae610806 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: tidypopgen Title: Tidy Population Genetics -Version: 0.0.0.9020 +Version: 0.0.0.9021 Authors@R: c(person("Evie", "Carter", role = c("aut")), person("Andrea", "Manica", email = "am315@cam.ac.uk", diff --git a/NAMESPACE b/NAMESPACE index d798aba6..65a0e6e8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -94,6 +94,7 @@ export(filter_high_relatedness) export(gen_tibble) export(get_p_matrix) export(get_q_matrix) +export(gt_admix_reorder_q) export(gt_admixture) export(gt_as_genind) export(gt_as_genlight) diff --git a/R/autoplot_gt_admix.R b/R/autoplot_gt_admix.R index 6b4452a5..ab9fe07f 100644 --- a/R/autoplot_gt_admix.R +++ b/R/autoplot_gt_admix.R @@ -13,7 +13,7 @@ #' @param k the value of `k` to plot (for `barplot` type only) #' param repeat the repeat to plot (for `barplot` type only) #' @param run the run to plot (for `barplot` type only) -#' @param ... not used. +#' @param ... additional arguments to be passed to autoplot method for q_matrices [autoplot_q_matrix()]. #' @returns a `ggplot2` object #' @name autoplot_gt_admix #' @export @@ -43,6 +43,6 @@ autoplot.gt_admix <- function(object, } # get the Q matrix for the specified k and repeat Q <- get_q_matrix(object, k = k, run = run) - autoplot(Q) + autoplot(Q, ...) } } diff --git a/R/gt_admix_methods.R b/R/gt_admix_methods.R index 5f765bda..8048996b 100644 --- a/R/gt_admix_methods.R +++ b/R/gt_admix_methods.R @@ -78,3 +78,54 @@ summary.gt_admix <- function(object, ...) { cat("$cv for cross validation error\n") } } + + +#' Reorder the q matrices based on the grouping variable +#' +#' This function reorders the q matrices in a `gt_admix` object based on the grouping variable. This is useful before plotting when the +#' samples from each group are not adjacent to each other in the q matrix. +#' +#' @param x a `gt_admix` object, possibly with a grouping variable +#' @param group a character vector with the grouping variable (if there is no grouping variable info in `x`) +#' @return a `gt_admix` object with the q matrices reordered +#' @export + +gt_admix_reorder_q <- function(x, group = NULL) { + # check that x is a gt_admix object + if (!inherits(x, "gt_admix")) { + stop("x must be a gt_admix object") + } + # if we have a gruop variable, + if (!is.null(group)){ + # check that it is the same length as the q matrix + if (length(group) != nrow(x$Q[[1]])) { + stop("The length of the group variable must be the same as the number of rows in the Q matrix") + } + # and use it + if (!is.null(group)) { + x$group <- group + } + } + # if we have no group variable, check that we have one in the object + if (is.null(x$group) ) { + # if group is null + if (is.null(group)) { + stop("You must provide a group variable if there is no grouping information in the gt_admix object") + } + } + group_meta<- tibble(id = seq(1, length(x$group)), group = x$group) + # sort group meta by group + group_meta <- group_meta[order(group_meta$group),] + # reorder the q matrices + x$Q <- lapply(x$Q, function(y) y[group_meta$id,]) + # if we have an id element, reorder it + if (!is.null(x$id)) { + x$id <- x$id[group_meta$id] + } else { + x$id <- group_meta$id + } + # reorder the group element + x$group <- x$group[group_meta$id] + + return(x) +} diff --git a/R/gt_admixture.R b/R/gt_admixture.R index 3d82899a..2ad97c46 100644 --- a/R/gt_admixture.R +++ b/R/gt_admixture.R @@ -169,6 +169,9 @@ gt_admixture <- function (x, k, n_runs = 1, crossval = FALSE, n_cores = 1, seed } } + # add info on algorithm + adm_list$algorithm <- "ADMIXTURE" + return(adm_list) } diff --git a/R/is_loci_table_ordered.R b/R/is_loci_table_ordered.R index ad48caa7..a1fc80cd 100644 --- a/R/is_loci_table_ordered.R +++ b/R/is_loci_table_ordered.R @@ -32,7 +32,7 @@ is_loci_table_ordered.vctrs_bigSNP <- function(.x, error_on_false = FALSE, ...) # check that within each chromosome positions are sorted if (any(unlist(show_loci(.x) %>% - group_by(chr_int) %>% + group_by(.data$chr_int) %>% group_map(~ is.unsorted(.x$position))))){ if (error_on_false){ stop("Your loci are not sorted within chromosomes") diff --git a/R/q_matrix.R b/R/q_matrix.R index 2625613e..4cecaf93 100644 --- a/R/q_matrix.R +++ b/R/q_matrix.R @@ -92,6 +92,10 @@ get_q_matrix <- function(x, ..., k, run) { # Retrieve and return the q-matrix q_matrix <- x$Q[[matrix_index]] + # if the id variable exists, add it as an attribute + if ("id" %in% names(x)){ + attr(q_matrix, "id") <- x$id + } # if a grouping varible exist, add it as an attribute if ("group" %in% names(x)){ attr(q_matrix, "group") <- x$group @@ -177,11 +181,11 @@ tidy.q_matrix <- function(x, data, ...){ q_tbl <- q_tbl %>% dplyr::mutate(id = data$id, group = colu) - # } else if ("population" %in% names(data)){ - # q_tbl <- x %>% - # tibble::as_tibble() %>% - # dplyr::mutate(id = data$id, - # group = data$population) + # } else if ("population" %in% names(data)){ + # q_tbl <- x %>% + # tibble::as_tibble() %>% + # dplyr::mutate(id = data$id, + # group = data$population) } else { q_tbl <- x %>% tibble::as_tibble() %>% @@ -227,11 +231,11 @@ augment.q_matrix <- function(x, data = NULL, ...) { q_tbl <- tidy(x,data) - if ("population" %in% names(data)) { - if(all(data$population == q_tbl$group)){ - q_tbl <- q_tbl %>% dplyr::select(-.data$group) - } - } + # if ("population" %in% names(data)) { + # if(all(data$population == q_tbl$group)){ + # q_tbl <- q_tbl %>% dplyr::select(-.data$group) + # } + # } data <- dplyr::left_join(data,q_tbl, by = "id") } @@ -244,18 +248,31 @@ augment.q_matrix <- function(x, data = NULL, ...) { #' generate the Q matrix #' @param annotate_group Boolean determining whether to annotate the plot with the #' group information +#' @param reorder_within_groups Boolean determining whether to reorder the individuals within each group based +#' on their ancestry proportion (note that this is not advised if you are making multiple plots, as you would get +#' a different order for each plot!). If TRUE, `annotate_group` must also be TRUE. #' @param ... not currently used. #' @returns a barplot of individuals, coloured by ancestry proportion -#' +#' @name autoplot_q_matrix #' @export -autoplot.q_matrix <- function(object, data = NULL, annotate_group = TRUE, ...){ +autoplot.q_matrix <- function(object, data = NULL, annotate_group = TRUE, reorder_within_groups = FALSE, ...){ rlang::check_dots_empty() + # test that if reorder_within_groups is TRUE, annotate_group should also be TRUE + if (reorder_within_groups & !annotate_group){ + stop("If reorder_within_groups is TRUE, annotate_group should also be TRUE") + } + K <- ncol(object) # create dataset if we don't have a gen_tibble if (is.null(data)) { q_tbl <- as.data.frame(object) - q_tbl$id <- 1:nrow(q_tbl) + # if the q_matrix has an id attribute, add it to the data + if ("id" %in% names(attributes(object))){ + q_tbl$id <- attr(object, "id") + } else { + q_tbl$id <- 1:nrow(q_tbl) + } # if the q_matrix has a group attribute, add it to the data if ("group" %in% names(attributes(object))){ q_tbl$group <- rep(attr(object, "group"), each=nrow(q_tbl)/length(attr(object, "group"))) @@ -263,16 +280,24 @@ autoplot.q_matrix <- function(object, data = NULL, annotate_group = TRUE, ...){ } else { # if we have the info from the gen_tibble q_tbl <- tidy(object, data) } - q_tbl <- q_tbl %>% tidyr::pivot_longer(cols = dplyr::starts_with(".Q"), - names_to = "q", values_to = "percentage") %>% - dplyr::mutate(percentage = as.numeric(.data$percentage)) - - # resort data if we have a grouping variable and we plan to use it - if (("group" %in% names(q_tbl))&&annotate_group){ - + # if we have a grouping variable and we plan to use it, then reorder by it + q_tbl$id <- 1:nrow(q_tbl) + if (("group" %in% names(q_tbl)) && annotate_group){ + q_tbl <- q_tbl %>% + dplyr::arrange(.data$group, .data$id) + } + # now reset the id to the new order + q_tbl$id <- 1:nrow(q_tbl) + q_tbl <- q_tbl %>% tidyr::pivot_longer(cols = dplyr::starts_with(".Q"), + names_to = "q", values_to = "percentage") %>% + dplyr::mutate(percentage = as.numeric(.data$percentage)) + + # if we reorder within group + if (("group" %in% names(q_tbl)) && reorder_within_groups){ q_tbl <- q_tbl %>% dplyr::group_by(.data$group, .data$id) %>% - dplyr::arrange(.data$group, .data$id) %>% + #dplyr::arrange(.data$group, .data$id) %>% + dplyr::arrange(.data$group) %>% # only sort by group, so that we don't reorder individuals within groups dplyr::mutate(q = factor(.data$q, levels = .data$q[order(.data$percentage, decreasing = FALSE)])) dominant_q <- q_tbl %>% @@ -290,26 +315,26 @@ autoplot.q_matrix <- function(object, data = NULL, annotate_group = TRUE, ...){ q_tbl <- q_tbl %>% dplyr::mutate(id = factor(.data$id, levels = levels_q)) - } + } - plt <- ggplot2::ggplot(q_tbl, - ggplot2::aes(x = .data$id, - y = .data$percentage, - fill = .data$q)) + - ggplot2::geom_col(width = 1, - position = "stack")+ - ggplot2::labs(y = paste("K = ", K))+ - theme_distruct() + - scale_fill_distruct() - - if (annotate_group){ - if (!"group" %in% names(q_tbl)) { - warning("no annotation possible if 'gen_tbl' is NULL and q_matrix does not contain group information") - } else { - plt <- plt + annotate_group_info(q_tbl) - } + plt <- ggplot2::ggplot(q_tbl, + ggplot2::aes(x = .data$id, + y = .data$percentage, + fill = .data$q)) + + ggplot2::geom_col(width = 1, + position = "stack")+ + ggplot2::labs(y = paste("K = ", K))+ + theme_distruct() + + scale_fill_distruct() + + if (annotate_group){ + if (!"group" %in% names(q_tbl)) { + warning("no annotation possible if 'gen_tbl' is NULL and q_matrix does not contain group information") + } else { + plt <- plt + annotate_group_info(q_tbl) } - plt + } + plt } # thin vertical lines show when plot is saved as .pdf and opened with certain viewers, diff --git a/_pkgdown.yml b/_pkgdown.yml index 16df57cb..04dd21ac 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -100,6 +100,7 @@ reference: - autoplot.gt_admix - summary.gt_admix - c.gt_admix + - gt_admix_reorder_q - q_matrix - read_q_files - get_q_matrix diff --git a/man/autoplot_gt_admix.Rd b/man/autoplot_gt_admix.Rd index 4fa9120a..b00b6433 100644 --- a/man/autoplot_gt_admix.Rd +++ b/man/autoplot_gt_admix.Rd @@ -17,7 +17,7 @@ param repeat the repeat to plot (for \code{barplot} type only)} \item{run}{the run to plot (for \code{barplot} type only)} -\item{...}{not used.} +\item{...}{additional arguments to be passed to autoplot method for q_matrices \code{\link[=autoplot_q_matrix]{autoplot_q_matrix()}}.} } \value{ a \code{ggplot2} object diff --git a/man/autoplot.q_matrix.Rd b/man/autoplot_q_matrix.Rd similarity index 58% rename from man/autoplot.q_matrix.Rd rename to man/autoplot_q_matrix.Rd index 470047f5..407dde61 100644 --- a/man/autoplot.q_matrix.Rd +++ b/man/autoplot_q_matrix.Rd @@ -1,10 +1,17 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/q_matrix.R -\name{autoplot.q_matrix} +\name{autoplot_q_matrix} +\alias{autoplot_q_matrix} \alias{autoplot.q_matrix} \title{Autoplots for \code{q_matrix} objects} \usage{ -\method{autoplot}{q_matrix}(object, data = NULL, annotate_group = TRUE, ...) +\method{autoplot}{q_matrix}( + object, + data = NULL, + annotate_group = TRUE, + reorder_within_groups = FALSE, + ... +) } \arguments{ \item{object}{A Q matrix object (as returned by \code{\link[=q_matrix]{q_matrix()}}).} @@ -15,6 +22,10 @@ generate the Q matrix} \item{annotate_group}{Boolean determining whether to annotate the plot with the group information} +\item{reorder_within_groups}{Boolean determining whether to reorder the individuals within each group based +on their ancestry proportion (note that this is not advised if you are making multiple plots, as you would get +a different order for each plot!). If TRUE, \code{annotate_group} must also be TRUE.} + \item{...}{not currently used.} } \value{ diff --git a/man/gt_admix_reorder_q.Rd b/man/gt_admix_reorder_q.Rd new file mode 100644 index 00000000..d7aaff7b --- /dev/null +++ b/man/gt_admix_reorder_q.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gt_admix_methods.R +\name{gt_admix_reorder_q} +\alias{gt_admix_reorder_q} +\title{Reorder the q matrices based on the grouping variable} +\usage{ +gt_admix_reorder_q(x, group = NULL) +} +\arguments{ +\item{x}{a \code{gt_admix} object, possibly with a grouping variable} + +\item{group}{a character vector with the grouping variable (if there is no grouping variable info in \code{x})} +} +\value{ +a \code{gt_admix} object with the q matrices reordered +} +\description{ +This function reorders the q matrices in a \code{gt_admix} object based on the grouping variable. This is useful before plotting when the +samples from each group are not adjacent to each other in the q matrix. +} diff --git a/tests/testthat/test_gt_admixture.R b/tests/testthat/test_gt_admixture.R index 9d2e4dab..a46e9c3f 100644 --- a/tests/testthat/test_gt_admixture.R +++ b/tests/testthat/test_gt_admixture.R @@ -60,4 +60,49 @@ test_that("run admixture as multiple runs", { bar_plot <- autoplot(anole_adm_cv, type = "barplot", k=2,run = 1) # check that this is indeed a ggplot object expect_true(inherits(bar_plot, "ggplot")) + + # test the reorder of q matrices + anole_adm_cv_reorder <- gt_admix_reorder_q(anole_adm_cv) + # check plot ordering + unord_plot <- autoplot(anole_adm_cv, type = "barplot", k=3,run = 1, annotate_group=FALSE) + ord_plot <- autoplot(anole_adm_cv_reorder, type = "barplot", k=3,run = 1, annotate_group=TRUE) + reord_plot <- autoplot(anole_adm_cv_reorder, type = "barplot", k=3,run = 1, annotate_group=TRUE) + expect_identical (ord_plot, reord_plot) + expect_false (identical (unord_plot, ord_plot)) + # reordering within plot changes the order further + ord_within_plot <- autoplot(anole_adm_cv_reorder, type = "barplot", k=3,run = 1, + annotate_group=TRUE, reorder_within_groups=TRUE) + expect_false (identical (ord_plot, ord_within_plot)) + + # get q matrix and augment it + anole_q <- get_q_matrix(anole_adm_cv, k=3, run=1) + augment_q <- augment(anole_q, data= anole_gt) + expect_true(nrow(augment_q)==nrow(anole_gt)) + # TODO should we try to check that the data are in the same order as the q matrix? + + # tidy matrix with grouped and ungrouped data + q_tidy_group <- tidy(get_q_matrix(anole_adm_cv, k=3, run=1), anole_gt) + expect_true("group" %in% colnames(q_tidy_group)) + q_tidy_ind <- tidy(get_q_matrix(anole_adm_cv, k=3, run=1), anole_gt %>% dplyr::ungroup()) + expect_false("group" %in% colnames(q_tidy_ind)) + + # reorder errors + # wrong object + expect_error(gt_admix_reorder_q(anole_gt), + "x must be a gt_admix object") + # wrong group length + expect_error(gt_admix_reorder_q(anole_adm_cv, group=c(1,2)), + "The length of the group variable must be the same as the number of rows in the Q matrix") + # no group when we have no group info + anole_adm_no_group <- anole_adm_cv + anole_adm_no_group$group <- NULL + expect_error(gt_admix_reorder_q(anole_adm_no_group), + "You must provide a group variable if there is no grouping information in the gt_admix object") + # use gt_adm without id and group + anole_adm_no_group$id <- NULL + anole_adm_no_group$group <- NULL + adm_reord_no_meta <- gt_admix_reorder_q(anole_adm_no_group, group=anole_gt$population) + # now check that we have an id and group elements in the list + expect_true(length(adm_reord_no_meta$id)==nrow(anole_gt)) + expect_true(length(adm_reord_no_meta$group)==nrow(anole_gt)) })