Skip to content

Commit

Permalink
Merge pull request #69 from EvolEcolGroup/reorder_q
Browse files Browse the repository at this point in the history
Reorder q
  • Loading branch information
dramanica authored Dec 12, 2024
2 parents abd6fc8 + b3b699d commit a7a68b2
Show file tree
Hide file tree
Showing 12 changed files with 203 additions and 46 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: tidypopgen
Title: Tidy Population Genetics
Version: 0.0.0.9020
Version: 0.0.0.9021
Authors@R:
c(person("Evie", "Carter", role = c("aut")),
person("Andrea", "Manica", email = "[email protected]",
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ export(filter_high_relatedness)
export(gen_tibble)
export(get_p_matrix)
export(get_q_matrix)
export(gt_admix_reorder_q)
export(gt_admixture)
export(gt_as_genind)
export(gt_as_genlight)
Expand Down
4 changes: 2 additions & 2 deletions R/autoplot_gt_admix.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' @param k the value of `k` to plot (for `barplot` type only)
#' param repeat the repeat to plot (for `barplot` type only)
#' @param run the run to plot (for `barplot` type only)
#' @param ... not used.
#' @param ... additional arguments to be passed to autoplot method for q_matrices [autoplot_q_matrix()].
#' @returns a `ggplot2` object
#' @name autoplot_gt_admix
#' @export
Expand Down Expand Up @@ -43,6 +43,6 @@ autoplot.gt_admix <- function(object,
}
# get the Q matrix for the specified k and repeat
Q <- get_q_matrix(object, k = k, run = run)
autoplot(Q)
autoplot(Q, ...)
}
}
51 changes: 51 additions & 0 deletions R/gt_admix_methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,54 @@ summary.gt_admix <- function(object, ...) {
cat("$cv for cross validation error\n")
}
}


#' Reorder the q matrices based on the grouping variable
#'
#' This function reorders the q matrices in a `gt_admix` object based on the grouping variable. This is useful before plotting when the
#' samples from each group are not adjacent to each other in the q matrix.
#'
#' @param x a `gt_admix` object, possibly with a grouping variable
#' @param group a character vector with the grouping variable (if there is no grouping variable info in `x`)
#' @return a `gt_admix` object with the q matrices reordered
#' @export

gt_admix_reorder_q <- function(x, group = NULL) {
# check that x is a gt_admix object
if (!inherits(x, "gt_admix")) {
stop("x must be a gt_admix object")
}
# if we have a gruop variable,
if (!is.null(group)){
# check that it is the same length as the q matrix
if (length(group) != nrow(x$Q[[1]])) {
stop("The length of the group variable must be the same as the number of rows in the Q matrix")
}
# and use it
if (!is.null(group)) {
x$group <- group
}
}
# if we have no group variable, check that we have one in the object
if (is.null(x$group) ) {
# if group is null
if (is.null(group)) {
stop("You must provide a group variable if there is no grouping information in the gt_admix object")
}
}
group_meta<- tibble(id = seq(1, length(x$group)), group = x$group)
# sort group meta by group
group_meta <- group_meta[order(group_meta$group),]
# reorder the q matrices
x$Q <- lapply(x$Q, function(y) y[group_meta$id,])
# if we have an id element, reorder it
if (!is.null(x$id)) {
x$id <- x$id[group_meta$id]
} else {
x$id <- group_meta$id
}
# reorder the group element
x$group <- x$group[group_meta$id]

return(x)
}
3 changes: 3 additions & 0 deletions R/gt_admixture.R
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,9 @@ gt_admixture <- function (x, k, n_runs = 1, crossval = FALSE, n_cores = 1, seed
}
}

# add info on algorithm
adm_list$algorithm <- "ADMIXTURE"

return(adm_list)

}
Expand Down
2 changes: 1 addition & 1 deletion R/is_loci_table_ordered.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ is_loci_table_ordered.vctrs_bigSNP <- function(.x, error_on_false = FALSE, ...)

# check that within each chromosome positions are sorted
if (any(unlist(show_loci(.x) %>%
group_by(chr_int) %>%
group_by(.data$chr_int) %>%
group_map(~ is.unsorted(.x$position))))){
if (error_on_false){
stop("Your loci are not sorted within chromosomes")
Expand Down
103 changes: 64 additions & 39 deletions R/q_matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ get_q_matrix <- function(x, ..., k, run) {

# Retrieve and return the q-matrix
q_matrix <- x$Q[[matrix_index]]
# if the id variable exists, add it as an attribute
if ("id" %in% names(x)){
attr(q_matrix, "id") <- x$id
}
# if a grouping varible exist, add it as an attribute
if ("group" %in% names(x)){
attr(q_matrix, "group") <- x$group
Expand Down Expand Up @@ -177,11 +181,11 @@ tidy.q_matrix <- function(x, data, ...){
q_tbl <- q_tbl %>%
dplyr::mutate(id = data$id,
group = colu)
# } else if ("population" %in% names(data)){
# q_tbl <- x %>%
# tibble::as_tibble() %>%
# dplyr::mutate(id = data$id,
# group = data$population)
# } else if ("population" %in% names(data)){
# q_tbl <- x %>%
# tibble::as_tibble() %>%
# dplyr::mutate(id = data$id,
# group = data$population)
} else {
q_tbl <- x %>%
tibble::as_tibble() %>%
Expand Down Expand Up @@ -227,11 +231,11 @@ augment.q_matrix <- function(x, data = NULL, ...) {

q_tbl <- tidy(x,data)

if ("population" %in% names(data)) {
if(all(data$population == q_tbl$group)){
q_tbl <- q_tbl %>% dplyr::select(-.data$group)
}
}
# if ("population" %in% names(data)) {
# if(all(data$population == q_tbl$group)){
# q_tbl <- q_tbl %>% dplyr::select(-.data$group)
# }
# }

data <- dplyr::left_join(data,q_tbl, by = "id")
}
Expand All @@ -244,35 +248,56 @@ augment.q_matrix <- function(x, data = NULL, ...) {
#' generate the Q matrix
#' @param annotate_group Boolean determining whether to annotate the plot with the
#' group information
#' @param reorder_within_groups Boolean determining whether to reorder the individuals within each group based
#' on their ancestry proportion (note that this is not advised if you are making multiple plots, as you would get
#' a different order for each plot!). If TRUE, `annotate_group` must also be TRUE.
#' @param ... not currently used.
#' @returns a barplot of individuals, coloured by ancestry proportion
#'
#' @name autoplot_q_matrix
#' @export
autoplot.q_matrix <- function(object, data = NULL, annotate_group = TRUE, ...){
autoplot.q_matrix <- function(object, data = NULL, annotate_group = TRUE, reorder_within_groups = FALSE, ...){

rlang::check_dots_empty()
# test that if reorder_within_groups is TRUE, annotate_group should also be TRUE
if (reorder_within_groups & !annotate_group){
stop("If reorder_within_groups is TRUE, annotate_group should also be TRUE")
}

K <- ncol(object)
# create dataset if we don't have a gen_tibble
if (is.null(data)) {
q_tbl <- as.data.frame(object)
q_tbl$id <- 1:nrow(q_tbl)
# if the q_matrix has an id attribute, add it to the data
if ("id" %in% names(attributes(object))){
q_tbl$id <- attr(object, "id")
} else {
q_tbl$id <- 1:nrow(q_tbl)
}
# if the q_matrix has a group attribute, add it to the data
if ("group" %in% names(attributes(object))){
q_tbl$group <- rep(attr(object, "group"), each=nrow(q_tbl)/length(attr(object, "group")))
}
} else { # if we have the info from the gen_tibble
q_tbl <- tidy(object, data)
}
q_tbl <- q_tbl %>% tidyr::pivot_longer(cols = dplyr::starts_with(".Q"),
names_to = "q", values_to = "percentage") %>%
dplyr::mutate(percentage = as.numeric(.data$percentage))

# resort data if we have a grouping variable and we plan to use it
if (("group" %in% names(q_tbl))&&annotate_group){

# if we have a grouping variable and we plan to use it, then reorder by it
q_tbl$id <- 1:nrow(q_tbl)
if (("group" %in% names(q_tbl)) && annotate_group){
q_tbl <- q_tbl %>%
dplyr::arrange(.data$group, .data$id)
}
# now reset the id to the new order
q_tbl$id <- 1:nrow(q_tbl)
q_tbl <- q_tbl %>% tidyr::pivot_longer(cols = dplyr::starts_with(".Q"),
names_to = "q", values_to = "percentage") %>%
dplyr::mutate(percentage = as.numeric(.data$percentage))

# if we reorder within group
if (("group" %in% names(q_tbl)) && reorder_within_groups){
q_tbl <- q_tbl %>%
dplyr::group_by(.data$group, .data$id) %>%
dplyr::arrange(.data$group, .data$id) %>%
#dplyr::arrange(.data$group, .data$id) %>%
dplyr::arrange(.data$group) %>% # only sort by group, so that we don't reorder individuals within groups
dplyr::mutate(q = factor(.data$q, levels = .data$q[order(.data$percentage, decreasing = FALSE)]))

dominant_q <- q_tbl %>%
Expand All @@ -290,26 +315,26 @@ autoplot.q_matrix <- function(object, data = NULL, annotate_group = TRUE, ...){

q_tbl <- q_tbl %>%
dplyr::mutate(id = factor(.data$id, levels = levels_q))
}
}

plt <- ggplot2::ggplot(q_tbl,
ggplot2::aes(x = .data$id,
y = .data$percentage,
fill = .data$q)) +
ggplot2::geom_col(width = 1,
position = "stack")+
ggplot2::labs(y = paste("K = ", K))+
theme_distruct() +
scale_fill_distruct()

if (annotate_group){
if (!"group" %in% names(q_tbl)) {
warning("no annotation possible if 'gen_tbl' is NULL and q_matrix does not contain group information")
} else {
plt <- plt + annotate_group_info(q_tbl)
}
plt <- ggplot2::ggplot(q_tbl,
ggplot2::aes(x = .data$id,
y = .data$percentage,
fill = .data$q)) +
ggplot2::geom_col(width = 1,
position = "stack")+
ggplot2::labs(y = paste("K = ", K))+
theme_distruct() +
scale_fill_distruct()

if (annotate_group){
if (!"group" %in% names(q_tbl)) {
warning("no annotation possible if 'gen_tbl' is NULL and q_matrix does not contain group information")
} else {
plt <- plt + annotate_group_info(q_tbl)
}
plt
}
plt

}
# thin vertical lines show when plot is saved as .pdf and opened with certain viewers,
Expand Down
1 change: 1 addition & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ reference:
- autoplot.gt_admix
- summary.gt_admix
- c.gt_admix
- gt_admix_reorder_q
- q_matrix
- read_q_files
- get_q_matrix
Expand Down
2 changes: 1 addition & 1 deletion man/autoplot_gt_admix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 13 additions & 2 deletions man/autoplot.q_matrix.Rd → man/autoplot_q_matrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions man/gt_admix_reorder_q.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 45 additions & 0 deletions tests/testthat/test_gt_admixture.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,49 @@ test_that("run admixture as multiple runs", {
bar_plot <- autoplot(anole_adm_cv, type = "barplot", k=2,run = 1)
# check that this is indeed a ggplot object
expect_true(inherits(bar_plot, "ggplot"))

# test the reorder of q matrices
anole_adm_cv_reorder <- gt_admix_reorder_q(anole_adm_cv)
# check plot ordering
unord_plot <- autoplot(anole_adm_cv, type = "barplot", k=3,run = 1, annotate_group=FALSE)
ord_plot <- autoplot(anole_adm_cv_reorder, type = "barplot", k=3,run = 1, annotate_group=TRUE)
reord_plot <- autoplot(anole_adm_cv_reorder, type = "barplot", k=3,run = 1, annotate_group=TRUE)
expect_identical (ord_plot, reord_plot)
expect_false (identical (unord_plot, ord_plot))
# reordering within plot changes the order further
ord_within_plot <- autoplot(anole_adm_cv_reorder, type = "barplot", k=3,run = 1,
annotate_group=TRUE, reorder_within_groups=TRUE)
expect_false (identical (ord_plot, ord_within_plot))

# get q matrix and augment it
anole_q <- get_q_matrix(anole_adm_cv, k=3, run=1)
augment_q <- augment(anole_q, data= anole_gt)
expect_true(nrow(augment_q)==nrow(anole_gt))
# TODO should we try to check that the data are in the same order as the q matrix?

# tidy matrix with grouped and ungrouped data
q_tidy_group <- tidy(get_q_matrix(anole_adm_cv, k=3, run=1), anole_gt)
expect_true("group" %in% colnames(q_tidy_group))
q_tidy_ind <- tidy(get_q_matrix(anole_adm_cv, k=3, run=1), anole_gt %>% dplyr::ungroup())
expect_false("group" %in% colnames(q_tidy_ind))

# reorder errors
# wrong object
expect_error(gt_admix_reorder_q(anole_gt),
"x must be a gt_admix object")
# wrong group length
expect_error(gt_admix_reorder_q(anole_adm_cv, group=c(1,2)),
"The length of the group variable must be the same as the number of rows in the Q matrix")
# no group when we have no group info
anole_adm_no_group <- anole_adm_cv
anole_adm_no_group$group <- NULL
expect_error(gt_admix_reorder_q(anole_adm_no_group),
"You must provide a group variable if there is no grouping information in the gt_admix object")
# use gt_adm without id and group
anole_adm_no_group$id <- NULL
anole_adm_no_group$group <- NULL
adm_reord_no_meta <- gt_admix_reorder_q(anole_adm_no_group, group=anole_gt$population)
# now check that we have an id and group elements in the list
expect_true(length(adm_reord_no_meta$id)==nrow(anole_gt))
expect_true(length(adm_reord_no_meta$group)==nrow(anole_gt))
})

0 comments on commit a7a68b2

Please sign in to comment.