Skip to content

Commit

Permalink
Merge pull request #55 from EvolEcolGroup/read_q_matrices
Browse files Browse the repository at this point in the history
Read q matrices
  • Loading branch information
dramanica authored Oct 17, 2024
2 parents 9e77229 + 4ffc22e commit 7e708c0
Show file tree
Hide file tree
Showing 19 changed files with 5,424 additions and 110 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: tidypopgen
Title: Tidy Population Genetics
Version: 0.0.0.9016
Version: 0.0.0.9017
Authors@R:
c(person("Evie", "Carter", role = c("aut")),
person("Andrea", "Manica", email = "[email protected]",
Expand Down
4 changes: 2 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ S3method(show_loci,tbl_df)
S3method(show_loci,vctrs_bigSNP)
S3method(show_ploidy,tbl_df)
S3method(show_ploidy,vctrs_bigSNP)
S3method(summary,q_matrix_list)
S3method(summary,rbind_report)
S3method(summary,vctrs_bigSNP)
S3method(tbl_sum,gen_tbl)
Expand All @@ -77,7 +78,6 @@ S3method(tidy,q_matrix)
S3method(ungroup,grouped_gen_tbl)
export("%>%")
export("show_loci<-")
export(as_q_matrix)
export(augment)
export(augment_loci)
export(autoplot)
Expand Down Expand Up @@ -124,10 +124,10 @@ export(pairwise_king)
export(pairwise_pop_fst)
export(pop_fis)
export(pop_fst)
export(q_matrix)
export(qc_report_indiv)
export(qc_report_loci)
export(rbind_dry_run)
export(read_q_matrix_list)
export(scale_fill_distruct)
export(select_loci)
export(select_loci_if)
Expand Down
4 changes: 2 additions & 2 deletions R/gen_tibble.R
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,7 @@ change_duplicated_file_name <- function(file){

if(file.exists(bk) && !file.exists(rds)){

version <- 1
version <- 2

base_name <- basename(file)

Expand All @@ -533,7 +533,7 @@ change_duplicated_file_name <- function(file){
return(new_file)
} else if (file.exists(bk) && file.exists(rds)){

version <- 1
version <- 2

base_name <- basename(file)

Expand Down
189 changes: 135 additions & 54 deletions R/as_q_matrix.R → R/q_matrix.R
Original file line number Diff line number Diff line change
@@ -1,23 +1,63 @@
#' Convert a Q matrix into a `q_matrix` obejct
#' Read and structure .Q files or existing matrices as 'q_matrix' objects.
#'
#' Takes a matrix of Q values, check its validity, and then formats it correctly
#' to make sure it can then be processed and plotted correctly
#' This function reads .Q matrix files generated by external clustering
#' algorithms (such as ADMIXTURE) and transforms them into 'q_matrix' objects
#' for plotting.
#'
#' @param x a matrix
#' @returns a `q_matrix` object, which is a matrix with appropriate column names
#' (.QX, where X is the component number) to use with plotting
#' @param x can be:
#' - a path to a directory containing .Q files
#' - a path to a single .Q file
#' - a matrix
#' - a dataframe
#' @returns either:
#' - a single `q_matrix` object
#' - a list of `q_matrix` objects
#' - a `q_matrix_list` object (for directories containing multiple runs) of q_matrix objects
#' @export
q_matrix <- function(x) {

as_q_matrix <- function(x){
if (inherits(x,"data.frame")){
x <- as.matrix(x)
}
colnames(x)<- paste0(".Q",seq_len(ncol(x)))
class(x) <- c("q_matrix",class(x))
x
}
if (inherits(x, "data.frame") || inherits(x, "matrix")) {
x <- as_q_matrix(x)
return(x)

} else if (!dir.exists(x) && !file.exists(x)){
stop("Input is not a valid dataframe, file or directory")

} else if (!dir.exists(x) && file.exists(x)){
if ((grepl("\\.Q$", x)) == FALSE){
#if input file does not end in Q
stop("Input file does not end in '.Q'")
}
# Action if input is a file
if (grepl("\\.Q$", x) == TRUE) {
x <- utils::read.table(x, header = FALSE)
x <- as_q_matrix(x)
return(x)
}
} else if (dir.exists(x)) {
# List all .Q files in the directory
files <- list.files(x, pattern = "\\.Q$", full.names = TRUE)

# Check if the directory contains at least one .Q file
if (length(files) == 0) {
stop("No .Q files found in the directory")
}
# Read all .Q files into a list
data_list <- lapply(files, function(file) utils::read.table(file, header = FALSE))
# Turn each data frame into a Q matrix
matrix_list <- lapply(data_list, FUN = as_q_matrix)
# Get the number of columns for each Q matrix (which corresponds to K)
k_values <- sapply(matrix_list, ncol)
# Create a list of lists, grouping matrices by their number of columns (K)
list_of_lists <- split(matrix_list, k_values)
# Rename the list so that each K is prefixed with "k"
names(list_of_lists) <- paste0("k", names(list_of_lists))
# Return the list of lists
class(list_of_lists) <- c("q_matrix_list", class(list_of_lists))
return(list_of_lists)

}
}

#' Tidy a Q matrix
#'
Expand All @@ -32,14 +72,32 @@ as_q_matrix <- function(x){
tidy.q_matrix <- function(x, data, ...){
rlang::check_dots_empty()

q_tbl <- x %>%
tibble::as_tibble()
if(inherits(data,"grouped_df")){
q_tbl <- x %>%
tibble::as_tibble()

q_tbl <- q_tbl %>%
dplyr::mutate(id = data$id,
#@TODO
group = data$population)
groupdf <- data %>% dplyr::group_data()
colu <- seq(1,nrow(data))

for (i in seq_len(nrow(groupdf))) {
group_rows <- groupdf$.rows[[i]]
group_name <- groupdf[[1]][i]
colu[group_rows] <- group_name
}

q_tbl <- q_tbl %>%
dplyr::mutate(id = data$id,
group = colu)
} else if ("population" %in% names(data)){
q_tbl <- x %>%
tibble::as_tibble() %>%
dplyr::mutate(id = data$id,
group = data$population)
} else {
q_tbl <- x %>%
tibble::as_tibble() %>%
dplyr::mutate(id = data$id)
}
q_tbl
}

Expand All @@ -63,50 +121,37 @@ tidy.q_matrix <- function(x, data, ...){

augment.q_matrix <- function(x, data = NULL, ...) {

if (!".rownames" %in% names(data)) {
data <- data %>%
dplyr::mutate(.rownames = data$id)
}
if(inherits(data,"grouped_df")){

if (!".rownames" %in% names(data)) {
group_vars <- group_vars(data)
data <- data %>% dplyr::ungroup()
data <- data %>%
dplyr::mutate(.rownames = row_number())
data <- data %>% dplyr::group_by(across(all_of(group_vars)))
}
} else {
if (!".rownames" %in% names(data)) {
data <- data %>%
dplyr::mutate(.rownames = data$id)
}
}

q_tbl <- tidy(x,data)

data <- dplyr::left_join(data,q_tbl, by = "id")

}


#' Tidy ADMXITURE output files into plots
#'
#' Takes the name of a directory containing .Q file outputs, and
#' produces a list of tidied tibbles ready to plot.
#'
#' @param x the name of a directory containing .Q files
#' @returns a list of `q_matrix` objects to plot
#'
#' @export

read_q_matrix_list <- function(x){


files <- list.files(x, pattern = "\\.Q$", full.names = TRUE)

# Read all .Q files into a list
data_list <- lapply(files, function(file) utils::read.table(file, header = FALSE))

# Turn each into a Q matrix
matrix_list <- lapply(data_list, FUN = as_q_matrix)

# Sort matrix_list by the number of columns
matrix_list <- matrix_list[order(sapply(matrix_list, ncol))]
if ("population" %in% names(data)) {
if(all(data$population == q_tbl$group)){
q_tbl <- q_tbl %>% dplyr::select(-.data$group)
}
}

matrix_list
data <- dplyr::left_join(data,q_tbl, by = "id")
}


#' Autoplots for `q_matrix` objects
#'
#' @param object A Q matrix object (as returned by [as_q_matrix()]).
#' @param object A Q matrix object (as returned by [q_matrix()]).
#' @param data An associated tibble (e.g. a [`gen_tibble`]), with the individuals in the same order as the data used to
#' generate the Q matrix
#' @param annotate_group Boolean determining whether to annotate the plot with the
Expand Down Expand Up @@ -191,6 +236,42 @@ autoplot.q_matrix <- function(object, data = NULL, annotate_group = TRUE, ...){

}

#' Summarise a Q matrix list
#'
#' Takes a `q_matrix_list` object and returns a summary.
#'
#' @param object A `q_matrix_list` object.
#' @param ... not currently used
#' @return A summary of the object.
#' @export
summary.q_matrix_list <- function(object, ...) {
k_values <- names(object)

summary_df <- data.frame(K = integer(), Repeats = integer())

for (k in k_values) {
k_numeric <- as.numeric(sub("k", "", k))

num_repeats <- length(object[[k]])

summary_df <- rbind(summary_df, data.frame(K = k_numeric, Repeats = num_repeats))
}
return(summary_df)
}


as_q_matrix <- function(x){
if (inherits(x,"data.frame")){
x <- as.matrix(x)
}
colnames(x)<- paste0(".Q",seq_len(ncol(x)))
class(x) <- c("q_matrix",class(x))
x
}







2 changes: 1 addition & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ reference:
- title: "K-clustering"
desc: "Functions for creating, tidying, and visualising q_matrix objects."
contents:
- as_q_matrix
- q_matrix
- read_q_matrix_list
- tidy.q_matrix
- augment.q_matrix
Expand Down
Loading

0 comments on commit 7e708c0

Please sign in to comment.