From 8b0d6d981c8e91697353d628f444a3ec293177ec Mon Sep 17 00:00:00 2001 From: emosca-cnr Date: Fri, 20 Oct 2023 11:54:50 +0200 Subject: [PATCH] v1.1.2 --- DESCRIPTION | 7 +- NAMESPACE | 25 +- NEWS | 2 + R/CNV_analysis.R | 13 +- R/barplot_cluster.R | 7 +- R/boxplot_cluster.R | 6 +- R/boxplot_points.R | 3 +- R/calculate_CNV.R | 55 ++-- R/calculate_gs_scores.R | 2 +- R/cluster_by_features.R | 60 ++-- R/create_scMuffinList.R | 4 +- R/csea.R | 8 +- R/detect_CNV_regions.R | 21 +- R/diff_map.R | 6 +- R/get_msigdb_geneset.R | 4 +- R/gs_score.R | 4 +- R/gs_scores_in_clusters.R | 8 +- R/heatmap_CNV.R | 12 +- R/inter_datasets_comparison.R | 2 - R/ora.R | 2 +- R/ora1gs.R | 2 +- R/plot_diff_map.R | 14 +- R/plot_heatmap_dataset_comparison.R | 10 +- R/plot_heatmap_features_by_clusters.R | 10 +- R/plot_profile_CNV.R | 5 +- R/plot_umap.R | 12 +- R/plot_umap_colored_features.R | 20 +- R/preprocess_for_heatmap_CNV.R | 16 - R/preprocess_object_for_CNV.R | 76 +++-- R/process_GTEx_gene_reads.R | 4 +- R/regions_to_genes.R | 11 +- R/sc_create_null.R | 2 +- R/sc_data_bin.R | 6 +- R/transcr_compl.R | 7 +- docs/404.html | 6 +- docs/articles/index.html | 4 +- docs/articles/scMuffin.html | 287 ++++++++++++++---- docs/authors.html | 8 +- .../bootstrap-5.2.2/bootstrap.bundle.min.js | 7 + .../bootstrap.bundle.min.js.map | 1 + docs/deps/bootstrap-5.2.2/bootstrap.min.css | 6 + docs/deps/data-deps.txt | 4 +- docs/index.html | 6 +- docs/pkgdown.yml | 4 +- docs/reference/CNV.html | 4 +- docs/reference/CNV_analysis.html | 31 +- docs/reference/add_features.html | 4 +- docs/reference/add_partitions.html | 4 +- docs/reference/adj_outliers_col.html | 4 +- docs/reference/apply_CNV_reference.html | 4 +- docs/reference/assess_cluster_enrichment.html | 4 +- docs/reference/barplot_cluster.html | 4 +- docs/reference/boxplot_cluster.html | 4 +- docs/reference/boxplot_points.html | 4 +- docs/reference/calc_gs_perm.html | 4 +- docs/reference/calculate_CNV.html | 19 +- docs/reference/calculate_gs_scores.html | 4 +- .../calculate_gs_scores_in_clusters.html | 4 +- docs/reference/cluster_by_features.html | 36 +-- docs/reference/cluster_csea.html | 4 +- docs/reference/cluster_hyper.html | 4 +- docs/reference/cluster_stats.html | 4 +- docs/reference/create_scMuffinList.html | 4 +- docs/reference/csea.html | 4 +- docs/reference/detect_CNV_regions.html | 10 +- docs/reference/diff_map.html | 4 +- docs/reference/es.html | 4 +- .../extract_cluster_enrichment_table.html | 4 +- .../extract_cluster_enrichment_tags.html | 4 +- docs/reference/filter_gsl.html | 4 +- .../genes_in_detected_CNV_regions.html | 4 +- docs/reference/get_msigdb_geneset.html | 4 +- docs/reference/gs_score.html | 4 +- docs/reference/gs_scores_in_clusters.html | 4 +- docs/reference/gsls_EntrezID.html | 4 +- docs/reference/gsls_Symbol.html | 4 +- docs/reference/heatmap_CNV.html | 4 +- docs/reference/index.html | 9 +- docs/reference/inter_dataset_comparison.html | 4 +- .../keep_strongest_representative.html | 4 +- docs/reference/ora.html | 4 +- docs/reference/ora1gs.html | 4 +- docs/reference/overlap_coefficient.html | 4 +- docs/reference/overlap_matrix.html | 4 +- docs/reference/partitions_to_list.html | 4 +- docs/reference/plot_diff_map.html | 4 +- .../plot_heatmap_dataset_comparison.html | 4 +- .../plot_heatmap_features_by_clusters.html | 4 +- docs/reference/plot_profile_CNV.html | 4 +- docs/reference/plot_umap.html | 4 +- .../reference/plot_umap_colored_features.html | 4 +- .../prepare_cluster_markers_list.html | 4 +- docs/reference/prepare_gsls.html | 4 +- docs/reference/preprocess_object_for_CNV.html | 10 +- docs/reference/process_GTEx_gene_reads.html | 4 +- docs/reference/proliferation_analysis.html | 4 +- docs/reference/regions_to_genes.html | 4 +- docs/reference/scMuffinList_demo.html | 4 +- docs/reference/sc_create_null.html | 4 +- docs/reference/sc_data_bin.html | 4 +- docs/reference/show_tissues.html | 4 +- docs/reference/transcr_compl.html | 4 +- docs/search.json | 2 +- man/CNV_analysis.Rd | 19 +- man/calculate_CNV.Rd | 11 +- man/cluster_by_features.Rd | 22 +- man/detect_CNV_regions.Rd | 5 +- man/preprocess_for_heatmap_CNV.Rd | 17 -- man/preprocess_object_for_CNV.Rd | 4 +- 109 files changed, 673 insertions(+), 509 deletions(-) delete mode 100644 R/preprocess_for_heatmap_CNV.R create mode 100644 docs/deps/bootstrap-5.2.2/bootstrap.bundle.min.js create mode 100644 docs/deps/bootstrap-5.2.2/bootstrap.bundle.min.js.map create mode 100644 docs/deps/bootstrap-5.2.2/bootstrap.min.css delete mode 100644 man/preprocess_for_heatmap_CNV.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 0b28611..15fef64 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: scMuffin Title: MUlti-Features INtegrative approach for single-cell data analysis -Version: 1.1.1 -Date: 2023-09-05 +Version: 1.1.2 +Date: 2023-10-12 Authors@R: c(person(given = "Valentina", family = "Nale", @@ -57,5 +57,6 @@ Suggests: rmarkdown, knitr, BiocStyle, - devtools + devtools, + sf VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 7eb22a6..db878ae 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,7 +42,6 @@ export(plot_umap) export(plot_umap_colored_features) export(prepare_cluster_markers_list) export(prepare_gsls) -export(preprocess_for_heatmap_CNV) export(preprocess_object_for_CNV) export(process_GTEx_gene_reads) export(proliferation_analysis) @@ -53,33 +52,42 @@ export(show_tissues) export(transcr_compl) import(ComplexHeatmap) import(Seurat) -import(ggplot2) import(grDevices) import(graphics) import(grid) -import(msigdbr) -import(org.Hs.eg.db) import(pals) import(parallel) importFrom(Matrix,Matrix) importFrom(Matrix,colMeans) importFrom(Matrix,colSums) importFrom(Matrix,rowSums) +importFrom(Seurat,AddMetaData) +importFrom(Seurat,DimPlot) +importFrom(Seurat,FeaturePlot) +importFrom(Seurat,FetchData) +importFrom(Seurat,NormalizeData) importFrom(circlize,colorRamp2) importFrom(destiny,DPT) importFrom(destiny,DiffusionMap) importFrom(destiny,random_root) +importFrom(ggplot2,annotate) +importFrom(ggplot2,cut_interval) importFrom(ggplot2,cut_number) +importFrom(ggplot2,element_text) +importFrom(ggplot2,theme) importFrom(grDevices,boxplot.stats) importFrom(grDevices,jpeg) +importFrom(grDevices,png) +importFrom(grid,gpar) importFrom(methods,is) +importFrom(msigdbr,msigdbr) +importFrom(org.Hs.eg.db,org.Hs.egCHRLOC) +importFrom(org.Hs.eg.db,org.Hs.egCHRLOCEND) +importFrom(org.Hs.eg.db,org.Hs.egENSEMBL) +importFrom(org.Hs.eg.db,org.Hs.egSYMBOL) importFrom(pals,alphabet) -importFrom(pals,brewer.purples) -importFrom(pals,brewer.rdylbu) -importFrom(pals,brewer.ylorrd) importFrom(parallel,mclapply) importFrom(plotrix,thigmophobe.labels) -importFrom(stats,as.hclust) importFrom(stats,dhyper) importFrom(stats,glm) importFrom(stats,median) @@ -93,4 +101,3 @@ importFrom(stats,wilcox.test) importFrom(utils,data) importFrom(utils,read.delim) importFrom(utils,read.table) -importFrom(utils,write.table) diff --git a/NEWS b/NEWS index 62bd3b5..bccbb88 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,5 @@ +version 1.1.2 +- solved issue in CNV analysis, arising when number of chromosome genes is equal to wnd_size + 1 version 1.1.1 - various improvements: mainly documentation, checks on input and some errors arising whit particular input data version 1.1.0 diff --git a/R/CNV_analysis.R b/R/CNV_analysis.R index 4aa5b35..fdf6976 100644 --- a/R/CNV_analysis.R +++ b/R/CNV_analysis.R @@ -6,21 +6,24 @@ #' @param min_genes minimun number of genes expressed in a cell; #' @param min_cells minimum numbver of cells in which a gene must be expressed; #' @param expr_lim min and max values of relative expression; by default, lower and higher whiskers returned by grDevices::boxplot.stats will be used. Set to NA or anything other value such that length(expr_lim) != 2 to disable the removal of outliers. -#' @param scale_cells whether to scale cells +#' @param center_cells whether to center cells. Default is TRUE #' @param na.rm whether to remove 0 values in CNV estimation -#' @param center_genes whether to center genes or not +#' @param center_genes whether to center genes or not. Default is FALSE. When using a reference it is useful to set this to TRUE #' @param method "mean": subtract the average profile of the reference cluster to every cell (default); "min_max" (from Tirosh et al., DOI: 10.1126/science.aad0501) can be used but is not tested yet #' @param z.score whether to use z-scores of the cluster median CNV profile, instead of the median itself. #' @param eps absolute threshold to call CNV regions. +#' @param ... arguments passed to Seurat::FindClusters() +#' @param n_comp number of PCA components used for clustering +#' @param gene_ann optional data.frame with gene annotation with mandatory columns "Chromosome", "symbol" and "pos" (genomic location). If NULL gene locations will be collected from org.Hs.eg.db #' @export -CNV_analysis <- function(scMuffinList=NULL, mc.cores=1, reference = NULL, min_cells = 100, min_genes = 100, wnd_size = 100, scale_cells = TRUE, center_genes = FALSE, expr_lim = FALSE, method="mean", na.rm=FALSE, z.score=FALSE, eps=NULL){ +CNV_analysis <- function(scMuffinList=NULL, mc.cores=1, reference = NULL, min_cells = 100, min_genes = 200, wnd_size = 100, center_cells = TRUE, center_genes = FALSE, expr_lim = FALSE, method="mean", na.rm=FALSE, z.score=FALSE, eps=NULL, n_comp=10, gene_ann=NULL, ...){ cat("IMPORTANT: CNV analysis requires gene symbols as gene identifiers!\n") - cnv_res <- calculate_CNV(scMuffinList$normalized, mc.cores = mc.cores, reference = reference, min_cells = min_cells, min_genes = min_genes, wnd_size = wnd_size, scale_cells = scale_cells, center_genes = center_genes, expr_lim = expr_lim, na.rm=na.rm) + cnv_res <- calculate_CNV(scMuffinList$normalized, mc.cores = mc.cores, reference = reference, min_cells = min_cells, min_genes = min_genes, wnd_size = wnd_size, center_cells = center_cells, center_genes = center_genes, expr_lim = expr_lim, na.rm=na.rm, gene_ann=gene_ann) - cnv_clustering <- cluster_by_features(cnv_res$CNV, cnv=TRUE) + cnv_clustering <- cluster_by_features(cnv_res$CNV, n_comp=n_comp, ...) if(!is.null(reference)){ cnv_res$CNV <- apply_CNV_reference(cnv = cnv_res$CNV, cnv_clustering = cnv_clustering, reference="reference", method=method) diff --git a/R/barplot_cluster.R b/R/barplot_cluster.R index 3a19307..76b8d00 100644 --- a/R/barplot_cluster.R +++ b/R/barplot_cluster.R @@ -14,6 +14,7 @@ #' @importFrom grDevices jpeg #' @importFrom stats setNames #' @importFrom plotrix thigmophobe.labels +#' @import pals #' @description Produce barplots (1 for each cluster) of distribution of cells associated with the values of the selected feature. A png figure for each cluster is saved in dir_out. #' @export @@ -52,7 +53,7 @@ barplot_cluster <- function(scMuffinList=NULL, feature_name=NULL, feature_id=NUL #boxplot for each cluster for(cl in 1:length(cell_clusters_set)){ ###for each cluster - grDevices::png(paste0(dir_out, "/cluster_", cell_clusters_set[cl],".png"), width=width, height=height, units=units, res=res) + png(paste0(dir_out, "/cluster_", cell_clusters_set[cl],".png"), width=width, height=height, units=units, res=res) layout(matrix(c(1, 1, 2, 3), nrow = 2, byrow = T)) par(mar = c(3, 3, 3, 1)) par(mgp = c(1.5, .5, 0)) @@ -67,11 +68,11 @@ barplot_cluster <- function(scMuffinList=NULL, feature_name=NULL, feature_id=NUL cell_category_cluster <- cell_category_cluster[c(2,1), ] #remove the other clusters colnames(cell_category_cluster) <- cell_category_names[match(colnames(cell_category_cluster), names(cell_category_names))] - barplot(cell_category_cluster, beside = T, legend.text = T, main=paste("Cluster", cell_clusters_set[cl]), col=pals::alphabet2(2), cex.axis=cex.axis, cex.names=cex.axis, cex.lab=cex.axis, cex.main=cex.axis, args.legend=list(cex=cex.axis)) + barplot(cell_category_cluster, beside = T, legend.text = T, main=paste("Cluster", cell_clusters_set[cl]), col=alphabet2(2), cex.axis=cex.axis, cex.names=cex.axis, cex.lab=cex.axis, cex.main=cex.axis, args.legend=list(cex=cex.axis)) plot(en_res_er[rownames(en_res_er)==cell_clusters_set[cl], ], -log10(en_res_p[rownames(en_res_er)==cell_clusters_set[cl], ]), pch=16, xlab="ER", ylab="-log10(p)", cex.axis=cex.axis, cex.lab=cex.axis, cex=cex.axis) - plotrix::thigmophobe.labels(en_res_er[rownames(en_res_er)==cell_clusters_set[cl], ], -log10(en_res_p[rownames(en_res_er)==cell_clusters_set[cl], ]), cell_category_names, cex=cex.axis) + thigmophobe.labels(en_res_er[rownames(en_res_er)==cell_clusters_set[cl], ], -log10(en_res_p[rownames(en_res_er)==cell_clusters_set[cl], ]), cell_category_names, cex=cex.axis) plot.new() legend(x = "center", legend=paste(cell_category_names, ":", names(cell_category_names)), cex = cex.axis, bty = "n", title = "LEGEND") diff --git a/R/boxplot_cluster.R b/R/boxplot_cluster.R index 7af4807..304e000 100644 --- a/R/boxplot_cluster.R +++ b/R/boxplot_cluster.R @@ -12,7 +12,7 @@ #' @param res image resolution #' @param feature_name the names of the feature that should be considered. It must be one of names(scMuffinList) #' @description Produce boxplots to visualize the distribution of cell values according to the selected figure. A png figure for each cluster is saved in dir_out. -#' @importFrom grDevices jpeg +#' @importFrom grDevices png #' @importFrom plotrix thigmophobe.labels #' @import graphics #' @export @@ -88,7 +88,7 @@ boxplot_cluster <- function(scMuffinList=NULL, feature_name=NULL, partition_id=N cbf_cl <- cells_by_features[, match(names(top_features$fdr), colnames(cells_by_features)), drop=FALSE] - grDevices::png(paste0(dir_out, "/cluster_", cell_clusters_set[cl], ".png"), width=width, height=height, units=units, res=res) + png(paste0(dir_out, "/cluster_", cell_clusters_set[cl], ".png"), width=width, height=height, units=units, res=res) layout(matrix(c(1, 1, 2, 3), nrow = 2, byrow = T)) par(mar = c(3, 3, 2, 1)) @@ -128,7 +128,7 @@ boxplot_cluster <- function(scMuffinList=NULL, feature_name=NULL, partition_id=N #barplot(rev(-log10(top_features$fdr)), horiz = T, names.arg = "", main = "FDR") plot(top_features$nes, -log10(top_features$fdr), pch=16, xlab="NES", ylab="-log10(q)", cex.axis=cex.axis, cex.lab=cex.axis, cex=cex.axis) - plotrix::thigmophobe.labels(top_features$nes, -log10(top_features$fdr), box_names, cex=cex.axis) + thigmophobe.labels(top_features$nes, -log10(top_features$fdr), box_names, cex=cex.axis) plot.new() legend(x = "center", legend=paste(box_names, ":", colnames(cbf_cl)), cex = cex.axis, bty = "n", title = "LEGEND") diff --git a/R/boxplot_points.R b/R/boxplot_points.R index f57fc1e..e351f4d 100644 --- a/R/boxplot_points.R +++ b/R/boxplot_points.R @@ -15,6 +15,7 @@ #' @param image_format png or jpeg #' @param ... further argument to boxplot #' @importFrom pals alphabet +#' @importFrom grDevices png jpeg #' @export boxplot_points <- function(x=NULL, f=NULL, col=NULL, amount=0.2, adj.col=1, pch=16, cex=0.6, ylim=NULL, file=NULL, width=180, height=180, units="mm", res=300, image_format="png", ...){ @@ -23,7 +24,7 @@ boxplot_points <- function(x=NULL, f=NULL, col=NULL, amount=0.2, adj.col=1, pch= f_lev <- levels(f) if(is.null(col)){ - col <- pals::alphabet(length(f_lev)) + col <- alphabet(length(f_lev)) } if(is.null(ylim)){ diff --git a/R/calculate_CNV.R b/R/calculate_CNV.R index def3c42..eb3ddb0 100644 --- a/R/calculate_CNV.R +++ b/R/calculate_CNV.R @@ -6,28 +6,36 @@ #' @param min_genes minimun number of genes expressed in a cell; #' @param min_cells minimum number of cells in which a gene must be expressed; #' @param expr_lim min and max values of relative expression; by default, lower and higher whiskers returned by grDevices::boxplot.stats will be used. Set to NA or anything other value such that length(expr_lim) != 2 to disbale the removal of outliers. -#' @param scale_cells whether to scale cells +#' @param center_cells whether to center cells #' @param na.rm whether to remove 0 values in CNV estimation #' @param center_genes whether to center genes or not +#' @param gene_ann optional data.frame with gene annotation with mandatory columns "Chromosome", "symbol" and "pos" (genomic location). If NULL gene locations will be collected from org.Hs.eg.db #' @importFrom grDevices boxplot.stats +#' @importFrom Matrix Matrix rowSums colSums #' @description Calculate CNV using the moving average approach firstly described in Patel et al., 2014 Science (DOI: 10.1126/science) #' @export #' -calculate_CNV <- function(genes_by_cells, reference=NULL, mc.cores=1, wnd_size=100, min_genes=1000, min_cells=100, expr_lim=NULL, scale_cells=TRUE, na.rm=FALSE, center_genes=FALSE) { +calculate_CNV <- function(genes_by_cells, reference=NULL, mc.cores=1, wnd_size=100, min_genes=200, min_cells=100, expr_lim=NULL, center_cells=TRUE, na.rm=FALSE, center_genes=FALSE, gene_ann=NULL) { genes_by_cells[is.na(genes_by_cells)] <- 0 - cat("Filtering...") - idx_keep <- Matrix::rowSums(genes_by_cells !=0) >= min_cells #genes not missing in at least... + cat("Genes-by-cells matrix size", dim(genes_by_cells), "\n") + + cat("Filtering...") + idx_keep <- rowSums(genes_by_cells !=0) >= min_cells #genes not missing in at least... #print(table(idx_keep)) genes_by_cells <- genes_by_cells[idx_keep, ] + + if(nrow(genes_by_cells) < min_genes){ + stop("Less than ", min_genes, "available: considering reducing the parameter min_genes.\n") + } - idx_keep <- Matrix::colSums(genes_by_cells !=0) >= min(min_genes, nrow(genes_by_cells)) + idx_keep <- colSums(genes_by_cells !=0) >= min(min_genes, nrow(genes_by_cells)) #print(table(idx_keep)) genes_by_cells <- genes_by_cells[, idx_keep] cat(" done\n") - cat("genes-by-cells:", dim(genes_by_cells), "\n") - + cat("Genes-by-cells matrix size", dim(genes_by_cells), "\n") + # **************************************************************** if (!is.null(reference)) { cat("Adding reference vector...\n") @@ -59,7 +67,7 @@ calculate_CNV <- function(genes_by_cells, reference=NULL, mc.cores=1, wnd_size=1 rm(temp) if(is.null(expr_lim)){ - expr_lim <- grDevices::boxplot.stats(as.numeric(genes_by_cells))$stats[c(1, 5)] + expr_lim <- boxplot.stats(as.numeric(genes_by_cells))$stats[c(1, 5)] } #constraints over relative expression Tirosh et al. @@ -77,7 +85,7 @@ calculate_CNV <- function(genes_by_cells, reference=NULL, mc.cores=1, wnd_size=1 # *************************************************************** cat("Preprocess object to calculate CNV...\n") - ans <- preprocess_object_for_CNV(genes_by_cells) + ans <- preprocess_object_for_CNV(genes_by_cells = genes_by_cells, gene_ann=gene_ann) temp <- factor(names(ans), levels = c(1, 2, 3, 4 ,5 ,6 ,7 ,8 , 9, 10, 11, 12 , 13, 14 ,15, 16, 17, 18, 19, 20, 21, 22, "X", "Y")) temp <- as.character(sort(temp)) @@ -85,29 +93,30 @@ calculate_CNV <- function(genes_by_cells, reference=NULL, mc.cores=1, wnd_size=1 ans <- ans[match(temp, names(ans))] #exclude chrom with < wnd_size genes - ans <- ans[unlist(lapply(ans, nrow)) > wnd_size] - - for(i in 1:length(ans)){ - rownames(ans[[i]]) <- paste(rownames(ans[[i]]), ans[[i]]$pos, sep="_") - ans[[i]] <- ans[[i]][, -c(1:3)] + chr_size <- unlist(lapply(ans, nrow)) + cat("Genes in chromosomes:\n") + print(chr_size) + if(any(chr_size <= wnd_size)){ + cat("Excluding chromosomes", names(chr_size)[chr_size<=wnd_size], "\n") + cat("Consider reducing wnd_size parameter.\n") } + ans <- ans[chr_size > wnd_size] #calculate CNV cat("Calculating CNV...\n") - CNV_data_in <- lapply(ans, function(x) Matrix::Matrix(as.matrix(x), sparse = TRUE)) - ans <- mclapply(ans, function(x) apply(x, 2, function(y) CNV(setNames(y, rownames(x)), wnd_size=wnd_size, na.rm = na.rm)), mc.cores = mc.cores) + CNV_data_in <- lapply(ans, function(x) Matrix(as.matrix(x), sparse = TRUE)) - #merging chromosomes for(i in 1:length(ans)){ - if(!is.matrix(ans[[i]])){ - ans[[i]] <- matrix(ans[[i]], nrow = 1) - } - rownames(ans[[i]]) <- paste0("chr", names(ans)[i], "__", rownames(ans[[i]])) - } + + cat("Chromosome", names(ans)[i], "...\n") + ans[[i]] <- mclapply(setNames(1:ncol(ans[[i]]), colnames(ans[[i]])), function(j_column) CNV(setNames(ans[[i]][, j_column], rownames(ans[[i]])), wnd_size=wnd_size, na.rm = na.rm), mc.cores = mc.cores) + ans[[i]] <- do.call(cbind, ans[[i]]) + + } ans <- do.call(rbind, ans) - if(scale_cells){ + if(center_cells){ temp <- apply(ans, 2, scale, scale=F) rownames(temp) <- rownames(ans) ans <- temp diff --git a/R/calculate_gs_scores.R b/R/calculate_gs_scores.R index bda4890..764f1fe 100644 --- a/R/calculate_gs_scores.R +++ b/R/calculate_gs_scores.R @@ -79,7 +79,7 @@ calculate_gs_scores <- function(scMuffinList=NULL, gs_list=NULL, mc.cores=1, nbi }else{ - gs_scores <- parallel::mclapply(gs_list, function(i_marker_set) gs_score(i_marker_set, genes_by_cells = as.matrix(scMuffinList$normalized), bins = data_bins, k=k, nmark_min = nmark_min, ncells_min = ncells_min, null_model=null_model, kmin = kmin, verbose=verbose, na.rm=na.rm), mc.cores = mc.cores) + gs_scores <- mclapply(gs_list, function(i_marker_set) gs_score(i_marker_set, genes_by_cells = as.matrix(scMuffinList$normalized), bins = data_bins, k=k, nmark_min = nmark_min, ncells_min = ncells_min, null_model=null_model, kmin = kmin, verbose=verbose, na.rm=na.rm), mc.cores = mc.cores) } diff --git a/R/cluster_by_features.R b/R/cluster_by_features.R index 883cb19..c0a5062 100644 --- a/R/cluster_by_features.R +++ b/R/cluster_by_features.R @@ -1,62 +1,36 @@ #' Cluster by features #' #' @description Clustering of the features_by_cell matrix -#' @param features features object or result of CNV calculation +#' @param features_by_cells features by cells #' @param n_comp numeric, Dimensions of reduction to use as input -#' @param cnv TRUE/FALSE set it to TRUE for clustering CNV results -#' @param plot_umap whether to plot the UMAP -#' @param out_dir output directory -#' @param scale_features whether to scale features -#' @param ... arguments passed to Seurat::DimPlot +#' @param ... arguments passed to Seurat::FindCLusters #' @return features_by_cells Seurat Object, object with saved dimension reduction components calculate on features by cells matrix #' @import Seurat -#' @importFrom stats as.hclust #' @export -cluster_by_features <- function(features, n_comp = 10, cnv=FALSE, plot_umap=FALSE, out_dir="./", scale_features=TRUE, ...){ +cluster_by_features <- function(features_by_cells=NULL, n_comp = 10, ...){ - if(!dir.exists(out_dir)){ - dir.create(out_dir) - } - - if(cnv){ - features_by_cells <- features - }else{ - features_by_cells <- t(features$df[, features$type != "factor", drop=F]) - features_by_cells[is.na(features_by_cells)] <- 0 - } - - - features_by_cells <- Seurat::CreateSeuratObject(counts = features_by_cells, min.cells = 0, min.features = 0) + features_by_cells <- CreateSeuratObject(counts = features_by_cells, min.cells = 0, min.features = 0) all.genes <- rownames(features_by_cells) - #scale data - if(scale_features){ - features_by_cells <- Seurat::ScaleData(features_by_cells, features = all.genes) - } - features_by_cells <- Seurat::RunPCA(features_by_cells, features = all.genes) + #scale data + features_by_cells <- ScaleData(features_by_cells, features = all.genes) + features_by_cells <- RunPCA(features_by_cells, npcs=n_comp, features = all.genes) n_comp <- min(n_comp, ncol(features_by_cells@reductions$pca)) - features_by_cells <- Seurat::FindNeighbors(features_by_cells, dims = 1:n_comp) - features_by_cells <- Seurat::FindClusters(features_by_cells) - #features_by_cells <- Seurat::RunUMAP(features_by_cells, dims = 1:n_comp) + features_by_cells <- FindNeighbors(features_by_cells, dims = 1:n_comp) + features_by_cells <- FindClusters(features_by_cells, ...) - if(plot_umap){ - grDevices::jpeg(paste0(out_dir, "/feature_umap.jpg"), width = 180, height = 180, res=300, units="mm") - res <- DimPlot(features_by_cells, ..., combine = F) - plot(res[[1]]) - dev.off() - } - if(cnv){ - #features_by_cells <- Seurat::BuildClusterTree(features_by_cells, features = all.genes, reorder = T) - #hc_cells <- stats::as.hclust(features_by_cells@tools$BuildClusterTree) - #ans <- list(clusters=features_by_cells@active.ident, hc=hc_cells, sobj=features_by_cells) - ans <- list(clusters=features_by_cells@active.ident, sobj=features_by_cells) - - }else{ + # if(cnv){ + # #features_by_cells <- Seurat::BuildClusterTree(features_by_cells, features = all.genes, reorder = T) + # #hc_cells <- stats::as.hclust(features_by_cells@tools$BuildClusterTree) + # #ans <- list(clusters=features_by_cells@active.ident, hc=hc_cells, sobj=features_by_cells) + # ans <- list(clusters=features_by_cells@active.ident, sobj=features_by_cells) + # + # }else{ ans <- list(clusters=features_by_cells@active.ident, sobj=features_by_cells) - } + # } return(ans) diff --git a/R/create_scMuffinList.R b/R/create_scMuffinList.R index a434a97..6c667ac 100644 --- a/R/create_scMuffinList.R +++ b/R/create_scMuffinList.R @@ -40,10 +40,10 @@ create_scMuffinList <- function(counts=NULL, normalized=NULL){ } - scMuffinList$counts <- Matrix::Matrix(counts, sparse = TRUE) + scMuffinList$counts <- Matrix(counts, sparse = TRUE) cat("counts created\n") - scMuffinList$normalized <- Matrix::Matrix(normalized, sparse = TRUE) + scMuffinList$normalized <- Matrix(normalized, sparse = TRUE) cat("normalized created\n") diff --git a/R/csea.R b/R/csea.R index 39ac3ec..67fd831 100644 --- a/R/csea.R +++ b/R/csea.R @@ -90,17 +90,17 @@ csea <- function(rl, gsl, k=100, min.size=100, ord.mode=-1, min.k.nes=10, mc_cor }else{ cat(k, "permutations on", mc_cores_perm, "cores\n") #res <- lapply(gsl, function(x) unlist(mclapply(x_perm, function(y) calc_gs_perm(rl, y, x), mc.cores=mc_cores_perm))) - res <- lapply(gsl, function(x) do.call(rbind, parallel::mclapply(x_perm, function(y) calc_gs_perm(rll, y, x), mc.cores=mc_cores_perm))) + res <- lapply(gsl, function(x) do.call(rbind, mclapply(x_perm, function(y) calc_gs_perm(rll, y, x), mc.cores=mc_cores_perm))) } }else{ cat(length(gsl), "gene sets on", mc_cores_path, "cores\n") if(mc_cores_perm == 1){ #res <- parallel::mclapply(gsl, function(x) unlist(lapply(x_perm, function(y) calc_gs_perm(rl, y, x))), mc.cores = mc_cores_path) - res <- parallel::mclapply(gsl, function(x) do.call(rbind, lapply(x_perm, function(y) calc_gs_perm(rll, y, x))), mc.cores = mc_cores_path) + res <- mclapply(gsl, function(x) do.call(rbind, lapply(x_perm, function(y) calc_gs_perm(rll, y, x))), mc.cores = mc_cores_path) }else{ cat(k, "permutations on", mc_cores_perm, "cores\n") #res <- parallel::mclapply(gsl, function(x) unlist(parallel::mclapply(x_perm, function(y) calc_gs_perm(rl, y, x), mc.cores=mc_cores_perm)), mc.cores = mc_cores_path) - res <- parallel::mclapply(gsl, function(x) do.call(rbind, parallel::mclapply(x_perm, function(y) calc_gs_perm(rll, y, x), mc.cores=mc_cores_perm)), mc.cores = mc_cores_path) + res <- mclapply(gsl, function(x) do.call(rbind, mclapply(x_perm, function(y) calc_gs_perm(rll, y, x), mc.cores=mc_cores_perm)), mc.cores = mc_cores_path) } } @@ -175,7 +175,7 @@ csea <- function(rl, gsl, k=100, min.size=100, ord.mode=-1, min.k.nes=10, mc_cor fdrq[fdrq>1] <- 1 #out table - out[[i]] <- data.frame(id=rownames(res[[i]]), es=res[[i]][, 1], p_val=p_val, adj_p_val=stats::p.adjust(p_val, method='fdr'), n_pos_perm=n_pos_perm, n_neg_perm=n_neg_perm, nes=nes[, 1], FDRq=fdrq, stringsAsFactors=FALSE) + out[[i]] <- data.frame(id=rownames(res[[i]]), es=res[[i]][, 1], p_val=p_val, adj_p_val=p.adjust(p_val, method='fdr'), n_pos_perm=n_pos_perm, n_neg_perm=n_neg_perm, nes=nes[, 1], FDRq=fdrq, stringsAsFactors=FALSE) } diff --git a/R/detect_CNV_regions.R b/R/detect_CNV_regions.R index 773248a..ef0c0e7 100644 --- a/R/detect_CNV_regions.R +++ b/R/detect_CNV_regions.R @@ -2,9 +2,7 @@ #' @param scMuffinList scMuffinList object #' @param z.score whether to use clustere median z-scores of CNV profile (TRUE) or cluster median CNV profile (FALSE). #' @param eps the value that defines a CNV. If NUll the stard deviation of the dataset is used. -#' @description Plot an heatmap of the CNV. -#' @details CNV Profile of every cluster -#' @import ComplexHeatmap grDevices grid +#' @description Detect the CNV regions in eaach cluster and chromosome #' @export detect_CNV_regions <- function(scMuffinList = NULL, z.score=FALSE, eps=NULL){ @@ -19,7 +17,7 @@ detect_CNV_regions <- function(scMuffinList = NULL, z.score=FALSE, eps=NULL){ cnv <- cnv[, match(names(cnv_clustering), colnames(cnv))] - row_chr <- row_chr_vector <- gsub("(chr[^_]+)_.+", "\\1", rownames(cnv)) + row_chr <- row_chr_vector <- gsub("(chr[^_]+)__.+", "\\1", rownames(cnv)) row_chr <- factor(row_chr, levels = unique(row_chr)) row_chr <- split(row_chr, row_chr) ngenes_chr <- unlist(lapply(row_chr, length)) @@ -95,7 +93,20 @@ detect_CNV_regions <- function(scMuffinList = NULL, z.score=FALSE, eps=NULL){ } } - temp <- data.frame(chr=gsub("^(chr[^_]+)__.+$", "\\1", temp$region[temp$pattern==1]), start=temp$region[temp$pattern==1], start.loc=as.numeric(gsub("^.+_(\\d+)__.+$", "\\1", temp$region[temp$pattern==1])), stop=temp$region[temp$pattern==-1], stop.loc=as.numeric(gsub("^.+_(\\d+)$", "\\1", temp$region[temp$pattern==-1])), cluster=colnames(cnv_begin_end[[i]])[j], stringsAsFactors = F) + start_info <- setNames(data.frame(do.call(rbind, strsplit(temp$region[temp$pattern==1], "__")), stringsAsFactors = F), c("chr_start", "pos_start", "symbol_start", "chr_end", "pos_end", "symbol_end")) + stop_info <- setNames(data.frame(do.call(rbind, strsplit(temp$region[temp$pattern==-1], "__")), stringsAsFactors = F), c("chr_start", "pos_start", "symbol_start", "chr_end", "pos_end", "symbol_end")) + + temp <- data.frame(chr=gsub("^(chr[^_]+)__.+$", "\\1", temp$region[temp$pattern==1]), + start=temp$region[temp$pattern==1], + start.loc=as.numeric(start_info$pos_start), + stop=temp$region[temp$pattern==-1], + stop.loc=as.numeric(stop_info$pos_end), + cluster=colnames(cnv_begin_end[[i]])[j], + stringsAsFactors = F + ) + + #temp <- data.frame(chr=gsub("^(chr[^_]+)__.+$", "\\1", temp$region[temp$pattern==1]), start=temp$region[temp$pattern==1], start.loc=as.numeric(gsub("^.+__(\\d+)__.+$", "\\1", temp$region[temp$pattern==1])), stop=temp$region[temp$pattern==-1], stop.loc=as.numeric(gsub("^.+_(\\d+)$", "\\1", temp$region[temp$pattern==-1])), cluster=colnames(cnv_begin_end[[i]])[j], stringsAsFactors = F) + temp$length <- temp$stop.loc - temp$start.loc if(length(cnv_regions[[i]])==0){ diff --git a/R/diff_map.R b/R/diff_map.R index e5794ae..d0983bc 100644 --- a/R/diff_map.R +++ b/R/diff_map.R @@ -12,17 +12,17 @@ diff_map <- function(scMuffinList = NULL, root_cell="random", n_pcs=50, ...){ cat("Calculating the diffusion map...\n") - res <- destiny::DiffusionMap(t(as.matrix(scMuffinList$normalized)), n_pcs=n_pcs, ...) + res <- DiffusionMap(t(as.matrix(scMuffinList$normalized)), n_pcs=n_pcs, ...) #Il DPT ?? metrica che dipende dalla cellula scelta, identical(dpt[root.idx], dpt$dpt) dpt[["dpt"]] dpt <- NA if(root_cell == "random"){ cat("calculating a random root...\n") - root_cell <- destiny::random_root(res) #NUll + root_cell <-random_root(res) #NUll } cat("Diffusion pseudotimes...\n") - dpt <- destiny::DPT(res, tips = root_cell) + dpt <- DPT(res, tips = root_cell) scMuffinList$diffusion_map_pseudo_t <- list( summary = data.frame(res@eigenvectors[, 1:2], dpt=dpt$dpt, branch=dpt@branch[, 1], tips=dpt@tips[, 1], stringsAsFactors = F, row.names = rownames(res@eigenvectors)), diff --git a/R/get_msigdb_geneset.R b/R/get_msigdb_geneset.R index c83c322..38db5cb 100644 --- a/R/get_msigdb_geneset.R +++ b/R/get_msigdb_geneset.R @@ -4,7 +4,7 @@ #' @param subcategory MsigDB subcategory name, see msigdbr_collections() #' @param type Gene name of interest, can be gene_symbol or entrez_gene #' @description Returns msigdb geneset in a format compatible with calculate_signatures() -#' @import msigdbr +#' @importFrom msigdbr msigdbr #' @return List with msigdb_output, the output of msigdbr::msigdbr, and path_list, the list of gene sets #' @export @@ -15,7 +15,7 @@ get_msigdb_geneset <- function(species, category=NULL, subcategory=NULL, type="g if(is.na(subcategory)){ subcategory <- NULL } - msig <- msigdbr::msigdbr(species = species, category = category, subcategory = subcategory) + msig <-msigdbr(species = species, category = category, subcategory = subcategory) if(type=="gene_symbol") { msig_list <- split(x=msig$gene_symbol, f = msig$gs_name) } else if (type == "entrez_gene") { diff --git a/R/gs_score.R b/R/gs_score.R index 5ffb6a0..b58be79 100755 --- a/R/gs_score.R +++ b/R/gs_score.R @@ -64,7 +64,7 @@ gs_score <- function(gene_set=NULL, genes_by_cells=NULL, bins=NULL, nmark_min=5, #ans$score <- ans$case - ans$control #new version - ans <- list(gs=data.frame(case=Matrix::colMeans(gene_set_data, na.rm = na.rm), case.N=nrow(gene_set_data), case.AV=Matrix::colSums(!is.na(gene_set_data)), stringsAsFactors = F)) #real + ans <- list(gs=data.frame(case=colMeans(gene_set_data, na.rm = na.rm), case.N=nrow(gene_set_data), case.AV=colSums(!is.na(gene_set_data)), stringsAsFactors = F)) #real #z-scores.... #temp <- gene_set_data @@ -77,7 +77,7 @@ gs_score <- function(gene_set=NULL, genes_by_cells=NULL, bins=NULL, nmark_min=5, #attach to ans the genes-by-cells matrix of controls if(null_model){ - ans <- c(ans, lapply(control_set_data, function(x) data.frame(control=Matrix::colMeans(x, na.rm = na.rm), control.N=nrow(x), control.AV=Matrix::colSums(!is.na(x)), stringsAsFactors = F))) #controls + ans <- c(ans, lapply(control_set_data, function(x) data.frame(control=colMeans(x, na.rm = na.rm), control.N=nrow(x), control.AV=colSums(!is.na(x)), stringsAsFactors = F))) #controls } #case and control will have NaN for cells with all NA diff --git a/R/gs_scores_in_clusters.R b/R/gs_scores_in_clusters.R index ce2b3eb..bb5e058 100755 --- a/R/gs_scores_in_clusters.R +++ b/R/gs_scores_in_clusters.R @@ -32,7 +32,7 @@ gs_scores_in_clusters <- function(score_table=NULL, cell_clusters=NULL, ncells_m if(null_model){ cluster_scores <- data.frame( cells=tapply(score_table_clusters$nmark_min, score_table_clusters$cluster, sum), - med_case=tapply(score_table_clusters$case, score_table_clusters$cluster, function(x) stats::median(x, na.rm = T)), + med_case=tapply(score_table_clusters$case, score_table_clusters$cluster, function(x) median(x, na.rm = T)), med_control=tapply(score_table_clusters$avg_control, score_table_clusters$cluster, function(x) median(x, na.rm = T)), stringsAsFactors = F ) #median cell score for markers(i) in each cell cluster @@ -44,7 +44,7 @@ gs_scores_in_clusters <- function(score_table=NULL, cell_clusters=NULL, ncells_m temp <- split(score_table_clusters, score_table_clusters$cluster) for(i in 1:length(temp)){ if(nrow(temp[[i]]) >= ncells_min ){ #at least ncells_min with acceptable score - temp[[i]] <- stats::wilcox.test(temp[[i]]$case, temp[[i]]$avg_control, alternative = alt, paired = F) + temp[[i]] <- wilcox.test(temp[[i]]$case, temp[[i]]$avg_control, alternative = alt, paired = F) }else{ temp[[i]] <- NA } @@ -56,7 +56,7 @@ gs_scores_in_clusters <- function(score_table=NULL, cell_clusters=NULL, ncells_m temp <- split(score_table_clusters, score_table_clusters$cluster) for(i in 1:length(temp)){ if(nrow(temp[[i]]) >= ncells_min ){ #at least ncells_min with acceptable score - temp[[i]] <- stats::t.test(temp[[i]]$case, temp[[i]]$avg_control, alternative = alt, paired = T) + temp[[i]] <- t.test(temp[[i]]$case, temp[[i]]$avg_control, alternative = alt, paired = T) }else{ temp[[i]] <- NA } @@ -70,7 +70,7 @@ gs_scores_in_clusters <- function(score_table=NULL, cell_clusters=NULL, ncells_m cluster_scores <- merge(cluster_scores, res_p, by.x=1, by.y=0) colnames(cluster_scores)[c(1, 6, 7)] <- c("cluster", "stat", "p") - cluster_scores$fdr <- stats::p.adjust(cluster_scores$p, method = "fdr") + cluster_scores$fdr <- p.adjust(cluster_scores$p, method = "fdr") #missing values to 0 if(!all(clusters %in% cluster_scores$cluster)){ diff --git a/R/heatmap_CNV.R b/R/heatmap_CNV.R index cd5073e..69dd3b3 100644 --- a/R/heatmap_CNV.R +++ b/R/heatmap_CNV.R @@ -13,7 +13,9 @@ #' @param legend_fontsize legend fontsize #' @param genes.labels.fontsize gene labels fontsize #' @param ... arguments passed to ComplexHeatmap::Heatmap -#' @import ComplexHeatmap grDevices grid +#' @import ComplexHeatmap +#' @importFrom grid gpar +#' @importFrom grDevices png #' @export heatmap_CNV <- function(scMuffinList = NULL, genes=NULL, genes.labels=FALSE, mark.detected.cnv=FALSE, file=NULL, width=180, height=180, units="mm", res=300, cluster_fontsize=8, chrom_fontsize=8, legend_fontsize=8, genes.labels.fontsize=8, ...) { @@ -83,9 +85,9 @@ heatmap_CNV <- function(scMuffinList = NULL, genes=NULL, genes.labels=FALSE, mar clust_color[levels(cnv_clustering)==ref_cluster] <- "red" } - column_title_gp <- grid::gpar(col = clust_color, fontsize = cluster_fontsize) - row_title_gp <- grid::gpar(fontsize = chrom_fontsize) - heatmap_legend_param <- list(title_gp = gpar(fontsize = legend_fontsize), labels_gp = grid::gpar(fontsize = legend_fontsize)) + column_title_gp <- gpar(col = clust_color, fontsize = cluster_fontsize) + row_title_gp <- gpar(fontsize = chrom_fontsize) + heatmap_legend_param <- list(title_gp = gpar(fontsize = legend_fontsize), labels_gp = gpar(fontsize = legend_fontsize)) max_abs <- max(abs(cnv), na.rm = T) @@ -94,7 +96,7 @@ heatmap_CNV <- function(scMuffinList = NULL, genes=NULL, genes.labels=FALSE, mar } - temp<- ComplexHeatmap::Heatmap(cnv, cluster_rows = F, cluster_columns = F, show_row_names = F, show_column_names = F, row_split = row_splits, row_title_rot=0, row_gap = unit(0, "mm"), column_split = col_splits, column_gap = unit(0, "mm"), border=TRUE, column_title_gp=column_title_gp, row_title_gp=row_title_gp, name="C", heatmap_legend_param=heatmap_legend_param, right_annotation = ha, ...) + temp <- Heatmap(cnv, cluster_rows = F, cluster_columns = F, show_row_names = F, show_column_names = F, row_split = row_splits, row_title_rot=0, row_gap = unit(0, "mm"), column_split = col_splits, column_gap = unit(0, "mm"), border=TRUE, column_title_gp=column_title_gp, row_title_gp=row_title_gp, name="C", heatmap_legend_param=heatmap_legend_param, right_annotation = ha, ...) draw(temp) if(!is.null(file)){ diff --git a/R/inter_datasets_comparison.R b/R/inter_datasets_comparison.R index ef8f691..c76194b 100644 --- a/R/inter_datasets_comparison.R +++ b/R/inter_datasets_comparison.R @@ -11,8 +11,6 @@ #' @param ... arguments passed to calculate_gs_scores #' @export #' @description Quantify the similarity between clusters of two datasets, on the basis of the average cluster marker expression -#' @import pals -#' @importFrom circlize colorRamp2 #' @return A list with: #' \itemize{ #' \item{clust_sim, matrix of clucster similarity;} diff --git a/R/ora.R b/R/ora.R index 9757f28..44649c6 100644 --- a/R/ora.R +++ b/R/ora.R @@ -16,7 +16,7 @@ ora <- function(wb, bb, gsl, p_adj_method='fdr'){ out$N <- length(wb) + length(bb) out$exp <- out$wb * out$bd / out$N out$id <- rownames(out) - out$p_adj <- stats::p.adjust(out$p, method = p_adj_method) + out$p_adj <- p.adjust(out$p, method = p_adj_method) out$er <- out$wbd / out$exp return(out[, c('id', 'N', 'wb', 'bb', 'bd', 'wbd', 'exp', 'er', 'p', 'p_adj')]) diff --git a/R/ora1gs.R b/R/ora1gs.R index eb58773..e2c43c7 100644 --- a/R/ora1gs.R +++ b/R/ora1gs.R @@ -7,7 +7,7 @@ ora1gs <- function(wb, bb, bd){ wbd <- bd[bd %in% wb] - p_out <- stats::phyper(length(wbd), length(wb), length(bb), length(bd), lower.tail = FALSE) + stats::dhyper(length(wbd), length(wb), length(bb), length(bd)) + p_out <- phyper(length(wbd), length(wb), length(bb), length(bd), lower.tail = FALSE) + dhyper(length(wbd), length(wb), length(bb), length(bd)) return(data.frame(wb=length(wb), bb=length(bb), bd=length(bd), wbd=length(wbd), p=p_out, stringsAsFactors = F)) } diff --git a/R/plot_diff_map.R b/R/plot_diff_map.R index c958dfe..ef81cf4 100644 --- a/R/plot_diff_map.R +++ b/R/plot_diff_map.R @@ -12,7 +12,9 @@ #' @param scale_feature logical, whether to scale col_data #' @param adj_outliers logical, whether to adjust the group.by scores, removing outliers #' @description Produce a scatter plot where cells are placed according to the results of diffusion map analysis and colored by values given in col_data. -#' @import grDevices +#' @importFrom grDevices png +#' @importFrom ggplot2 cut_interval +#' @import pals #' @export plot_diff_map <- function(scMuffinList = NULL, columns=c(1:2), col_data=NULL, file=NULL, width=200, height=200, units="mm", res=300, adj_outliers=TRUE, scale_feature=FALSE, min_cells=50, ...){ @@ -62,18 +64,18 @@ plot_diff_map <- function(scMuffinList = NULL, columns=c(1:2), col_data=NULL, fi #reorder cells according to feature data idx_neg <- col_data <= 0 idx_pos <- col_data > 0 - md <- c(setNames(ggplot2::cut_interval(col_data[idx_neg], 5, dig.lab = 2), names(col_data)[idx_neg]), setNames(ggplot2::cut_interval(col_data[col_data>0], 5, dig.lab = 2), names(col_data)[idx_pos])) + md <- c(setNames(cut_interval(col_data[idx_neg], 5, dig.lab = 2), names(col_data)[idx_neg]), setNames(cut_interval(col_data[col_data>0], 5, dig.lab = 2), names(col_data)[idx_pos])) md <- md[match(names(col_data), names(md))] md <- factor(md, levels=rev(levels(md))) - pal <- pals::brewer.rdylbu(10) + pal <- brewer.rdylbu(10) cols <- pal[as.numeric(md)] }else{ #if only positive values - md <- ggplot2::cut_interval(col_data, 5, dig.lab = 2) + md <- cut_interval(col_data, 5, dig.lab = 2) md <- factor(md, levels=rev(levels(md))) - pal <- rev(pals::brewer.ylorrd(5)) + pal <- rev(brewer.ylorrd(5)) cols <- pal[as.numeric(md)] } @@ -83,7 +85,7 @@ plot_diff_map <- function(scMuffinList = NULL, columns=c(1:2), col_data=NULL, fi md <- as.factor(col_data) n_colors <- length(levels(md)) - pal <- pals::alphabet(n_colors) + pal <- alphabet(n_colors) cols <- pal[as.numeric(as.factor(col_data))] } diff --git a/R/plot_heatmap_dataset_comparison.R b/R/plot_heatmap_dataset_comparison.R index aa7bc25..1ac2614 100644 --- a/R/plot_heatmap_dataset_comparison.R +++ b/R/plot_heatmap_dataset_comparison.R @@ -21,11 +21,11 @@ plot_heatmap_dataset_comparison <- function(dataset_cmp_list=NULL, type="score", if(type=="score"){ M <- dataset_cmp_list$score_matrix - pal <- rev(pals::brewer.rdylbu(11)) + pal <- rev(brewer.rdylbu(11)) name <- "score" }else{ M <- -log10(dataset_cmp_list$significance_matrix) - pal <- pals::brewer.purples(11) + pal <- brewer.purples(11) name <- "sig" } M <- as.matrix(M) @@ -39,14 +39,14 @@ plot_heatmap_dataset_comparison <- function(dataset_cmp_list=NULL, type="score", X_extreme <- c(0, X_extreme) } - col_fun <- circlize::colorRamp2(seq(X_extreme[1], X_extreme[2], length.out = 11), pal) + col_fun <- colorRamp2(seq(X_extreme[1], X_extreme[2], length.out = 11), pal) ra <- factor(gsub("^([^_]+)_.+", "\\1", rownames(M))) - ra <- rowAnnotation(clusters=ra, col=list(clusters=setNames(pals::brewer.accent(length(levels(ra)))[as.numeric(ra)], ra))) + ra <- rowAnnotation(clusters=ra, col=list(clusters=setNames(brewer.accent(length(levels(ra)))[as.numeric(ra)], ra))) ca <- NULL if(show.gs.source){ ca <- factor(gsub("^([^_]+)_.+", "\\1", colnames(M))) - ca <- columnAnnotation(markers=ca, col=list(markers=setNames(pals::brewer.pastel1(length(levels(ca)))[as.numeric(ca)], ca))) + ca <- columnAnnotation(markers=ca, col=list(markers=setNames(brewer.pastel1(length(levels(ca)))[as.numeric(ca)], ca))) } hm <- Heatmap(as.matrix(M), col=col_fun, right_annotation = ra, top_annotation = ca, name = name, ...) diff --git a/R/plot_heatmap_features_by_clusters.R b/R/plot_heatmap_features_by_clusters.R index 489a82d..3e29220 100644 --- a/R/plot_heatmap_features_by_clusters.R +++ b/R/plot_heatmap_features_by_clusters.R @@ -13,9 +13,7 @@ #' @param pal color palette. Default to rev(pals::brewer.rdylbu(10)) (negative values) or pals::brewer.ylorrd(5)) (positive values) #' @param ... further arguments to ComplexHeatmap::Heatmap #' @export -#' @import ComplexHeatmap grDevices -#' @importFrom utils write.table -#' @importFrom pals brewer.rdylbu brewer.ylorrd +#' @import ComplexHeatmap grDevices pals #' @importFrom circlize colorRamp2 plot_heatmap_features_by_clusters <- function(scMuffinList=NULL, feature_source=NULL, partition_id=NULL, significance_matrix=NULL, sig_threshold=0.05, file=NULL, width=180, height=180, units="mm", res=300, scale=TRUE, pal=NULL, ...){ @@ -73,10 +71,10 @@ plot_heatmap_features_by_clusters <- function(scMuffinList=NULL, feature_source= if(is.null(pal)){ X_abs_max <- max(abs(c(min(X, na.rm = TRUE), max(X, na.rm = TRUE)))) if(any(X<0)){ - pal <- rev(pals::brewer.rdylbu(5)) + pal <- rev(brewer.rdylbu(5)) pal <- colorRamp2(c(-X_abs_max, 0, X_abs_max), c(pal[1], pal[3], pal[5])) }else{ - pal <- pals::brewer.ylorrd(5) + pal <- brewer.ylorrd(5) } } @@ -99,7 +97,7 @@ plot_heatmap_features_by_clusters <- function(scMuffinList=NULL, feature_source= X <- t(scale(t(X))) } - h_tot_go <- ComplexHeatmap::Heatmap(X, show_row_names = T, cell_fun = cell_fun_asterisk, col=pal, ...) + h_tot_go <- Heatmap(X, show_row_names = T, cell_fun = cell_fun_asterisk, col=pal, ...) draw(h_tot_go, heatmap_legend_side = "left") if(!is.null(file)){ diff --git a/R/plot_profile_CNV.R b/R/plot_profile_CNV.R index 91d46a4..f71638c 100644 --- a/R/plot_profile_CNV.R +++ b/R/plot_profile_CNV.R @@ -14,6 +14,7 @@ #' @description Plot an heatmap of the CNV. #' @details CNV Profile of every cluster #' @import ComplexHeatmap grDevices grid +#' @importFrom ggplot2 cut_interval #' @export plot_profile_CNV <- function(scMuffinList = NULL, cluster=0, z.score=TRUE, file=NULL, width=300, height=90, units="mm", res=300, cex.points=0.7, cex.lab=0.7, cex.axis=0.7, cex.main=0.7){ @@ -72,11 +73,11 @@ plot_profile_CNV <- function(scMuffinList = NULL, cluster=0, z.score=TRUE, file= ### colors idx_neg <- avg_cl_cnv[, j] <= 0 idx_pos <- avg_cl_cnv[, j] > 0 - md <- c(setNames(ggplot2::cut_interval(avg_cl_cnv[idx_neg, j], 5, dig.lab = 2), rownames(avg_cl_cnv)[idx_neg]), setNames(ggplot2::cut_interval(avg_cl_cnv[idx_pos, j], 5, dig.lab = 2), rownames(avg_cl_cnv)[idx_pos])) + md <- c(setNames(cut_interval(avg_cl_cnv[idx_neg, j], 5, dig.lab = 2), rownames(avg_cl_cnv)[idx_neg]), setNames(cut_interval(avg_cl_cnv[idx_pos, j], 5, dig.lab = 2), rownames(avg_cl_cnv)[idx_pos])) md <- md[match(rownames(avg_cl_cnv), names(md))] md <- factor(md, levels=levels(md)) - col <- rev(pals::brewer.rdylbu(10))[as.numeric(md)] + col <- rev(brewer.rdylbu(10))[as.numeric(md)] chr_start <- c(1, cumsum(ngenes_chr)[-length(ngenes_chr)]+1) chr_end <- cumsum(ngenes_chr) diff --git a/R/plot_umap.R b/R/plot_umap.R index 66cd6fc..4c29a9a 100644 --- a/R/plot_umap.R +++ b/R/plot_umap.R @@ -15,7 +15,9 @@ #' @param res image resolution #' @param text.size text size #' @param ... further arguments for Seurat::FeaturePlot or Seurat::DimPlot -#' @import Seurat graphics ggplot2 +#' @importFrom grDevices png +#' @importFrom ggplot2 annotate theme element_text +#' @importFrom Seurat FeaturePlot DimPlot FetchData #' @description Generate a UMAP visualization #' @export #' @@ -36,20 +38,20 @@ plot_umap <- function(Seu_obj, file="umap.jpg", labels=NULL, group.by=NULL, feat par(mgp=c(2, 0.7, 0)) if(feature_plot){ - res <- Seurat::FeaturePlot(Seu_obj, features = group.by, ...) + res <- FeaturePlot(Seu_obj, features = group.by, ...) }else{ - res <- Seurat::DimPlot(Seu_obj, group.by=group.by, ...) + res <- DimPlot(Seu_obj, group.by=group.by, ...) } if(!is.null(labels)){ - data_plot <- Seurat::FetchData(Seu_obj, vars = c("UMAP_1", "UMAP_2", group.by)) + data_plot <- FetchData(Seu_obj, vars = c("UMAP_1", "UMAP_2", group.by)) labels <- lapply(labels, function(x) paste0(x, collapse = "\n")) #remove sort cluster_xy <- split(data_plot[, 1:2], data_plot[, 3]) cluster_xy <- do.call(rbind, lapply(cluster_xy, colMeans)) - plot(res + ggplot2::annotate(geom="text", x=cluster_xy[, 1], y=cluster_xy[, 2], label=labels, color=lab_color, size=lab_size, fontface = "bold") + theme(text = element_text(size = text.size), axis.text = element_text(size = text.size))) + plot(res + annotate(geom="text", x=cluster_xy[, 1], y=cluster_xy[, 2], label=labels, color=lab_color, size=lab_size, fontface = "bold") + theme(text = element_text(size = text.size), axis.text = element_text(size = text.size))) }else{ plot(res) diff --git a/R/plot_umap_colored_features.R b/R/plot_umap_colored_features.R index 975644c..d9b2bdb 100644 --- a/R/plot_umap_colored_features.R +++ b/R/plot_umap_colored_features.R @@ -13,8 +13,10 @@ #' @param res image resolution #' @param out_dir output directory #' @param ... further arguments to plot_umap -#' @importFrom pals brewer.rdylbu brewer.purples alphabet +#' @import pals #' @importFrom stats setNames +#' @importFrom ggplot2 cut_interval +#' @importFrom Seurat AddMetaData #' @export plot_umap_colored_features <- function(Seu_obj=NULL, scMuffinList=NULL, feature_name=NULL, scale_feature=TRUE, adj_outliers=FALSE, min_cells=10, out_dir="./", width=180, height=180, units="mm", res=300, ...){ @@ -65,21 +67,21 @@ plot_umap_colored_features <- function(Seu_obj=NULL, scMuffinList=NULL, feature_ #reorder cells according to feature data idx_neg <- feature_data_i <= 0 idx_pos <- feature_data_i > 0 - md <- c(setNames(ggplot2::cut_interval(feature_data_i[idx_neg], 5, dig.lab = 2), rownames(feature_data)[idx_neg]), setNames(ggplot2::cut_interval(feature_data_i[feature_data_i>0], 5, dig.lab = 2), rownames(feature_data)[idx_pos])) + md <- c(setNames(cut_interval(feature_data_i[idx_neg], 5, dig.lab = 2), rownames(feature_data)[idx_neg]), setNames(cut_interval(feature_data_i[feature_data_i>0], 5, dig.lab = 2), rownames(feature_data)[idx_pos])) md <- md[match(rownames(feature_data), names(md))] md <- factor(md, levels=rev(levels(md))) #add the metadata - Seu_obj <- Seurat::AddMetaData(Seu_obj, metadata=md, col.name=colnames(feature_data)[i]) - plot_umap(Seu_obj, file = out_file, group.by = colnames(feature_data)[i], cols=(pals::brewer.rdylbu(10)), width=width, height=height, units=units, res=res, ...) + Seu_obj <- AddMetaData(Seu_obj, metadata=md, col.name=colnames(feature_data)[i]) + plot_umap(Seu_obj, file = out_file, group.by = colnames(feature_data)[i], cols=(brewer.rdylbu(10)), width=width, height=height, units=units, res=res, ...) }else{ #if only positive values - md <- ggplot2::cut_interval(feature_data_i, 5, dig.lab = 2) + md <- cut_interval(feature_data_i, 5, dig.lab = 2) md <- factor(md, levels=rev(levels(md))) - Seu_obj <- Seurat::AddMetaData(Seu_obj, metadata=md, col.name=colnames(feature_data)[i]) - plot_umap(Seu_obj, file = out_file, group.by = colnames(feature_data)[i], cols=rev(pals::brewer.ylorrd(5)), width=width, height=height, units=units, res=res, ...) + Seu_obj <- AddMetaData(Seu_obj, metadata=md, col.name=colnames(feature_data)[i]) + plot_umap(Seu_obj, file = out_file, group.by = colnames(feature_data)[i], cols=rev(brewer.ylorrd(5)), width=width, height=height, units=units, res=res, ...) } } @@ -89,8 +91,8 @@ plot_umap_colored_features <- function(Seu_obj=NULL, scMuffinList=NULL, feature_ #if not numeric... n_colors <- length(levels(as.factor(feature_data_i))) - pal <- setNames(pals::alphabet(n_colors), levels(as.factor(feature_data_i))) - Seu_obj <- Seurat::AddMetaData(Seu_obj, metadata=feature_data_i[match(rownames(feature_data), colnames(Seu_obj))], col.name=colnames(feature_data)[i]) + pal <- setNames(alphabet(n_colors), levels(as.factor(feature_data_i))) + Seu_obj <- AddMetaData(Seu_obj, metadata=feature_data_i[match(rownames(feature_data), colnames(Seu_obj))], col.name=colnames(feature_data)[i]) plot_umap(Seu_obj, file = out_file, group.by = colnames(feature_data)[i], cols=pal, width=width, height=height, units=units, res=res, ...) } diff --git a/R/preprocess_for_heatmap_CNV.R b/R/preprocess_for_heatmap_CNV.R deleted file mode 100644 index 1acb060..0000000 --- a/R/preprocess_for_heatmap_CNV.R +++ /dev/null @@ -1,16 +0,0 @@ -#' preprocess_for_heatmap_CNV -#' @param result_cnv object resulting from CNV analyiss -#' @description Bind chromosomes together as a matrix. -#' @return A matrix to be used by the function heatmap_CNV. -#' @export - -preprocess_for_heatmap_CNV <- function(result_cnv=NULL) { - - for(i in 1:length(result_cnv)){ - rownames(result_cnv[[i]]) <- paste0("chr", names(result_cnv)[i], "_", rownames(result_cnv[[i]])) - } - - chr_merged <- do.call(rbind, result_cnv) - - return(chr_merged) -} diff --git a/R/preprocess_object_for_CNV.R b/R/preprocess_object_for_CNV.R index 80a5abb..282b64b 100644 --- a/R/preprocess_object_for_CNV.R +++ b/R/preprocess_object_for_CNV.R @@ -1,54 +1,72 @@ #' preprocess_object_for_CNV #' @param genes_by_cells genes-by-cells input matrix +#' @param gene_ann optional data.frame with gene annotation with mandatory columns "Chromosome", "symbol" and "pos" (genomic location). If NULL gene locations will be collected from org.Hs.eg.db #' @description Preprocessing function to obtain a genomically-ordered list of chromosomes. #' @details The preliminary step consist of annotation, duplicates and NA values removal. #' Then, the matrix is splitted as a list of dataframe, where every dataframe is a chromosome. #' Chromosomes are ordered from 1 to 22 + X +Y, and then re-ordered by start position. #' @return list of genomically-ordered chromosomes -#' @import org.Hs.eg.db +#' @importFrom org.Hs.eg.db org.Hs.egCHRLOC org.Hs.egCHRLOCEND org.Hs.egSYMBOL #' @export -preprocess_object_for_CNV <- function(genes_by_cells) { +preprocess_object_for_CNV <- function(genes_by_cells=NULL, gene_ann=NULL) { # retrieve gene informations - # genes_by_cells=cellObj + use_org <- TRUE + if(!is.null(gene_ann)){ + gene_locations <- gene_ann + cat("User-provided gene annotations...\n") + use_org <- FALSE + if(!all(c("Chromosome", "pos", "symbol") %in% colnames(gene_ann))){ + cat("Can't use the given annotation: proceeding with org.Hs.eg.db...\n") + use_org <- TRUE + } + } - cat("Retrieving gene locations...\n") - gene_locations <- as.data.frame(org.Hs.eg.db::org.Hs.egCHRLOC) - temp <- as.data.frame(org.Hs.eg.db::org.Hs.egCHRLOCEND) - eg2sym <- as.data.frame(org.Hs.eg.db::org.Hs.egSYMBOL) - - gene_locations <- merge(eg2sym, gene_locations, by="gene_id", sort=F) - gene_locations <- merge(gene_locations, temp, by=c("gene_id", "Chromosome"), sort=F) - gene_locations$pos <- apply(abs(gene_locations[, c("start_location", "end_location")]), 1, min) - gene_locations$start_location <- NULL - gene_locations$end_location <- NULL + if(use_org){ + cat("Retrieving gene locations...\n") + gene_locations <- as.data.frame(org.Hs.egCHRLOC) + temp <- as.data.frame(org.Hs.egCHRLOCEND) + eg2sym <- as.data.frame(org.Hs.egSYMBOL) + + gene_locations <- merge(eg2sym, gene_locations, by="gene_id", sort=F) + gene_locations <- merge(gene_locations, temp, by=c("gene_id", "Chromosome"), sort=F) + gene_locations$pos <- apply(abs(gene_locations[, c("start_location", "end_location")]), 1, min) + gene_locations$gene_id <- gene_locations$start_location <- gene_locations$end_location <- NULL + #gene_locations$end_location <- NULL + gene_locations$Chrom_symbol <- apply(gene_locations[, c("Chromosome", "symbol")], 1, function(x) paste0("chr", x[1], "__", x[2])) + gene_locations$Chrom_pos_symbol <- apply(gene_locations[, c("Chromosome", "pos", "symbol")], 1, function(x) paste0("chr", x[1], "__", x[2], "__", x[3])) + gene_locations$Chrom_pos_symbol <- gsub(" +", "", gene_locations$Chrom_pos_symbol ) + } matrix_complete <- merge(gene_locations, genes_by_cells, by.x="symbol", by.y=0, sort=FALSE) # remove chromosome names not in 1:22 and X,Y -- long names, duplicates matrix_complete <- matrix_complete[-which(grepl("_", matrix_complete$Chromosome)), ] - # remove NA values across entrezid - matrix_complete <- matrix_complete[!is.na(matrix_complete$gene_id), ] + # remove NA values across symbol + #matrix_complete <- matrix_complete[!is.na(matrix_complete$gene_id), ] + matrix_complete <- matrix_complete[!is.na(matrix_complete$symbol), ] - # remove duplicates (entregene_id and on row.names('symbol')) - matrix_reduced <- cbind(matrix_complete, mean = rowMeans(matrix_complete[, 5:dim(matrix_complete)[2]])) + # remove duplicates keep the gene with the highest average expression + matrix_reduced <- cbind(matrix_complete, mean = rowMeans(matrix_complete[, 6:ncol(matrix_complete)])) matrix_reduced <- matrix_reduced[order(-matrix_reduced$mean), ] - matrix_reduced <- matrix_reduced[!duplicated(matrix_reduced$gene_id),] - matrix_reduced <- matrix_reduced[!duplicated(matrix_reduced$symbol),] - matrix_reduced$mean <- NULL + #matrix_reduced <- matrix_reduced[!duplicated(matrix_reduced$gene_id),] + matrix_reduced <- matrix_reduced[!duplicated(matrix_reduced$Chrom_symbol), ] #it happens that symbol maps to multiple locations - # change rownames with 'SYMBOL' - mat_rn <- matrix_reduced[,-1] - rownames(mat_rn) <- matrix_reduced[,1] - rm(matrix_reduced) + #add rownames and remove columns + rownames(matrix_reduced) <- matrix_reduced$Chrom_pos_symbol + matrix_reduced$symbol <- NULL + matrix_reduced$Chrom_pos_symbol <- NULL + matrix_reduced$Chrom_symbol <- NULL + matrix_reduced$mean <- NULL + cat("Genes with known location", nrow(matrix_reduced), "\n") # split chromosomes into a list of dataframe - mat_splitted <- split(mat_rn, mat_rn$Chromosome) + matrix_reduced <- split(matrix_reduced, matrix_reduced$Chromosome) # sort chromosomes by position - mat_sorted <- lapply(mat_splitted, function(df){ - df[order(df$pos),] - }) - return(mat_sorted) + matrix_reduced <- lapply(matrix_reduced, function(x) x[order(x$pos), ]) + matrix_reduced <- lapply(matrix_reduced, function(x) x[, -which(colnames(x) %in% c("Chromosome", "pos"))]) + + return(matrix_reduced) } diff --git a/R/process_GTEx_gene_reads.R b/R/process_GTEx_gene_reads.R index 10852e2..58fe03f 100644 --- a/R/process_GTEx_gene_reads.R +++ b/R/process_GTEx_gene_reads.R @@ -10,8 +10,8 @@ #' @description GTEx gene reads file is processed to obtain the average normalized gene expression in a tissue #' @return average normalized gene expression values #' @export -#' @import Seurat -#' @import org.Hs.eg.db +#' @importFrom Seurat NormalizeData +#' @importFrom org.Hs.eg.db org.Hs.egENSEMBL #' @importFrom utils read.delim read.table diff --git a/R/regions_to_genes.R b/R/regions_to_genes.R index 8c5171e..dc9a844 100644 --- a/R/regions_to_genes.R +++ b/R/regions_to_genes.R @@ -9,19 +9,20 @@ regions_to_genes <- function(CNV=NULL, CNV_input=NULL){ regions <- setNames(strsplit(rownames(CNV), "__"), rownames(CNV)) #get all genes occurring in regions - check_regions <- unique(unlist(lapply(regions, function(x) x[-1]))) + check_regions <- unique(unlist(lapply(regions, function(x) x[c(3, 6)]))) ### find region-related genes genes <- unlist(lapply(CNV_input, rownames)) + genes <- setNames(data.frame(do.call(rbind, strsplit(genes, "__")), stringsAsFactors = F), c("chr", "pos", "symbol")) + #genes <- unique(unlist(lapply(genes, function(x) x[3]))) #check that all such genes are available in the rows of the initial input - if(!all(check_regions %in% genes)){ + if(!all(check_regions %in% genes$symbol)){ warning("Something went wrong with the reconstruction of CNV regions. 'regions2genes' was not calculated. The following genes were not found in the input.\n") - print(check_regions[!check_regions %in% genes]) + print(check_regions[!check_regions %in% genes$symbol]) regions_genes <- NULL }else{ - regions_genes <- lapply(regions, function(x) genes[c(which(genes == x[2]):which(genes == x[3]))]) #start:end - regions_genes <- lapply(regions_genes, function(x) setNames(data.frame(do.call(rbind, strsplit(x, "_")), stringsAsFactors = F), c("symbol", "location"))) + regions_genes <- lapply(regions, function(x) genes[c(which(genes$chr == x[1] & genes$symbol == x[3]) : which(genes$chr == x[4] & genes$symbol == x[6])), ]) #start:end } ### assemble the output diff --git a/R/sc_create_null.R b/R/sc_create_null.R index f02173b..89b0730 100755 --- a/R/sc_create_null.R +++ b/R/sc_create_null.R @@ -10,7 +10,7 @@ sc_create_null <- function(genes_by_cells=NULL, bins=NULL, gene_set=NULL, k=100){ - gene_set_idx <- stats::na.omit(match(gene_set, rownames(genes_by_cells))) + gene_set_idx <- na.omit(match(gene_set, rownames(genes_by_cells))) ans <- bins[gene_set_idx] ###bins that contain the gene set #remove the gene of the gene sets from the bins diff --git a/R/sc_data_bin.R b/R/sc_data_bin.R index ea865bd..92cb5ca 100755 --- a/R/sc_data_bin.R +++ b/R/sc_data_bin.R @@ -12,15 +12,15 @@ sc_data_bin <- function(genes_by_cells=NULL, nbins=25, na.rm=FALSE){ - ans <- Matrix::rowSums(genes_by_cells) + ans <- rowSums(genes_by_cells) #if excluding NA every gene will have a different number of cells; therefore mean is used to define the expression bins if(na.rm){ - n <- Matrix::rowSums(genes_by_cells>0) + n <- rowSums(genes_by_cells>0) ans <- ans/n } - ans <- ggplot2::cut_number(ans, n = nbins, labels=FALSE) + ans <- cut_number(ans, n = nbins, labels=FALSE) return(ans) diff --git a/R/transcr_compl.R b/R/transcr_compl.R index d7d2d7e..cdb57af 100644 --- a/R/transcr_compl.R +++ b/R/transcr_compl.R @@ -12,6 +12,7 @@ #'} #' @export #' @importFrom stats glm +#' @importFrom Matrix colSums rowSums transcr_compl <- function(scMuffinList = NULL, min_counts = 5, min_cells=10, min_genes=500){ @@ -25,16 +26,16 @@ transcr_compl <- function(scMuffinList = NULL, min_counts = 5, min_cells=10, min if(any(idx)){ counts[idx] <- 0 } - idx <- Matrix::rowSums(sign(counts)) < min_cells + idx <- rowSums(sign(counts)) < min_cells if(any(idx)){ counts[idx, ] <- 0 #only genes in at least 10 cells } - idx <- Matrix::colSums(sign(counts)) < min_genes + idx <- colSums(sign(counts)) < min_genes if(any(idx)){ counts[, idx] <- 0 #only cells in with least 100 genes } - N <- Matrix::colSums(counts) + N <- colSums(counts) if(sum(N>0) < min_cells){ stop("not enough cells after filtering") } diff --git a/docs/404.html b/docs/404.html index 3ba18cb..96a87b4 100644 --- a/docs/404.html +++ b/docs/404.html @@ -7,8 +7,8 @@ Page not found (404) • scMuffin - - + + Articles • scMuffinArticles • scMuffin @@ -10,7 +10,7 @@ scMuffin - 1.1.1 + 1.1.2