diff --git a/DESCRIPTION b/DESCRIPTION index dc71648..1ea8640 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: raer Type: Package Title: RNA editing tools in R -Version: 1.1.2 +Version: 1.1.3 Authors@R: c( person("Kent", "Riemondy", , "kent.riemondy@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-0750-1273")), diff --git a/NEWS.md b/NEWS.md index 861048f..e03df74 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,7 @@ -# raer 1.1.0 + +# raer 1.1.3 + +* Fixed bug in smart-seq2 pileup_cells reporting # raer 1.0.0 diff --git a/R/sc-pileup.R b/R/sc-pileup.R index d98fb77..01984dc 100644 --- a/R/sc-pileup.R +++ b/R/sc-pileup.R @@ -33,10 +33,10 @@ #' `min_variant_reads` parameters if set > 0 specify the number of reads #' from any cell required in order to report a site. E.g. if #' `min_variant_reads` is set to 2, then at least 2 reads (from any cell) must -#' have a variant in order to report the site. Setting `min_depth` and -#' `min_variant_reads` to 0 reports all sites present in the `sites` object. -#' The following options are not enabled for pileup_cells(): -#' `max_mismatch_type`, `homopolymer_len`, and `min_allelic_freq`. +#' have a variant in order to report the site. Setting `min_depth` and +#' `min_variant_reads` to 0 reports all sites present in the `sites` object. +#' The following options are not enabled for pileup_cells(): +#' `max_mismatch_type`, `homopolymer_len`, and `min_allelic_freq`. #' @param umi_tag tag in BAM containing the UMI sequence #' @param cb_tag tag in BAM containing the cell-barcode sequence #' @param return_sce if `TRUE`, data is returned as a SingleCellExperiment, if @@ -82,13 +82,18 @@ #' #' many_small_bams <- rep(bam_fn, 10) #' bam_ids <- LETTERS[1:10] +#' +#' # for unstranded libraries, sites and alleles should be provided on + strand +#' gr <- GRanges(c("2:579:+", "2:625:+", "2:645:+", "2:589:+", "2:601:+")) +#' gr$REF <- c(rep("T", 4), "A") +#' gr$ALT <- c(rep("C", 4), "G") #' #' fp <- FilterParam( #' library_type = "unstranded", #' remove_overlaps = TRUE #' ) #' -#' pileup_cells(many_small_bams, +#' sce <- pileup_cells(many_small_bams, #' sites = gr, #' cell_barcodes = bam_ids, #' cb_tag = NULL, @@ -96,6 +101,7 @@ #' outdir, #' param = fp #' ) +#' sce #' #' unlink(bai) #' @@ -242,8 +248,26 @@ pileup_cells <- function(bamfiles, } return(res) } - - sce <- do.call(rbind, sces) + if(process_nbam){ + sce <- do.call(cbind, sces) + } else { + sce <- do.call(rbind, sces) + } + + to_keep <- rep(TRUE, nrow(sce)) + if(param@min_depth > 0) { + pass_mc <- Matrix::rowSums(assays(sce)$nRef + + assays(sce)$nAlt) >= param@min_depth + to_keep <- to_keep & pass_mc + + } + + if(param@min_variant_reads > 0) { + pass_mv <- Matrix::rowSums(assays(sce)$nAlt) >= param@min_variant_reads + to_keep <- to_keep & pass_mv + } + sce <- sce[to_keep, ] + rownames(sce) <- site_names(rowRanges(sce), allele = TRUE) write_sparray(sce, outfns["counts"], outfns["sites"], outfns["bc"]) diff --git a/src/sc-plp.c b/src/sc-plp.c index 3fd5240..19ac4dd 100644 --- a/src/sc-plp.c +++ b/src/sc-plp.c @@ -143,7 +143,6 @@ typedef struct { char* cb_tag; // cb tag value int remove_overlaps; // 1 if enable overlap detection, 0 if not int min_counts; // if 0 report all sites provide in output sparseMatrix - int site_idx; // counter of sites written, used for row index if min_counts > 0 int is_ss2; // 1 if smart-seq2 mode, 0 otherwise } sc_mplp_conf_t; @@ -428,18 +427,12 @@ static int write_counts(sc_mplp_conf_t* conf, payload_t* pld, const char* seqnam } if (cbdat->ref == 0 && cbdat->alt == 0) continue; - r_idx = conf->min_counts == 0 ? pld->idx : conf->site_idx; + r_idx = pld->idx; ret = fprintf(conf->fps[0], "%d %d %d %d\n", r_idx, c_idx, cbdat->ref, cbdat->alt); if (ret < 0) return (-1); n_rec += 1; } - // write site - if (conf->min_counts > 0) { - ret = fprintf(conf->fps[1], "%d\t%s\t%d\t%d\t%s\t%s\n", pld->idx, seqname, pos + 1, pld->strand, pld->ref, pld->alt); - if (ret < 0) return (-1); - conf->site_idx += 1; - } return (n_rec); } @@ -535,7 +528,7 @@ static int screadaln(void* data, bam1_t* b) { static int process_one_site(const bam_pileup1_t** plp, sc_mplp_conf_t* conf, payload_t* pld, bam_hdr_t* h, int tid, int pos, int nbam, int* n_plp, char* bamid) { - int i, j,n_ref,n_alt; + int i, j, n_ref, n_alt; i = j = n_ref = n_alt = 0; base_counts_t all_cell_bc; memset(&all_cell_bc, 0, sizeof(base_counts_t)); @@ -632,7 +625,7 @@ static int run_scpileup(sc_mplp_conf_t* conf, char* bamfn, char* index, char* ba data = calloc(1, sizeof(mplp_sc_aux_t*)); plp = calloc(1, sizeof(bam_pileup1_t*)); n_plp = calloc(1, sizeof(int)); - + // read the header of bam file and initialize data data[0] = calloc(1, sizeof(mplp_sc_aux_t)); @@ -846,7 +839,6 @@ static int set_sc_mplp_conf(sc_mplp_conf_t* conf, int nbams, conf->remove_overlaps = remove_overlaps; conf->min_mq = (min_mapq < 0) ? 0 : min_mapq; conf->min_counts = min_counts < 0 ? 0 : min_counts; - conf->site_idx = 1; return (0); } @@ -1009,8 +1001,8 @@ SEXP scpileup(SEXP bampaths, SEXP indexes, SEXP query_region, SEXP lst, if (ret >= 0) { // write barcodes file, all barcodes will be reported in matrix write_barcodes(ga.fps[2], bcs, nbcs); - // write sites, if all sites requested, otherwise write during pileup - if (ga.min_counts == 0) write_all_sites(&ga); + // write sites, all sites will be in initial sparse matrix, to be subsequently filtered on R side + write_all_sites(&ga); if (ga.is_ss2) { for (i = 0; i < nbams; ++i) { res = run_scpileup(&ga, cbampaths[i], cindexes[i], bcs[i]); diff --git a/tests/testthat/test_pileup_cells.R b/tests/testthat/test_pileup_cells.R index 0c288bd..6051ea5 100644 --- a/tests/testthat/test_pileup_cells.R +++ b/tests/testthat/test_pileup_cells.R @@ -41,15 +41,6 @@ test_that("basic functionality works", { expect_true(sum(assays(sce)$nRef) > 0) expect_true(sum(assays(sce)$nAlt) > 0) - expect_warning(pileup_cells(bam_fn, gr, - c("non", "existent", "barcodes"), - outdir, - param = FilterParam( - library_type = "fr-second-strand", - min_depth = 1 - ) - )) - # returns empty matrices sce <- pileup_cells(bam_fn, gr, c("non", "existent", "barcodes"), @@ -130,6 +121,21 @@ test_that("if multiple bams are supplied, treat each as a separate cell", { ) expect_true(all(gr == rowRanges(sce))) expect_equal(ncol(sce), 10) + expect_equal(nrow(sce), length(gr)) + expect_true(all(colnames(sce) == LETTERS[1:10])) +}) + +test_that("if multiple bams are supplied, make sure rows aren't duplicated", { + fp <- FilterParam(library_type = "fr-second-strand", min_depth = 1) + sce <- pileup_cells(rep(bam_fn, 10), + sites = gr, + cell_barcodes = LETTERS[1:10], + cb_tag = NULL, + umi_tag = NULL, + outdir, param = fp + ) + expect_equal(ncol(sce), 10) + expect_equal(nrow(sce), 4) expect_true(all(colnames(sce) == LETTERS[1:10])) })