diff --git a/NEWS b/NEWS index d93d82d..4c54986 100755 --- a/NEWS +++ b/NEWS @@ -1,3 +1,10 @@ +Changes in version 2.14.0 +------------------------- + +BUGFIXES + o Fixed large runtime of readAllelicCounts. Thanks luyh-xp (#378). + + Changes in version 2.12.0 ------------------------- diff --git a/R/readAllelicCountsFile.R b/R/readAllelicCountsFile.R index 643bbd7..162de6a 100644 --- a/R/readAllelicCountsFile.R +++ b/R/readAllelicCountsFile.R @@ -68,7 +68,10 @@ readAllelicCountsFile <- function(file, format, zero=NULL) { if (!is.null(zero)) flog.warn("zero ignored for GATK4 files.") con <- file(file, open = "r") header <- .parseGATKHeader(con) - inputCounts <- read.delim(con, header = FALSE, stringsAsFactors = FALSE) + inputCounts <- try(read.delim(con, header = FALSE, stringsAsFactors = FALSE)) + if (is(inputCounts, "try-error")) { + .stopUserError("Error reading AllelicCountsFile ", file) + } colnames(inputCounts) <- strsplit(header$last_line, "\t")[[1]] close(con) gr <- GRanges(seqnames = inputCounts$CONTIG, IRanges(start = inputCounts$POSITION, end = inputCounts$POSITION)) @@ -76,7 +79,9 @@ readAllelicCountsFile <- function(file, format, zero=NULL) { colData = DataFrame(Samples = 1, row.names = header$sid), exptData = list(header = VCFHeader(samples = header$sid))) ref(vcf) <- DNAStringSet(inputCounts$REF_NUCLEOTIDE) - alt(vcf) <- DNAStringSetList(lapply(inputCounts$ALT_NUCLEOTIDE, DNAStringSet)) + #alt(vcf) <- DNAStringSetList(split(inputCounts$ALT_NUCLEOTIDE, seq(length(vcf)))) + alt(vcf) <- DNAStringSetList(as.list(inputCounts$ALT_NUCLEOTIDE)) + info(header(vcf)) <- DataFrame( Number = "0", Type = "Flag", diff --git a/inst/extdata/example_allelic_counts_empty.tsv b/inst/extdata/example_allelic_counts_empty.tsv new file mode 100644 index 0000000..cabb0b0 --- /dev/null +++ b/inst/extdata/example_allelic_counts_empty.tsv @@ -0,0 +1,28 @@ +@HD VN:1.6 +@SQ SN:chr1 LN:249250621 +@SQ SN:chr2 LN:243199373 +@SQ SN:chr3 LN:198022430 +@SQ SN:chr4 LN:191154276 +@SQ SN:chr5 LN:180915260 +@SQ SN:chr6 LN:171115067 +@SQ SN:chr7 LN:159138663 +@SQ SN:chr8 LN:146364022 +@SQ SN:chr9 LN:141213431 +@SQ SN:chr10 LN:135534747 +@SQ SN:chr11 LN:135006516 +@SQ SN:chr12 LN:133851895 +@SQ SN:chr13 LN:115169878 +@SQ SN:chr14 LN:107349540 +@SQ SN:chr15 LN:102531392 +@SQ SN:chr16 LN:90354753 +@SQ SN:chr17 LN:81195210 +@SQ SN:chr18 LN:78077248 +@SQ SN:chr19 LN:59128983 +@SQ SN:chr20 LN:63025520 +@SQ SN:chr21 LN:48129895 +@SQ SN:chr22 LN:51304566 +@SQ SN:chrX LN:155270560 +@SQ SN:chrY LN:59373566 +@SQ SN:chrM LN:16571 +@RG ID:PureCN SM:LIB-02240e4 +CONTIG POSITION REF_COUNT ALT_COUNT REF_NUCLEOTIDE ALT_NUCLEOTIDE diff --git a/tests/testthat/test_readAllelicCountsFile.R b/tests/testthat/test_readAllelicCountsFile.R index 8ff1c37..8ac10eb 100644 --- a/tests/testthat/test_readAllelicCountsFile.R +++ b/tests/testthat/test_readAllelicCountsFile.R @@ -2,6 +2,7 @@ context("readAllelicCountsFile") vcf.file <- system.file("extdata", "example.vcf.gz", package = "PureCN") ac.file <- system.file("extdata", "example_allelic_counts.tsv", package = "PureCN") +ac.empty.file <- system.file("extdata", "example_allelic_counts_empty.tsv", package = "PureCN") vcf <- readVcf(vcf.file, "hg19") data(purecn.example.output) normal.coverage.file <- system.file('extdata', 'example_normal.txt.gz', @@ -12,6 +13,7 @@ tumor.coverage.file <- system.file('extdata', 'example_tumor.txt.gz', test_that("example parses correctly", { vcf_ac <- readAllelicCountsFile(ac.file) expect_equal(as.character(ref(vcf_ac)), as.character(ref(head(vcf,20)))) + expect_error(readAllelicCountsFile(ac.empty.file), "Error reading AllelicCountsFile") }) test_that("parsing -> writing -> parsing works", { @@ -29,4 +31,3 @@ test_that("parsing -> writing -> parsing works", { expect_true(length(ret$results) > 0) file.remove(output.file) }) -