From 17b6c50f8d7ba668d4f4a4dbbd498d7a1bbb6e68 Mon Sep 17 00:00:00 2001 From: Tim Millar Date: Fri, 1 Mar 2024 09:35:58 +1300 Subject: [PATCH 1/2] Avoid holding alignment file handles open #173 --- mchap/application/arguments.py | 19 ++++-- mchap/application/baseclass.py | 51 +++++++--------- mchap/application/find_snvs.py | 105 +++++++++++++++++---------------- mchap/io/bam.py | 4 +- 4 files changed, 90 insertions(+), 89 deletions(-) diff --git a/mchap/application/arguments.py b/mchap/application/arguments.py index 627f6b23..be4a748d 100644 --- a/mchap/application/arguments.py +++ b/mchap/application/arguments.py @@ -717,7 +717,9 @@ def parse_sample_pools(samples, sample_bams, sample_pool_argument): return pools, pool_bams -def parse_sample_bam_paths(bam_argument, sample_pool_argument, read_group_field): +def parse_sample_bam_paths( + bam_argument, sample_pool_argument, read_group_field, reference_path +): """Combine arguments relating to sample bam file specification. Parameters @@ -738,7 +740,7 @@ def parse_sample_bam_paths(bam_argument, sample_pool_argument, read_group_field) textfile = False if len(bam_argument) == 1: try: - pysam.AlignmentFile(bam_argument[0]) + pysam.AlignmentFile(bam_argument[0], reference_filename=reference_path) except ValueError: # not a bam textfile = True @@ -747,7 +749,9 @@ def parse_sample_bam_paths(bam_argument, sample_pool_argument, read_group_field) else: bams = bam_argument if not textfile: - sample_bams = extract_sample_ids(bams, id=read_group_field) + sample_bams = extract_sample_ids( + bams, id=read_group_field, reference_path=reference_path + ) samples = list(sample_bams) # case of plain-text filepath @@ -761,7 +765,9 @@ def parse_sample_bam_paths(bam_argument, sample_pool_argument, read_group_field) if n_fields == 1: # list of bam paths bams = [line[0] for line in lines] - sample_bams = extract_sample_ids(bams, id=read_group_field) + sample_bams = extract_sample_ids( + bams, id=read_group_field, reference_path=reference_path + ) samples = list(sample_bams) elif n_fields == 2: # list of sample-bam pairs @@ -871,7 +877,10 @@ def collect_default_program_arguments(arguments): ) # merge sample specific data with defaults samples, sample_bams = parse_sample_bam_paths( - arguments.bam, arguments.sample_pool[0], arguments.read_group_field[0] + arguments.bam, + arguments.sample_pool[0], + arguments.read_group_field[0], + reference_path=arguments.reference[0], ) sample_ploidy = parse_sample_value_map( arguments.ploidy[0], diff --git a/mchap/application/baseclass.py b/mchap/application/baseclass.py index 1d946d95..17915457 100644 --- a/mchap/application/baseclass.py +++ b/mchap/application/baseclass.py @@ -99,16 +99,6 @@ def format_fields(self): def loci(self): raise NotImplementedError() - def alignment_files(self): - out = {} - for pool, pairs in self.sample_bams.items(): - pairs = [ - (sample, pysam.AlignmentFile(path, reference_filename=self.ref)) - for sample, path in pairs - ] - out[pool] = pairs - return out - def header_contigs(self): with pysam.VariantFile(self.vcf) as f: contigs = f.header.contigs.values() @@ -135,7 +125,7 @@ def header(self): header = meta_fields + contigs + filters + info_fields + format_fields + columns return [str(line) for line in header] - def _locus_data(self, locus, alignment_files): + def _locus_data(self, locus, sample_bams): """Generate a LocusAssemblyData object for a given locus to be populated with data relating to a single vcf record. """ @@ -144,7 +134,7 @@ def _locus_data(self, locus, alignment_files): return LocusAssemblyData( locus=locus, samples=self.samples, - sample_bams=alignment_files, + sample_bams=sample_bams, sample_ploidy=self.sample_ploidy, sample_inbreeding=self.sample_inbreeding, infofields=infofields, @@ -186,19 +176,22 @@ def encode_sample_reads(self, data): # of bam sample-path pairs. pairs = data.sample_bams[sample] read_chars, read_quals = [], [] - for name, alignment_file in pairs: - chars, quals = extract_read_variants( - data.locus, - alignment_file=alignment_file, - samples=name, - id=self.read_group_field, - min_quality=self.mapping_quality, - skip_duplicates=self.skip_duplicates, - skip_qcfail=self.skip_qcfail, - skip_supplementary=self.skip_supplementary, - )[name] - read_chars.append(chars) - read_quals.append(quals) + for name, path in pairs: + with pysam.AlignmentFile( + path, reference_filename=self.ref + ) as alignment_file: + chars, quals = extract_read_variants( + data.locus, + alignment_file=alignment_file, + samples=name, + id=self.read_group_field, + min_quality=self.mapping_quality, + skip_duplicates=self.skip_duplicates, + skip_qcfail=self.skip_qcfail, + skip_supplementary=self.skip_supplementary, + )[name] + read_chars.append(chars) + read_quals.append(quals) read_chars = np.concatenate(read_chars) read_quals = np.concatenate(read_quals) @@ -343,7 +336,7 @@ def sumarise_vcf_record(self, data): data.infodata["AOP"] = prob_occurring.round(self.precision) return data - def call_locus(self, locus, alignment_files): + def call_locus(self, locus, sample_bams): """Call samples at a locus and formats resulting data into a VCF record line. @@ -366,7 +359,7 @@ def call_locus(self, locus, alignment_files): VCF variant line. """ - data = self._locus_data(locus, alignment_files) + data = self._locus_data(locus, sample_bams) self.encode_sample_reads(data) self.call_sample_genotypes(data) self.sumarise_sample_genotypes(data) @@ -374,11 +367,9 @@ def call_locus(self, locus, alignment_files): return data.format_vcf_record() def _assemble_loci_wrapped(self, loci): - # create single set of alignment files to use for every locus in the job - alignment_files = self.alignment_files() for locus in loci: try: - result = self.call_locus(locus, alignment_files) + result = self.call_locus(locus, self.sample_bams) except Exception as e: message = LOCUS_ASSEMBLY_ERROR.format( name=locus.name, diff --git a/mchap/application/find_snvs.py b/mchap/application/find_snvs.py index 367bf1bb..111bcac4 100644 --- a/mchap/application/find_snvs.py +++ b/mchap/application/find_snvs.py @@ -215,65 +215,69 @@ def _count_alleles(zeros, alleles): return -def bam_samples(alignment_files, tag="SM"): - out = [None] * len(alignment_files) - for i, bam in enumerate(alignment_files): - read_groups = bam.header["RG"] - sample_id = read_groups[0][tag] - if len(read_groups) > 1: - for rg in read_groups: - if rg[tag] != sample_id: - raise ValueError( - "Expected one sample per bam but found {} and {} in {}".format( - sample_id, rg[tag], bam.filename.decode() +def bam_samples(bam_paths, reference_path, tag="SM"): + out = [None] * len(bam_paths) + for i, path in enumerate(bam_paths): + with pysam.AlignmentFile(path, reference_filename=reference_path) as bam: + read_groups = bam.header["RG"] + sample_id = read_groups[0][tag] + if len(read_groups) > 1: + for rg in read_groups: + if rg[tag] != sample_id: + raise ValueError( + "Expected one sample per bam but found {} and {} in {}".format( + sample_id, rg[tag], bam.filename.decode() + ) ) - ) - out[i] = sample_id + out[i] = sample_id return np.array(out) def bam_region_depths( - alignment_files, + bam_paths, + reference_path, contig, start, stop, dtype=np.int64, **kwargs, ): - n_samples = len(alignment_files) + n_samples = len(bam_paths) n_pos = stop - start shape = (n_pos, n_samples, 4) depths = np.zeros(shape, dtype=dtype) - for j, bam in enumerate(alignment_files): - for column in bam.pileup( - contig=contig, - start=start, - stop=stop, - truncate=True, - multiple_iterators=False, - **kwargs, - ): - # if start <= column.pos < stop: - i = column.pos - start - alleles = column.get_query_sequences() - if isinstance(alleles, list): - alleles = bases_to_indices(alleles) - _count_alleles(depths[i, j], alleles) + for j, path in enumerate(bam_paths): + with pysam.AlignmentFile(path, reference_filename=reference_path) as bam: + for column in bam.pileup( + contig=contig, + start=start, + stop=stop, + truncate=True, + multiple_iterators=False, + **kwargs, + ): + # if start <= column.pos < stop: + i = column.pos - start + alleles = column.get_query_sequences() + if isinstance(alleles, list): + alleles = bases_to_indices(alleles) + _count_alleles(depths[i, j], alleles) return depths def write_vcf_header( - command, reference, info_fields=None, format_fields=None, samples=None + command, reference_path, info_fields=None, format_fields=None, samples=None ): vcfversion_header = str(headermeta.fileformat("v4.3")) date_header = str(headermeta.filedate()) source_header = str(headermeta.source()) command_header = str(headermeta.commandline(command)) - reference_header = str(headermeta.reference(reference.filename.decode())) - contig_header = "\n".join( - str(headermeta.ContigHeader(s, i)) - for s, i in zip(reference.references, reference.lengths) - ) + with pysam.FastaFile(reference_path) as reference: + reference_header = str(headermeta.reference(reference.filename.decode())) + contig_header = "\n".join( + str(headermeta.ContigHeader(s, i)) + for s, i in zip(reference.references, reference.lengths) + ) components = [ vcfversion_header, date_header, @@ -400,8 +404,8 @@ def write_vcf_block( contig, start, stop, - reference, - alignment_files, + reference_path, + bam_paths, # sample_ploidy, # sample_inbreeding, # base_error_rate, @@ -420,10 +424,12 @@ def write_vcf_block( assert start < stop variant_position = np.arange(start, stop) variant_contig = np.full(len(variant_position), contig) - variant_reference = np.array(list(reference.fetch(contig, start, stop).upper())) + with pysam.FastaFile(reference_path) as reference: + variant_reference = np.array(list(reference.fetch(contig, start, stop).upper())) variant_reference_index = bases_to_indices(variant_reference) allele_depth = bam_region_depths( - alignment_files, + bam_paths, + reference_path, contig, start, stop, @@ -587,9 +593,8 @@ def main(command): bed = pd.read_table(bed_path, header=None)[[0, 1, 2]] bed.columns = ["contig", "start", "stop"] reference_path = args.reference[0] - reference = pysam.Fastafile(reference_path) samples, sample_bams = arguments.parse_sample_bam_paths( - args.bam, None, args.read_group_field[0] + args.bam, None, args.read_group_field[0], reference_path=reference_path ) # sample_ploidy = arguments.parse_sample_value_map( # args.ploidy[0], @@ -611,13 +616,9 @@ def main(command): # create alignment file objects and reuse them throughout # this is important for cram performance! # also pass reference name explicitly for robustness - alignment_files = [ - pysam.AlignmentFile(path, reference_filename=reference_path) - for path in bam_paths - ] - samples_found = bam_samples(alignment_files, tag=args.read_group_field[0]).astype( - "U" - ) + samples_found = bam_samples( + bam_paths, reference_path, tag=args.read_group_field[0] + ).astype("U") mismatch = samples_found != samples if np.any(mismatch): raise IOError( @@ -630,7 +631,7 @@ def main(command): format_fields = [formatfields.GT, formatfields.AD] write_vcf_header( command, - reference, + reference_path, samples=samples, info_fields=info_fields, format_fields=format_fields, @@ -641,8 +642,8 @@ def main(command): interval.contig, interval.start, interval.stop, - reference, - alignment_files, + reference_path, + bam_paths, # sample_ploidy, # sample_inbreeding, # base_error_rate=args.base_error_rate[0], diff --git a/mchap/io/bam.py b/mchap/io/bam.py index 13120f16..77879b75 100644 --- a/mchap/io/bam.py +++ b/mchap/io/bam.py @@ -19,7 +19,7 @@ ID_TAGS = {"ID", "SM"} -def extract_sample_ids(bam_paths, id="SM"): +def extract_sample_ids(bam_paths, id="SM", reference_path=None): """Extract sample id's from a list of bam files. Parameters @@ -38,7 +38,7 @@ def extract_sample_ids(bam_paths, id="SM"): assert id in ID_TAGS data = {} for path in bam_paths: - bam = pysam.AlignmentFile(path) + bam = pysam.AlignmentFile(path, reference_filename=reference_path) # allow multiple read groups for a single sample within a bam file # but guard against duplicate sample identifier across multiple bams bam_data = {read_group[id]: path for read_group in bam.header["RG"]} From 1e97aa5fae7e6f338e8d3b74602784829c959e45 Mon Sep 17 00:00:00 2001 From: Tim Millar Date: Fri, 1 Mar 2024 11:38:04 +1300 Subject: [PATCH 2/2] Bump version to v0.9.2 --- CHANGELOG.md | 6 ++++++ Dockerfile | 2 +- docs/assemble.rst | 2 +- docs/call.rst | 2 +- mchap/tests/test_io/data/mock.input.frequencies.vcf | 2 +- mchap/tests/test_io/data/simple.output.assemble.vcf | 2 +- mchap/tests/test_io/data/simple.output.basis.minad2.vcf | 2 +- mchap/tests/test_io/data/simple.output.basis.minaf0.3.vcf | 2 +- .../test_io/data/simple.output.basis.minaf0.minad0.vcf | 2 +- .../test_io/data/simple.output.basis.mixed_depth.mad10.vcf | 2 +- .../test_io/data/simple.output.basis.mixed_depth.maf0.1.vcf | 2 +- .../tests/test_io/data/simple.output.basis.mixed_depth.vcf | 2 +- mchap/tests/test_io/data/simple.output.basis.vcf | 2 +- mchap/tests/test_io/data/simple.output.call-exact.vcf | 2 +- mchap/tests/test_io/data/simple.output.call.vcf | 2 +- .../test_io/data/simple.output.deep.assemble.pools.vcf | 2 +- mchap/tests/test_io/data/simple.output.deep.assemble.vcf | 2 +- .../test_io/data/simple.output.deep.call-exact.pools.vcf | 2 +- mchap/tests/test_io/data/simple.output.deep.call.pools.vcf | 2 +- .../data/simple.output.mixed_depth.assemble.frequencies.vcf | 2 +- .../data/simple.output.mixed_depth.assemble.occurrence.vcf | 2 +- .../simple.output.mixed_depth.assemble.pool.frequencies.vcf | 2 +- .../test_io/data/simple.output.mixed_depth.assemble.vcf | 2 +- ...xed_depth.call-exact.frequencies.posteriors.skiprare.vcf | 2 +- ...mple.output.mixed_depth.call-exact.frequencies.prior.vcf | 2 +- ...e.output.mixed_depth.call-exact.frequencies.skiprare.vcf | 2 +- .../simple.output.mixed_depth.call-exact.frequencies.vcf | 2 +- .../simple.output.mixed_depth.call-exact.likelihoods.vcf | 2 +- .../simple.output.mixed_depth.call-exact.occurrence.vcf | 2 +- .../simple.output.mixed_depth.call-exact.posteriors.vcf | 2 +- .../test_io/data/simple.output.mixed_depth.call-exact.vcf | 2 +- .../simple.output.mixed_depth.call.frequencies.prior.vcf | 2 +- .../simple.output.mixed_depth.call.frequencies.skiprare.vcf | 2 +- .../data/simple.output.mixed_depth.call.frequencies.vcf | 2 +- .../data/simple.output.mixed_depth.call.likelihoods.vcf | 2 +- .../data/simple.output.mixed_depth.call.occurrence.vcf | 2 +- .../data/simple.output.mixed_depth.call.posteriors.vcf | 2 +- mchap/tests/test_io/data/simple.output.mixed_depth.call.vcf | 2 +- .../test_io/data/simple.output.nullallele.assemble.vcf | 2 +- mchap/version.py | 2 +- 40 files changed, 45 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5c15c4e..db430ef3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,12 @@ ## Unreleased +## Beta v0.9.2 + +Bug Fixes: +- Avoid holding alignment file handles open #173 + + ## Beta v0.9.1 New Features: diff --git a/Dockerfile b/Dockerfile index 2c4c5c91..60ab1d6c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,7 @@ RUN apt-get update \ RUN git clone https://github.com/PlantandFoodResearch/MCHap.git \ && cd MCHap \ - && git checkout v0.9.1 \ + && git checkout v0.9.2 \ && pip install -r requirements.txt \ && python3 setup.py sdist \ && python3 -m pip install dist/mchap-*tar.gz diff --git a/docs/assemble.rst b/docs/assemble.rst index e90c64c6..700eb29f 100644 --- a/docs/assemble.rst +++ b/docs/assemble.rst @@ -3,7 +3,7 @@ MCHap assemble De novo assembly of micro-haplotypes. -*(Last updated for MCHap version 0.9.1)* +*(Last updated for MCHap version 0.9.2)* Background ---------- diff --git a/docs/call.rst b/docs/call.rst index 7b9d2d6b..01500100 100644 --- a/docs/call.rst +++ b/docs/call.rst @@ -3,7 +3,7 @@ MCHap call Calling genotypes from known haplotypes. -*(Last updated for MCHap version 0.9.1)* +*(Last updated for MCHap version 0.9.2)* Background ---------- diff --git a/mchap/tests/test_io/data/mock.input.frequencies.vcf b/mchap/tests/test_io/data/mock.input.frequencies.vcf index 32b635f3..1356f6e2 100644 --- a/mchap/tests/test_io/data/mock.input.frequencies.vcf +++ b/mchap/tests/test_io/data/mock.input.frequencies.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20220406 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mockup" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.assemble.vcf b/mchap/tests/test_io/data/simple.output.assemble.vcf index 49a27884..7064b0b0 100644 --- a/mchap/tests/test_io/data/simple.output.assemble.vcf +++ b/mchap/tests/test_io/data/simple.output.assemble.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20230710 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap assemble --bam simple.sample1.bam simple.sample2.bam simple.sample3.bam --ploidy 4 --targets simple.bed.gz --variants simple.vcf.gz --reference simple.fasta --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.basis.minad2.vcf b/mchap/tests/test_io/data/simple.output.basis.minad2.vcf index 02cc5893..55514f93 100644 --- a/mchap/tests/test_io/data/simple.output.basis.minad2.vcf +++ b/mchap/tests/test_io/data/simple.output.basis.minad2.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20230710 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##commandline="mchap find-snvs --targets simple.bed --reference simple.fasta --bam simple.sample1.bam simple.sample2.bam simple.sample3.bam --ind-mad 2" ##reference=file:simple.fasta ##contig= diff --git a/mchap/tests/test_io/data/simple.output.basis.minaf0.3.vcf b/mchap/tests/test_io/data/simple.output.basis.minaf0.3.vcf index ab165b10..4045a648 100644 --- a/mchap/tests/test_io/data/simple.output.basis.minaf0.3.vcf +++ b/mchap/tests/test_io/data/simple.output.basis.minaf0.3.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20230710 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##commandline="mchap find-snvs --targets simple.bed --reference simple.fasta --bam simple.sample1.bam simple.sample2.bam simple.sample3.bam --ind-maf 0.3" ##reference=file:simple.fasta ##contig= diff --git a/mchap/tests/test_io/data/simple.output.basis.minaf0.minad0.vcf b/mchap/tests/test_io/data/simple.output.basis.minaf0.minad0.vcf index 0fbc6492..0c37a4e5 100644 --- a/mchap/tests/test_io/data/simple.output.basis.minaf0.minad0.vcf +++ b/mchap/tests/test_io/data/simple.output.basis.minaf0.minad0.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20230710 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##commandline="mchap find-snvs --targets simple.bed --reference simple.fasta --bam simple.sample1.bam simple.sample2.bam simple.sample3.bam --ind-maf 0.0 --ind-mad 0" ##reference=file:simple.fasta ##contig= diff --git a/mchap/tests/test_io/data/simple.output.basis.mixed_depth.mad10.vcf b/mchap/tests/test_io/data/simple.output.basis.mixed_depth.mad10.vcf index ee393858..f8bb822b 100644 --- a/mchap/tests/test_io/data/simple.output.basis.mixed_depth.mad10.vcf +++ b/mchap/tests/test_io/data/simple.output.basis.mixed_depth.mad10.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20230710 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##commandline="mchap find-snvs --targets simple.bed --reference simple.fasta --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ind-maf 0 --ind-mad 0 --mad 10" ##reference=file:simple.fasta ##contig= diff --git a/mchap/tests/test_io/data/simple.output.basis.mixed_depth.maf0.1.vcf b/mchap/tests/test_io/data/simple.output.basis.mixed_depth.maf0.1.vcf index f8c2480d..3a2b20d5 100644 --- a/mchap/tests/test_io/data/simple.output.basis.mixed_depth.maf0.1.vcf +++ b/mchap/tests/test_io/data/simple.output.basis.mixed_depth.maf0.1.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20230710 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##commandline="mchap find-snvs --targets simple.bed --reference simple.fasta --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ind-maf 0 --ind-mad 0 --maf 0.1" ##reference=file:simple.fasta ##contig= diff --git a/mchap/tests/test_io/data/simple.output.basis.mixed_depth.vcf b/mchap/tests/test_io/data/simple.output.basis.mixed_depth.vcf index 8e453e97..881dc56d 100644 --- a/mchap/tests/test_io/data/simple.output.basis.mixed_depth.vcf +++ b/mchap/tests/test_io/data/simple.output.basis.mixed_depth.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20230710 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##commandline="mchap find-snvs --targets simple.bed --reference simple.fasta --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam" ##reference=file:simple.fasta ##contig= diff --git a/mchap/tests/test_io/data/simple.output.basis.vcf b/mchap/tests/test_io/data/simple.output.basis.vcf index 69cc0413..b254f080 100644 --- a/mchap/tests/test_io/data/simple.output.basis.vcf +++ b/mchap/tests/test_io/data/simple.output.basis.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20230710 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##commandline="mchap find-snvs --targets simple.bed --reference simple.fasta --bam simple.sample1.bam simple.sample2.bam simple.sample3.bam" ##reference=file:simple.fasta ##contig= diff --git a/mchap/tests/test_io/data/simple.output.call-exact.vcf b/mchap/tests/test_io/data/simple.output.call-exact.vcf index 2220dfc1..5c0fbd7c 100644 --- a/mchap/tests/test_io/data/simple.output.call-exact.vcf +++ b/mchap/tests/test_io/data/simple.output.call-exact.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20230710 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call-exact --bam simple.sample1.bam simple.sample2.bam simple.sample3.bam --ploidy 4 --haplotypes simple.output.assemble.vcf" ##randomseed=None diff --git a/mchap/tests/test_io/data/simple.output.call.vcf b/mchap/tests/test_io/data/simple.output.call.vcf index 8a66f45a..afcde860 100644 --- a/mchap/tests/test_io/data/simple.output.call.vcf +++ b/mchap/tests/test_io/data/simple.output.call.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20230710 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call --bam simple.sample1.bam simple.sample2.bam simple.sample3.bam --ploidy 4 --haplotypes simple.output.assemble.vcf --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.deep.assemble.pools.vcf b/mchap/tests/test_io/data/simple.output.deep.assemble.pools.vcf index 9d1b89af..52cd19b7 100644 --- a/mchap/tests/test_io/data/simple.output.deep.assemble.pools.vcf +++ b/mchap/tests/test_io/data/simple.output.deep.assemble.pools.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20210420 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap assemble --bam simple.sample1.deep.bam simple.sample2.deep.bam simple.sample3.deep.bam --ploidy simple.pools-ploidy --targets simple.bed.gz --variants simple.vcf.gz --reference simple.fasta --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --sample-pool simple.pools" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.deep.assemble.vcf b/mchap/tests/test_io/data/simple.output.deep.assemble.vcf index 020e5796..b47fc0e2 100644 --- a/mchap/tests/test_io/data/simple.output.deep.assemble.vcf +++ b/mchap/tests/test_io/data/simple.output.deep.assemble.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20210420 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap assemble --bam simple.sample1.deep.bam simple.sample2.deep.bam simple.sample3.deep.bam --ploidy 4 --targets simple.bed.gz --variants simple.vcf.gz --reference simple.fasta --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --use-assembly-posteriors" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.deep.call-exact.pools.vcf b/mchap/tests/test_io/data/simple.output.deep.call-exact.pools.vcf index 51577a1d..75a6af84 100644 --- a/mchap/tests/test_io/data/simple.output.deep.call-exact.pools.vcf +++ b/mchap/tests/test_io/data/simple.output.deep.call-exact.pools.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20210420 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call-exact --bam simple.sample1.deep.bam simple.sample2.deep.bam simple.sample3.deep.bam --ploidy simple.pools-ploidy --haplotypes simple.output.assemble.vcf --reference simple.fasta --sample-pool simple.pools" ##randomseed=None diff --git a/mchap/tests/test_io/data/simple.output.deep.call.pools.vcf b/mchap/tests/test_io/data/simple.output.deep.call.pools.vcf index bfcae450..4c5d4391 100644 --- a/mchap/tests/test_io/data/simple.output.deep.call.pools.vcf +++ b/mchap/tests/test_io/data/simple.output.deep.call.pools.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20210420 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call --bam simple.sample1.deep.bam simple.sample2.deep.bam simple.sample3.deep.bam --ploidy simple.pools-ploidy --haplotypes simple.output.assemble.vcf --reference simple.fasta --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --sample-pool simple.pools" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.frequencies.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.frequencies.vcf index 325cf7a4..a5798919 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.frequencies.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.frequencies.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20220406 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap assemble --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --targets simple.bed.gz --variants simple.vcf.gz --reference simple.fasta --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --report AFP" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.occurrence.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.occurrence.vcf index 9b63a291..ef0e651e 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.occurrence.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.occurrence.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20220406 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap assemble --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --targets simple.bed.gz --variants simple.vcf.gz --reference simple.fasta --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --report AOP" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.pool.frequencies.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.pool.frequencies.vcf index a262619d..e5277478 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.pool.frequencies.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.pool.frequencies.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20220412 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap assemble --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --targets simple.bed.gz --variants simple.vcf.gz --reference simple.fasta --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --read-group-field POOL --report AFP" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.vcf index 221c27d4..26e5c2c6 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.assemble.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20210420 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap assemble --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --targets simple.bed.gz --variants simple.vcf.gz --reference simple.fasta --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.posteriors.skiprare.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.posteriors.skiprare.vcf index 9dcfdc90..c502e081 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.posteriors.skiprare.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.posteriors.skiprare.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20220406 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="/home/cfltxm/Software/miniconda3/envs/mchap/bin/mchap call-exact --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes mock.input.frequencies.vcf --prior-frequencies AFP --filter-input-haplotypes AFP>=0.1 --report AFP GP" ##randomseed=None diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.prior.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.prior.vcf index 09425b3d..abfa8fd6 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.prior.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.prior.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20210420 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call-exact --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes mock.input.frequencies.vcf --prior-frequencies AFP --report AFPRIOR AFP" ##randomseed=None diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.skiprare.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.skiprare.vcf index 6669d1d9..682ae74e 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.skiprare.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.skiprare.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20220406 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call-exact --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes mock.input.frequencies.vcf --prior-frequencies AFP --filter-input-haplotypes AFP>=0.1 --report AFPRIOR AFP" ##randomseed=None diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.vcf index a4cb4476..2aa0776e 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.frequencies.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20220406 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call-exact --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes simple.output.mixed_depth.assemble.vcf --report AFP" ##randomseed=None diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.likelihoods.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.likelihoods.vcf index 229cf7aa..d1e2ac9a 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.likelihoods.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.likelihoods.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20210420 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call-exact --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes simple.output.mixed_depth.assemble.vcf --genotype-likelihoods" ##randomseed=None diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.occurrence.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.occurrence.vcf index 53af3e11..0168d19c 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.occurrence.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.occurrence.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20220406 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call-exact --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes simple.output.mixed_depth.assemble.vcf --report AOP" ##randomseed=None diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.posteriors.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.posteriors.vcf index f793b727..a2e78397 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.posteriors.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.posteriors.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20210420 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call-exact --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes simple.output.mixed_depth.assemble.vcf --genotype-posteriors" ##randomseed=None diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.vcf index cd12470d..67d5e239 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call-exact.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20210420 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call-exact --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes simple.output.mixed_depth.assemble.vcf" ##randomseed=None diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call.frequencies.prior.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call.frequencies.prior.vcf index 49c8d30a..2ce6b9a3 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call.frequencies.prior.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call.frequencies.prior.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20210420 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes mock.input.frequencies.vcf --prior-frequencies AFP --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --report AFPRIOR AFP" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call.frequencies.skiprare.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call.frequencies.skiprare.vcf index 6ddbcbc2..b67cd8c6 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call.frequencies.skiprare.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call.frequencies.skiprare.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20220406 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes mock.input.frequencies.vcf --prior-frequencies AFP --filter-input-haplotypes AFP>=0.1 --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --report AFP AFPRIOR" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call.frequencies.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call.frequencies.vcf index 41f2e447..c5901fe3 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call.frequencies.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call.frequencies.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20220406 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes simple.output.deep.assemble.vcf --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --report AFP" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call.likelihoods.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call.likelihoods.vcf index 67f593ee..6567a8c6 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call.likelihoods.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call.likelihoods.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20210420 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes simple.output.mixed_depth.assemble.vcf --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --genotype-likelihoods" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call.occurrence.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call.occurrence.vcf index 8d44d479..25a69d05 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call.occurrence.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call.occurrence.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20220406 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes simple.output.deep.assemble.vcf --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --report AOP" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call.posteriors.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call.posteriors.vcf index 3a06dc30..04d59d01 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call.posteriors.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call.posteriors.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20210420 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes simple.output.mixed_depth.assemble.vcf --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --genotype-posteriors" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.mixed_depth.call.vcf b/mchap/tests/test_io/data/simple.output.mixed_depth.call.vcf index f178943a..c133c48d 100644 --- a/mchap/tests/test_io/data/simple.output.mixed_depth.call.vcf +++ b/mchap/tests/test_io/data/simple.output.mixed_depth.call.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20210420 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap call --bam simple.sample1.bam simple.sample2.deep.bam simple.sample3.bam --ploidy 4 --haplotypes simple.output.mixed_depth.assemble.vcf --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11" ##randomseed=11 diff --git a/mchap/tests/test_io/data/simple.output.nullallele.assemble.vcf b/mchap/tests/test_io/data/simple.output.nullallele.assemble.vcf index 3d0c0719..6d932643 100644 --- a/mchap/tests/test_io/data/simple.output.nullallele.assemble.vcf +++ b/mchap/tests/test_io/data/simple.output.nullallele.assemble.vcf @@ -1,6 +1,6 @@ ##fileformat=VCFv4.3 ##fileDate=20210420 -##source=mchap v0.9.1 +##source=mchap v0.9.2 ##phasing=None ##commandline="mchap assemble --bam simple.sample1.bam simple.sample2.bam simple.sample3.bam --ploidy 4 --targets simple.bed.gz --variants simple.vcf.gz --reference simple.fasta --mcmc-steps 500 --mcmc-burn 100 --mcmc-seed 11 --haplotype-posterior-threshold 1.0" ##randomseed=11 diff --git a/mchap/version.py b/mchap/version.py index d69d16e9..a2fecb45 100644 --- a/mchap/version.py +++ b/mchap/version.py @@ -1 +1 @@ -__version__ = "0.9.1" +__version__ = "0.9.2"