Skip to content

Commit

Permalink
Merge pull request #32 from genomewalker/fix-header
Browse files Browse the repository at this point in the history
Fix header
  • Loading branch information
genomewalker authored Apr 30, 2024
2 parents 137fa50 + e6845fd commit 369e503
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 6 deletions.
5 changes: 4 additions & 1 deletion bam_filter/reassign.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,13 +293,16 @@ def write_reassigned_bam(
else:
write_threads = threads

new_header = header.to_dict()
new_header["SQ"] = [x for x in new_header["SQ"] if x["SN"] in ref_names]

out_bam_file = pysam.AlignmentFile(
out_files["bam_reassigned_tmp"],
"wb",
referencenames=list(ref_names),
referencelengths=list(ref_lengths),
threads=write_threads,
header=header,
header=new_header,
)

# num_cores should be multiple of the write_threads
Expand Down
14 changes: 9 additions & 5 deletions bam_filter/sam_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ def write_bam(bam, references, output_files, threads=1, sort_memory="1G"):
s_threads = threads
samfile = pysam.AlignmentFile(bam, "rb", threads=s_threads)
header = samfile.header
# get read groups

if threads > 4:
threads = 4
Expand Down Expand Up @@ -88,15 +87,19 @@ def write_bam(bam, references, output_files, threads=1, sort_memory="1G"):
else:
write_threads = threads

new_header = header.to_dict()
new_header["SQ"] = [x for x in new_header["SQ"] if x["SN"] in ref_names]

out_bam_file = pysam.AlignmentFile(
output_files["bam_tmp"],
"wb",
referencenames=list(ref_names),
referencelengths=ref_lengths,
threads=write_threads,
header=header,
header=new_header,
)

# out_bam_file.set_tag(read_groups)
# out_bam_file.set(pg)
references = [x for x in samfile.references if x in refs_idx.keys()]

logging.info(f"::: ::: Filtering {len(references):,} references sequentially...")
Expand Down Expand Up @@ -1183,14 +1186,15 @@ def filter_reference_BAM(
write_threads = 4
else:
write_threads = threads

new_header = header.to_dict()
new_header["SQ"] = [x for x in new_header["SQ"] if x["SN"] in ref_names]
out_bam_file = pysam.AlignmentFile(
out_files["bam_filtered_tmp"],
"wb",
referencenames=list(ref_names),
referencelengths=list(ref_lengths),
threads=write_threads,
header=header,
header=new_header,
)

# logging.info(
Expand Down

0 comments on commit 369e503

Please sign in to comment.