Skip to content

Commit

Permalink
fix: QA genome generation (#613)
Browse files Browse the repository at this point in the history
* fix deprecated squeeze

* fix wrong seq name

* fmt

* remove not needed file handling
  • Loading branch information
alethomas authored Nov 15, 2023
1 parent 33ac5f7 commit 2bb74bf
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 11 deletions.
8 changes: 2 additions & 6 deletions workflow/scripts/masking.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ def get_base_count(pileupcolumn):
for pileupread in pileupcolumn.pileups:
# TODO Check pileupread for missing bases
if not pileupread.is_del and not pileupread.is_refskip:

read_base = pileupread.alignment.query_sequence[pileupread.query_position]

bases.append(read_base)
Expand Down Expand Up @@ -102,7 +101,6 @@ def mask_sequence(sequence, coverages, base_counts):
covered_postions = coverages.keys()

for position, base in enumerate(sequence):

if position not in covered_postions:
# TODO Check why there are postions that are not covered by any reads and are not Ns
# sequence[position] = "N"
Expand Down Expand Up @@ -160,10 +158,8 @@ def mask_sequence(sequence, coverages, base_counts):


def write_sequence(sequence):
with pysam.FastxFile(snakemake.input.sequence) as infile, open(
snakemake.output.masked_sequence, mode="w"
) as outfile:
print(">%s" % next(infile).name.split(".")[0], file=outfile)
with open(snakemake.output.masked_sequence, mode="w") as outfile:
print(">%s" % snakemake.wildcards.sample, file=outfile)
print(sequence, file=outfile)


Expand Down
8 changes: 3 additions & 5 deletions workflow/scripts/quality-filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,11 @@ def get_identity(quast_report_paths: List[str]) -> dict:
sample = path.dirname(report_path).split("/")[-1]

# load report
report_df = pd.read_csv(
report_path, delimiter="\t", index_col=0, squeeze=True, names=["value"]
)

report_df = pd.read_csv(report_path, delimiter="\t", names=["name", "value"])
report_df.set_index("name", inplace=True)
# select genome fraction (%)
try:
fraction = float(report_df.at["Genome fraction (%)"]) / 100
fraction = float(report_df.at["Genome fraction (%)", "value"]) / 100
except:
# no "Genome fraction (%)" in quast report. Case for not assemblable samples
fraction = 0.0
Expand Down

0 comments on commit 2bb74bf

Please sign in to comment.