Skip to content

Commit

Permalink
Merge pull request #184 from rhpvorderman/floornotround
Browse files Browse the repository at this point in the history
Use floor rather than round for phred scores
  • Loading branch information
rhpvorderman authored Aug 9, 2024
2 parents eda2c55 + 8e3a085 commit 691d488
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 8 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ Changelog
version 0.11.0-dev
------------------
+ Fix a bug where the average phred score per read would be rounded, not
floored. This would lead reads with a phred score such as 9.7 to be counted
towards the Q>=10 results.
+ Replace some of the hand vectorized code with more generic code that can be
automatically be optimized by the compiler. This should make things faster on
Windows and ARM64 platforms.
Expand Down
4 changes: 3 additions & 1 deletion src/sequali/_qcmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1849,7 +1849,9 @@ QCMetrics_add_meta(QCMetrics *self, struct FastqMeta *meta)
meta->accumulated_error_rate = accumulated_error_rate;
double average_error_rate = accumulated_error_rate / (double)sequence_length;
double average_phred = -10.0 * log10(average_error_rate);
uint64_t phred_score_index = (uint64_t)round(average_phred);
// Floor the average phred so q9.7 does not get represented as q10 but
// q9. Otherwise the Q>=10 count is going to be off.
uint64_t phred_score_index = (uint64_t)floor(average_phred);
assert(phred_score_index >= 0);
assert(phred_score_index <= PHRED_MAX);
self->phred_scores[phred_score_index] += 1;
Expand Down
21 changes: 14 additions & 7 deletions tests/test_qc_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def test_qc_metrics():
phred_content = metrics.phred_scores()
this_read_error = (10 ** -1) * 25 + (10 ** -3) * 25
this_read_phred = -10 * math.log10(this_read_error / len(sequence))
phred_index = round(this_read_phred)
phred_index = math.floor(this_read_phred)
assert phred_content[phred_index] == 1
assert sum(phred_content) == 1
phred_array = metrics.phred_count_table()
Expand Down Expand Up @@ -76,13 +76,20 @@ def test_qc_metrics():

def test_long_sequence():
metrics = QCMetrics()
# This will test the base counting in vectors properly as that is limited
# at 255 * 16 nucleotides (4080 bytes) and thus needs to flush the counts
# properly.

sequence = 4096 * 'A' + 4096 * 'C'
qualities = 8192 * chr(20 + 33)
qualities = (2048 * chr(00 + 33) +
2048 * chr(10 + 33) +
2048 * chr(20 + 33) +
2048 * chr(30 + 33))
errors = (2048 * (10 ** -0) +
2048 * (10 ** -1) +
2048 * (10 ** -2) +
2048 * (10 ** -3))
phred = -10 * math.log10(errors / 8192)
floored_phred = math.floor(phred)
metrics.add_read(FastqRecordView("name", sequence, qualities))
assert metrics.phred_scores()[20] == 1
assert metrics.phred_scores()[floored_phred] == 1
assert metrics.gc_content()[50] == 1


Expand All @@ -97,4 +104,4 @@ def test_average_long_quality():
error_rate = (1 + 19999 * 10 ** -5) / 20000
phred = -10 * math.log10(error_rate)
metrics.add_read(FastqRecordView("name", sequence, qualities))
assert metrics.phred_scores()[round(phred)] == 1
assert metrics.phred_scores()[math.floor(phred)] == 1

0 comments on commit 691d488

Please sign in to comment.