Skip to content

Commit

Permalink
BUG: make tsv 7 number summary tabulate base positions starting from 1 (
Browse files Browse the repository at this point in the history
#167)

Previously the graph started counting sequence positions at 1 while the tsv started counting the same positions at 0. 

Fixes #110.
  • Loading branch information
colinvwood authored Jan 15, 2025
1 parent 0162ce9 commit 64ac759
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions q2_demux/_summarize/_visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,9 @@ def _subsample(fastq_map):
def _compute_stats_of_df(df):
df_stats = df.describe(
percentiles=[0.02, 0.09, 0.25, 0.5, 0.75, 0.91, 0.98])
drop_cols = df_stats.index.isin(['std', 'mean', 'min', 'max'])
df_stats = df_stats[~drop_cols]
drop_rows = df_stats.index.isin(['std', 'mean', 'min', 'max'])
df_stats = df_stats[~drop_rows]

return df_stats


Expand Down Expand Up @@ -189,10 +190,16 @@ def summarize(output_dir: str, data: _PlotQualView, n: int = 10000) -> None:
scores = pd.DataFrame(quality_scores)
if not scores.empty:
stats = _compute_stats_of_df(scores)
stats.to_csv(

# ensure base positions begin from 1
stats_output = stats.copy()
stats_output.columns = range(1, len(stats_output.columns) + 1)

stats_output.to_csv(
os.path.join(output_dir,
'%s-seven-number-summaries.tsv' % (direction,)),
header=True, index=True, sep='\t')

length_table = _build_seq_len_table(scores)
qual_stats[direction] = stats

Expand Down

0 comments on commit 64ac759

Please sign in to comment.