Skip to content

Commit

Permalink
test: check whether pharmcat_position.vcf and pharmcat_position.uniallelic.vcf have the same number of lines
Browse files Browse the repository at this point in the history
  • Loading branch information
BinglanLi committed Aug 9, 2024
1 parent 7250db4 commit 3eebdca
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 10 deletions.
30 changes: 20 additions & 10 deletions preprocessor/tests/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import hashlib
import shutil
import urllib.request
import gzip
from pathlib import Path
from typing import Optional, List

Expand All @@ -21,6 +22,7 @@
test_dir: Path = Path(globals().get("__file__", "./_")).absolute().parent
src_dir: Path = test_dir / '../preprocessor'
pharmcat_positions_file: Path = test_dir / '../../pharmcat_positions.vcf.bgz'
uniallelic_pharmcat_positions_file: Path = test_dir / '../../pharmcat_positions.uniallelic.vcf.bgz'


def get_reference_fasta(pharmcat_positions: Path) -> Path:
Expand Down Expand Up @@ -51,17 +53,25 @@ def md5hash(file: Path):
return file_hash.hexdigest()


def read_vcf(file: Path, bgzipped: bool = False, skip_comments: bool = True):
    """Open a VCF file as UTF-8 text and return its normalized content.

    The actual line filtering is delegated to ``_read_vcf``; this function only
    decides how the file is opened.

    Args:
        file: path of the VCF file to read.
        bgzipped: when True the file is opened with ``gzip.open`` in text mode;
            when False it is opened with the built-in ``open``.
        skip_comments: passed through to ``_read_vcf``; when True, lines that
            start with ``##`` are left out of the result.

    Returns:
        The value returned by ``_read_vcf`` for the opened stream.
    """
    # Both openers accept the same (mode, encoding) keywords, so only the callable differs.
    opener = gzip.open if bgzipped else open
    with opener(file, mode='rt', encoding='utf-8') as in_f:
        return _read_vcf(in_f, skip_comments=skip_comments)


def _read_vcf(in_f, skip_comments: bool = True):
"""Reads VCF file and (1) strips trailing spaces, (2) removes empty lines and (3) normalizes line endings."""
with open(file, 'r') as f:
lines = []
for line in f:
line = line.rstrip()
if line.startswith('##') and skip_comments:
continue
if line:
lines.append(line)
return '\n'.join(lines)
lines = []
for line in in_f:
line = line.rstrip()
if line.startswith('##') and skip_comments:
continue
if line:
lines.append(line)
return '\n'.join(lines)


def compare_vcf_files(expected: Path, tmp_dir: Path, basename: str, sample: str = None, split_sample: bool = False,
Expand Down
12 changes: 12 additions & 0 deletions preprocessor/tests/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,18 @@ def test_prep_pharmcat_positions():
assert tmp_uniallelic.is_file()
assert uniallelic_mtime == tmp_uniallelic.stat().st_mtime

# check whether the uniallelic position file has the same number of positions as the position file
uniallelic_file_lines = helpers.read_vcf(helpers.uniallelic_pharmcat_positions_file, bgzipped=True).split('\n')
n_uniallelic_file_lines: int = len(uniallelic_file_lines)
# note that tmp_uniallelic is the equivalent uniallelic file generated from the pharmcat position file
tmp_uniallelic_lines = helpers.read_vcf(tmp_uniallelic, bgzipped=True).split('\n')
n_tmp_uniallelic_lines: int = len(tmp_uniallelic_lines)
# uniallelic_pharmcat_positions_file should
# have the same number of lines as its equivalent file generated from the pharmcat position file
assert n_uniallelic_file_lines == n_tmp_uniallelic_lines, \
'mismatching numbers of positions between %s and %s' % \
(helpers.uniallelic_pharmcat_positions_file.name, helpers.pharmcat_positions_file.name)


def test_extract_pgx_regions():
vcf_file = helpers.test_dir / 'raw.vcf.bgz'
Expand Down

0 comments on commit 3eebdca

Please sign in to comment.