Skip to content

Commit

Permalink
feat: Ingestion of CSI-indexed VCF files
Browse files Browse the repository at this point in the history
  • Loading branch information
emmcauley authored and msto committed Jan 15, 2025
1 parent 125c35d commit c718c17
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 3 deletions.
4 changes: 2 additions & 2 deletions pixy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,9 +1004,9 @@ def check_and_validate_args(
'[pixy] ERROR: The vcf is not compressed with bgzip (or has no .gz extension). To fix this, run "bgzip [filename].vcf" first (and then index with "tabix [filename].vcf.gz" if necessary)'
)

if not os.path.exists(args.vcf + ".tbi"):
if not (os.path.exists(args.vcf + ".tbi") or os.path.exists(args.vcf + ".csi")):
raise Exception(
'[pixy] ERROR: The vcf is not indexed with tabix. To fix this, run "tabix [filename].vcf.gz" first'
'[pixy] ERROR: The vcf is not indexed. Please either use `tabix` or `bcftools` to produce a `.tbi` or `.csi` index.'
)

if os.path.exists(args.populations) is not True:
Expand Down
6 changes: 6 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ def ag1000_vcf_path(datadir: Path) -> Path:
return datadir / "ag1000_pixy_test.vcf.gz"


@pytest.fixture()
def ag1000_csi_path(datadir: Path) -> Path:
    """Return the location of the CSI index for the ag1000 test VCF inside `datadir`."""
    csi_index_name = "ag1000_pixy_test.vcf.gz.csi"
    return datadir.joinpath(csi_index_name)


@pytest.fixture()
def missing50_vcf_path(datadir: Path) -> Path:
"""Path to a simulated VCF that is known to be missing a small number of genotypes."""
Expand Down
Binary file added tests/main/data/ag1000_pixy_test.vcf.gz.csi
Binary file not shown.
55 changes: 54 additions & 1 deletion tests/main/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def test_vcf_missing_index(
"""Assert that we raise an exception when missing .tbi index."""
missing_index_vcf_path: Path = tmp_path / "ag1000_pixy_test.vcf.gz"
shutil.copy(ag1000_vcf_path, missing_index_vcf_path)
with pytest.raises(Exception, match="ERROR: The vcf is not indexed with tabix"):
with pytest.raises(Exception, match="ERROR: The vcf is not indexed."):
run_pixy_helper(
pixy_out_dir=pixy_out_dir,
stats=["pi", "fst", "dxy"],
Expand Down Expand Up @@ -351,6 +351,59 @@ def test_bypass_invariant_check_warns(
# Tests for pixy.main(): valid inputs and expected results
################################################################################

#############################
# Tests for VCF file indexes
#############################


def test_pixy_csi_index(
    tmp_path: Path,
    ag1000_pop_path: Path,
    ag1000_vcf_path: Path,
    ag1000_csi_path: Path,
    pixy_out_dir: Path,
    expected_outputs: Path,
) -> None:
    """
    Assert that a VCF can have either a `.tbi` or a `.csi` index with valid inputs.

    The outputs with a `.csi` index should match the outputs of the `.tbi` index.

    NB, we copy `ag1000_pixy_test.vcf.gz` and `ag1000_pixy_test.vcf.gz.csi` into a clean directory
    so that we can be confident there is no interference from the pre-existing `.tbi` file.
    """
    # Stage the VCF next to *only* its `.csi` index inside the per-test temporary directory.
    vcf_path: Path = tmp_path / "ag1000_pixy_test.vcf.gz"
    csi_path: Path = tmp_path / "ag1000_pixy_test.vcf.gz.csi"
    shutil.copy(ag1000_vcf_path, vcf_path)
    shutil.copy(ag1000_csi_path, csi_path)

    run_pixy_helper(
        pixy_out_dir=pixy_out_dir,
        window_size=10000,
        vcf_path=vcf_path,
        populations_path=ag1000_pop_path,
        stats=["pi", "dxy", "fst"],
        output_prefix="pixy",
    )

    expected_out_files: List[Path] = [
        Path("pixy_dxy.txt"),
        Path("pixy_fst.txt"),
        Path("pixy_pi.txt"),
    ]
    # this run of `pixy` should match the run using the same inputs and a `.tbi` index
    for file in expected_out_files:
        generated_data_path: Path = pixy_out_dir / file
        exp_data_path: Path = expected_outputs / "baseline" / file
        assert generated_data_path.exists()

        # `shallow=False` forces a byte-for-byte comparison. The default (shallow) mode
        # treats files as equal when their `os.stat` signatures (type, size, mtime) match,
        # which could hide a content difference.
        assert filecmp.cmp(generated_data_path, exp_data_path, shallow=False)


#######################################
# Tests for output formatting/creation
#######################################


@pytest.mark.parametrize(
"output_prefix, stats_requested, expected_files",
Expand Down

0 comments on commit c718c17

Please sign in to comment.