Skip to content

Commit

Permalink
tests: add a user-friendly diff for failing tests (ksamuk#95)
Browse files Browse the repository at this point in the history
Closes ksamuk#94.

This PR uses `pandas` to compare generated dataframes between `pixy`
ground-truth data and generated data within the regression testing
suite. If a test fails, the diff is displayed to the user. For each test
failure, the `AssertionError`, test name, and diff are output.

Here is an updated example of the output:
```
============================== 1 failed in 5.35s ===============================
FAILED                         [100%]
tests/main/test_main.py:406 (test_pixy_main_valid_inputs)
pixy_out_dir = PosixPath('/private/var/folders/m8/w5g78dgx65504pv0zv2fdxcm0000gn/T/pytest-of-erin/pytest-985/test_pixy_main_valid_inputs0/output')
expected_outputs = PosixPath('/Users/erin/git/oss/pixy/tests/main/expected_outputs')
ag1000_pop_path = PosixPath('/Users/erin/git/oss/pixy/tests/main/data/ag1000_populations_file.txt')
ag1000_vcf_path = PosixPath('/Users/erin/git/oss/pixy/tests/main/data/ag1000_pixy_test.vcf.gz')
capsys = <_pytest.capture.CaptureFixture object at 0x7fe220c99c10>

    def test_pixy_main_valid_inputs(
        pixy_out_dir: Path,
        expected_outputs: Path,
        ag1000_pop_path: Path,
        ag1000_vcf_path: Path,
        capsys: pytest.CaptureFixture,
    ) -> None:
        """
        Given specific input data, assert that outputs do not change.
    
        Uses `filecmp` library to compare 2 files without opening them and reading line-by-line.
        `filecmp.cmp` returns True if 2 files are equal.
        """
        run_pixy_helper(
            pixy_out_dir=pixy_out_dir,
            stats=["pi", "fst", "dxy"],
            window_size=10000,
            vcf_path=ag1000_vcf_path,
            populations_path=ag1000_pop_path,
        )
        captured = capsys.readouterr()
        assert "Data set contains 2 population(s), 2 chromosome(s), and 36 sample(s)" in captured.out
    
        expected_out_files: List[Path] = [
            Path("pixy_dxy.txt"),
            Path("pixy_fst.txt"),
            Path("pixy_pi.txt"),
        ]
        for file in expected_out_files:
            generated_data_path: Path = pixy_out_dir / file
            exp_data_path: Path = expected_outputs / "baseline" / file
            assert generated_data_path.exists()
    
>           assert_files_are_consistent(generated_data_path,exp_data_path)
test_main.py:440: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

gen_file_path = PosixPath('/private/var/folders/m8/w5g78dgx65504pv0zv2fdxcm0000gn/T/pytest-of-erin/pytest-985/test_pixy_main_valid_inputs0/output/pixy_dxy.txt')
exp_file_path = PosixPath('/Users/erin/git/oss/pixy/tests/main/expected_outputs/baseline/pixy_dxy.txt')

    def assert_files_are_consistent(gen_file_path: Path, exp_file_path: Path) -> None:
        """
        Helper to display diff if two files are found to be different.
    
        The basename of the generated file is shown (e.g., `pixy_dxy`). The full path of the expected
        file is given to highlight which test case is failing.
    
        Args:
             exp_file_path: the path of the expected file (i.e., ground-truth)
             gen_file_path: the path of the generated file
    
        Raises:
            AssertionError, if the two files are not the same
        """
        if not files_are_consistent(gen_file_path, exp_file_path):
            diff_string: str = show_diff(gen_file_path, exp_file_path)
>           raise AssertionError(f"Files differ: {gen_file_path.stem}, {exp_file_path}\n{diff_string}")
E           AssertionError: Files differ: pixy_dxy, /Users/erin/git/oss/pixy/tests/main/expected_outputs/baseline/pixy_dxy.txt
E                         avg_dxy  count_comparisons  count_missing
E           1 Expected   0.002453          9820052.0      2703196.0
E             Generated  0.002454          9818500.0      2704748.0
E           2 Expected   0.001746         10398484.0      2161052.0
E             Generated  0.001746         10397188.0      2162348.0
E           3 Expected   0.001072         11058900.0      1285500.0
E             Generated  0.001072         11057780.0      1286620.0
E           7 Expected   0.002453          9820052.0      2703196.0
E             Generated  0.002454          9818500.0      2704748.0
E           8 Expected   0.001746         10398484.0      2161052.0
E             Generated  0.001746         10397188.0      2162348.0
E           9 Expected   0.001072         11058900.0      1285500.0
E             Generated  0.001072         11057780.0      1286620.0

../conftest.py:224: AssertionError
```
  • Loading branch information
emmcauley authored Feb 21, 2025
1 parent cde709f commit a48abb8
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 10 deletions.
49 changes: 47 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Optional
from unittest.mock import patch

import pandas as pd
import pytest

from pixy.__main__ import main
Expand Down Expand Up @@ -214,16 +215,61 @@ def run_pixy_helper( # noqa: C901

if fst_type is not None:
test_args.extend((["--fst_type", f"{fst_type}"]))
print(f"test_args: {test_args}")
with patch.object(sys, "argv", test_args):
main()


def assert_files_are_consistent(gen_file_path: Path, exp_file_path: Path) -> None:
    """
    Helper to display diff if two files are found to be different.

    The basename of the generated file is shown (e.g., `pixy_dxy`). The full path of the expected
    file is given to highlight which test case is failing.

    Args:
        gen_file_path: the path of the generated file
        exp_file_path: the path of the expected file (i.e., ground-truth)

    Raises:
        AssertionError, if the two files are not the same
    """
    if not files_are_consistent(gen_file_path, exp_file_path):
        # `show_diff` takes the expected file first and the generated file second, which is the
        # reverse of this helper's own parameter order. Pass by keyword so the "Expected" and
        # "Generated" labels in the rendered diff are attached to the correct file.
        diff_string: str = show_diff(expected_file=exp_file_path, generated_file=gen_file_path)
        raise AssertionError(f"Files differ: {gen_file_path.stem}, {exp_file_path}\n{diff_string}")


def show_diff(expected_file: Path, generated_file: Path) -> str:
    """
    Show the diff between an expected and generated file.

    Useful to examine why a regression test fails. Both files are read as tab-separated tables
    and compared cell-by-cell; only the differing cells are rendered, with one "Expected" row and
    one "Generated" row per differing table row.

    If the two tables do not have the same shape or column labels, a cell-by-cell comparison is
    not possible. In that case a short summary of the structural mismatch is returned instead.

    Args:
        expected_file: the path of the expected file (i.e., ground-truth)
        generated_file: the path of the generated file

    Returns:
        A printable string describing the differences between the two files.
    """
    exp = pd.read_csv(expected_file, sep="\t")
    gen = pd.read_csv(generated_file, sep="\t")
    try:
        diff = exp.compare(gen, result_names=("Expected", "Generated"), align_axis=0)
    except ValueError:
        # `DataFrame.compare` only accepts identically-labeled frames. A differing row count or
        # header would otherwise surface as a pandas ValueError and hide the real test failure.
        return (
            "Files cannot be compared cell-by-cell (shape or columns differ).\n"
            f"Expected:  shape={exp.shape}, columns={list(exp.columns)}\n"
            f"Generated: shape={gen.shape}, columns={list(gen.columns)}"
        )
    return diff.to_string()


def files_are_consistent(gen_file_path: Path, exp_file_path: Path) -> bool:
"""
Helper function to compare non-deterministic files generated by `pixy`.
Checks that the headers are the same and the length of the files matches.
Sorts the data to be deterministic before comparing to ground-truth data.
Used in regression testing to compare specific rows in generated files for reproducibility.
Raises:
FileNotFoundError: if one or both files do not exist
ValueError: if one of the provided paths is not a file
Returns:
True if lines in file match each other; False if there is a discrepancy
"""
if not gen_file_path.exists() or not exp_file_path.exists():
raise FileNotFoundError("One or both files do not exist.")
Expand All @@ -245,5 +291,4 @@ def files_are_consistent(gen_file_path: Path, exp_file_path: Path) -> bool:
for line1, line2 in zip(generated_data, expected_data):
if line1 != line2:
return False

return True
15 changes: 7 additions & 8 deletions tests/main/test_main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import filecmp
import logging
import os
import shutil
Expand All @@ -9,7 +8,7 @@

import pytest

from tests.conftest import files_are_consistent
from tests.conftest import assert_files_are_consistent
from tests.conftest import run_pixy_helper

################################################################################
Expand Down Expand Up @@ -433,7 +432,7 @@ def test_pixy_csi_index(
exp_data_path: Path = expected_outputs / "baseline" / file
assert generated_data_path.exists()

assert filecmp.cmp(generated_data_path, exp_data_path)
assert_files_are_consistent(generated_data_path, exp_data_path)


#######################################
Expand Down Expand Up @@ -528,7 +527,7 @@ def test_pixy_main_valid_inputs(
exp_data_path: Path = expected_outputs / "baseline" / file
assert generated_data_path.exists()

assert filecmp.cmp(generated_data_path, exp_data_path)
assert_files_are_consistent(generated_data_path, exp_data_path)


################################################################################
Expand Down Expand Up @@ -583,7 +582,7 @@ def test_pixy_limited_sites(
exp_data_path: Path = expected_outputs / output_prefix / file

assert generated_data_path.exists()
assert files_are_consistent(generated_data_path, exp_data_path)
assert_files_are_consistent(generated_data_path, exp_data_path)


################################################################################
Expand Down Expand Up @@ -623,7 +622,7 @@ def test_pixy_limited_bed_file(
exp_data_path: Path = expected_outputs / "limited_bed" / file

assert generated_data_path.exists()
assert files_are_consistent(generated_data_path, exp_data_path)
assert_files_are_consistent(generated_data_path, exp_data_path)


################################################################################
Expand Down Expand Up @@ -674,7 +673,7 @@ def test_pixy_limited_sites_bed(
generated_data_path: Path = pixy_out_dir / file
exp_data_path: Path = expected_outputs / "limited_sites_and_bed" / file
assert generated_data_path.exists()
assert files_are_consistent(generated_data_path, exp_data_path)
assert_files_are_consistent(generated_data_path, exp_data_path)


###############################################################################
Expand Down Expand Up @@ -709,4 +708,4 @@ def test_pixy_hudson_fst(
generated_data_path: Path = pixy_out_dir / file
exp_data_path: Path = expected_outputs / "hudson_fst" / file
assert generated_data_path.exists()
assert files_are_consistent(generated_data_path, exp_data_path)
assert_files_are_consistent(generated_data_path, exp_data_path)

0 comments on commit a48abb8

Please sign in to comment.