From 8b8c2e397c830024b497eeec8af556e33a82cb31 Mon Sep 17 00:00:00 2001
From: R-Palazzo
Date: Fri, 17 Nov 2023 07:34:46 -0600
Subject: [PATCH] tests

---
Notes (ignored by git am; not part of the commit message):

Adds three tests around report validation when data and metadata mismatch:
  * integration: DiagnosticReport.generate() completes when the synthetic
    data has an extra column and lacks one declared in the metadata, and
    the resulting scores are checked.
  * unit: DiagnosticReport._validate() returns None even though
    _validate_metadata_matches_data is mocked to raise ValueError.
  * unit: BaseReport._validate() propagates that ValueError.

 .../single_table/test_diagnostic_report.py    | 37 +++++++++++++++++++
 .../single_table/test_diagnostic_report.py    | 36 ++++++++++++++++++
 tests/unit/reports/test_base_report.py        | 30 +++++++++++++++
 3 files changed, 103 insertions(+)

diff --git a/tests/integration/reports/single_table/test_diagnostic_report.py b/tests/integration/reports/single_table/test_diagnostic_report.py
index 3e33e99f..a64774a4 100644
--- a/tests/integration/reports/single_table/test_diagnostic_report.py
+++ b/tests/integration/reports/single_table/test_diagnostic_report.py
@@ -204,3 +204,40 @@ def test_get_details_with_errors(self):
             report.get_details('Data Validity'),
             expected_details
         )
+
+    def test_report_runs_with_mismatch_data_metadata(self):
+        """Test that the report runs with mismatched data and metadata."""
+        # Setup
+        data = pd.DataFrame({
+            'id': [0, 1, 2],
+            'val1': ['a', 'a', 'b'],
+            'val2': [0.1, 2.4, 5.7]
+        })
+        synthetic_data = pd.DataFrame({
+            'id': [1, 2, 3],
+            'extra_col': ['x', 'y', 'z'],
+            'val1': ['c', 'd', 'd']
+        })
+
+        metadata = {
+            'columns': {
+                'id': {'sdtype': 'id'},
+                'val1': {'sdtype': 'categorical'},
+                'val2': {'sdtype': 'numerical'}
+            },
+            'primary_key': 'id'
+        }
+        report = DiagnosticReport()
+
+        # Run
+        report.generate(data, synthetic_data, metadata)
+
+        # Assert
+        expected_properties = pd.DataFrame({
+            'Property': ['Data Validity', 'Data Structure'],
+            'Score': [0.5, 0.4444444]
+        })
+        assert report.get_score() == 0.47222222222222227
+        pd.testing.assert_frame_equal(
+            report.get_properties(), expected_properties
+        )
diff --git a/tests/unit/reports/single_table/test_diagnostic_report.py b/tests/unit/reports/single_table/test_diagnostic_report.py
index b744ce98..fb1af0be 100644
--- a/tests/unit/reports/single_table/test_diagnostic_report.py
+++ b/tests/unit/reports/single_table/test_diagnostic_report.py
@@ -1,3 +1,7 @@
+from unittest.mock import Mock
+
+import pandas as pd
+
 from sdmetrics.reports.single_table import DiagnosticReport
 from sdmetrics.reports.single_table._properties import DataValidity, Structure
 
@@ -14,3 +18,35 @@ def test___init__(self):
         assert report.is_generated is False
         assert isinstance(report._properties['Data Validity'], DataValidity)
         assert isinstance(report._properties['Data Structure'], Structure)
+
+    def test__validate_with_data_metadata_mismatch(self):
+        """Test ``_validate`` doesn't raise on a data/metadata mismatch."""
+        # Setup
+        report = DiagnosticReport()
+        mock__validate_metadata_matches_data = Mock(
+            side_effect=ValueError('error message')
+        )
+        report._validate_metadata_matches_data = mock__validate_metadata_matches_data
+
+        real_data = pd.DataFrame({
+            'column1': [1, 2, 3],
+            'column2': ['a', 'b', 'c'],
+            'column3': [4, 5, 6]
+        })
+        synthetic_data = pd.DataFrame({
+            'column1': [1, 2, 3],
+            'column2': ['a', 'b', 'c'],
+            'column4': [4, 5, 6]
+        })
+        metadata = {
+            'columns': {
+                'column1': {'sdtype': 'numerical'},
+                'column2': {'sdtype': 'categorical'},
+            }
+        }
+
+        # Run
+        result = report._validate(real_data, synthetic_data, metadata)
+
+        # Assert
+        assert result is None
diff --git a/tests/unit/reports/test_base_report.py b/tests/unit/reports/test_base_report.py
index 8e56d474..09b16ea8 100644
--- a/tests/unit/reports/test_base_report.py
+++ b/tests/unit/reports/test_base_report.py
@@ -137,6 +137,36 @@ def test__validate(self):
             real_data, synthetic_data, metadata
         )
 
+    def test__validate_with_value_error(self):
+        """Test the ``_validate`` method with a ValueError."""
+        # Setup
+        base_report = BaseReport()
+        mock__validate_metadata_matches_data = Mock(
+            side_effect=ValueError('error message')
+        )
+        base_report._validate_metadata_matches_data = mock__validate_metadata_matches_data
+
+        real_data = pd.DataFrame({
+            'column1': [1, 2, 3],
+            'column2': ['a', 'b', 'c'],
+            'column3': [4, 5, 6]
+        })
+        synthetic_data = pd.DataFrame({
+            'column1': [1, 2, 3],
+            'column2': ['a', 'b', 'c'],
+            'column4': [4, 5, 6]
+        })
+        metadata = {
+            'columns': {
+                'column1': {'sdtype': 'numerical'},
+                'column2': {'sdtype': 'categorical'},
+            }
+        }
+
+        # Run and Assert
+        with pytest.raises(ValueError, match='error message'):
+            base_report._validate(real_data, synthetic_data, metadata)
+
     def test_convert_datetimes(self):
         """Test that ``_convert_datetimes`` tries to convert datetime columns."""
         # Setup