diff --git a/sdmetrics/reports/single_table/_properties/structure.py b/sdmetrics/reports/single_table/_properties/structure.py index 8e644a91..fcc821af 100644 --- a/sdmetrics/reports/single_table/_properties/structure.py +++ b/sdmetrics/reports/single_table/_properties/structure.py @@ -31,21 +31,9 @@ def _generate_details(self, real_data, synthetic_data, metadata, progress_bar=No Returns: pandas.DataFrame """ - column_to_ignore_dtype = [] - non_pii_sdtype = [ - 'numerical', 'datetime', 'categorical', 'boolean' - ] - for column_name in metadata['columns']: - sdtype = metadata['columns'][column_name]['sdtype'] - if sdtype in non_pii_sdtype: - continue - - column_to_ignore_dtype.append(column_name) - try: score = TableStructure.compute( - real_data, synthetic_data, - ignore_dtype_columns=column_to_ignore_dtype + real_data, synthetic_data ) error_message = None diff --git a/sdmetrics/single_table/table_structure.py b/sdmetrics/single_table/table_structure.py index d7264b58..b2073c97 100644 --- a/sdmetrics/single_table/table_structure.py +++ b/sdmetrics/single_table/table_structure.py @@ -26,7 +26,7 @@ class TableStructure(SingleTableMetric): max_value = 1 @classmethod - def compute_breakdown(cls, real_data, synthetic_data, ignore_dtype_columns=None): + def compute_breakdown(cls, real_data, synthetic_data): """Compute the score breakdown of the table format metric. Args: @@ -34,11 +34,7 @@ def compute_breakdown(cls, real_data, synthetic_data, ignore_dtype_columns=None) The real data. synthetic_data (pandas.DataFrame): The synthetic data. - ignore_dtype_columns (list[str]): - List of column names to ignore when comparing data types. - Defaults to ``None``. """ - ignore_dtype_columns = ignore_dtype_columns or [] missing_columns_in_synthetic = set(real_data.columns) - set(synthetic_data.columns) invalid_names = [] invalid_sdtypes = [] @@ -47,9 +43,6 @@ def compute_breakdown(cls, real_data, synthetic_data, ignore_dtype_columns=None) invalid_names.append(column) continue - if column in ignore_dtype_columns: - continue - if synthetic_data[column].dtype != real_data[column].dtype: invalid_sdtypes.append(column) @@ -61,7 +54,7 @@ def compute_breakdown(cls, real_data, synthetic_data, ignore_dtype_columns=None) return {'score': score} @classmethod - def compute(cls, real_data, synthetic_data, ignore_dtype_columns=None): + def compute(cls, real_data, synthetic_data): """Compute the table format metric score. Args: @@ -69,12 +62,9 @@ def compute(cls, real_data, synthetic_data, ignore_dtype_columns=None): The real data. synthetic_data (pandas.DataFrame): The synthetic data. - ignore_dtype_columns (list[str]): - List of column names to ignore when comparing data types. - Defaults to ``None``. Returns: float: The metric score. """ - return cls.compute_breakdown(real_data, synthetic_data, ignore_dtype_columns)['score'] + return cls.compute_breakdown(real_data, synthetic_data)['score'] diff --git a/tests/unit/reports/single_table/_properties/test_structure.py b/tests/unit/reports/single_table/_properties/test_structure.py index 73b7f3d3..f112d5d8 100644 --- a/tests/unit/reports/single_table/_properties/test_structure.py +++ b/tests/unit/reports/single_table/_properties/test_structure.py @@ -38,7 +38,7 @@ def test__generate_details(self, table_format_mock): # Assert table_format_mock.assert_called_once_with( - real_data, synthetic_data, ignore_dtype_columns=[] + real_data, synthetic_data, ) expected_details = pd.DataFrame({ @@ -78,7 +78,7 @@ def test__generate_details_with_id_column(self, table_format_mock): # Assert table_format_mock.assert_called_once_with( - real_data, synthetic_data, ignore_dtype_columns=['id'] + real_data, synthetic_data ) expected_details = pd.DataFrame({ diff --git a/tests/unit/single_table/test_table_structure.py b/tests/unit/single_table/test_table_structure.py index 9cb6060e..2f59b2fc 100644 --- a/tests/unit/single_table/test_table_structure.py +++ b/tests/unit/single_table/test_table_structure.py @@ -109,30 +109,6 @@ def test_compute_breakdown_with_invalid_dtypes(self, real_data): expected_result = {'score': 0.6} assert result == expected_result - def test_compute_breakdown_ignore_dtype_columns(self, real_data): - """Test the ``compute_breakdown`` method when ignore_dtype_columns is set.""" - # Setup - synthetic_data = pd.DataFrame({ - 'col_1': [3.0, 2.0, 1.0, 4.0, 5.0], - 'col_2': ['A', 'B', 'C', 'D', 'E'], - 'col_3': [True, False, True, False, True], - 'col_4': [ - '2020-01-11', '2020-01-02', '2020-01-03', '2020-01-04', '2020-01-05' - ], - 'col_5': [4.0, 2.0, 3.0, 4.0, 5.0], - }) - - metric = TableStructure() - - # Run - result = metric.compute_breakdown( - real_data, synthetic_data, ignore_dtype_columns=['col_4'] - ) - - # Assert - expected_result = {'score': 0.8} - assert result == expected_result - def test_compute_breakdown_multiple_error(self, real_data): """Test the ``compute_breakdown`` method with the different failure modes.""" synthetic_data = pd.DataFrame({ @@ -174,5 +150,5 @@ def test_compute(self, compute_breakdown_mock, real_data): result = metric.compute(real_data, synthetic_data) # Assert - compute_breakdown_mock.assert_called_once_with(real_data, synthetic_data, None) + compute_breakdown_mock.assert_called_once_with(real_data, synthetic_data) assert result == 0.6