Remove the ignore_dtype_columns parameter from TableStructure

sdv-dev · Nov 15, 2023 · a2b637f
1 parent 27d728c
commit a2b637f
Show file tree

Hide file tree

Showing 4 changed files with 7 additions and 53 deletions.
diff --git a/sdmetrics/reports/single_table/_properties/structure.py b/sdmetrics/reports/single_table/_properties/structure.py
@@ -31,21 +31,9 @@ def _generate_details(self, real_data, synthetic_data, metadata, progress_bar=No
         Returns:
             pandas.DataFrame
         """
-        column_to_ignore_dtype = []
-        non_pii_sdtype = [
-            'numerical', 'datetime', 'categorical', 'boolean'
-        ]
-        for column_name in metadata['columns']:
-            sdtype = metadata['columns'][column_name]['sdtype']
-            if sdtype in non_pii_sdtype:
-                continue
-
-            column_to_ignore_dtype.append(column_name)
-
         try:
             score = TableStructure.compute(
-                real_data, synthetic_data,
-                ignore_dtype_columns=column_to_ignore_dtype
+                real_data, synthetic_data
             )
             error_message = None
 

diff --git a/sdmetrics/single_table/table_structure.py b/sdmetrics/single_table/table_structure.py
@@ -26,19 +26,15 @@ class TableStructure(SingleTableMetric):
     max_value = 1
 
     @classmethod
-    def compute_breakdown(cls, real_data, synthetic_data, ignore_dtype_columns=None):
+    def compute_breakdown(cls, real_data, synthetic_data):
         """Compute the score breakdown of the table format metric.
 
         Args:
         real_data (pandas.DataFrame):
             The real data.
         synthetic_data (pandas.DataFrame):
             The synthetic data.
-        ignore_dtype_columns (list[str]):
-            List of column names to ignore when comparing data types.
-            Defaults to ``None``.
         """
-        ignore_dtype_columns = ignore_dtype_columns or []
         missing_columns_in_synthetic = set(real_data.columns) - set(synthetic_data.columns)
         invalid_names = []
         invalid_sdtypes = []
@@ -47,9 +43,6 @@ def compute_breakdown(cls, real_data, synthetic_data, ignore_dtype_columns=None)
                 invalid_names.append(column)
                 continue
 
-            if column in ignore_dtype_columns:
-                continue
-
             if synthetic_data[column].dtype != real_data[column].dtype:
                 invalid_sdtypes.append(column)
 
@@ -61,20 +54,17 @@ def compute_breakdown(cls, real_data, synthetic_data, ignore_dtype_columns=None)
         return {'score': score}
 
     @classmethod
-    def compute(cls, real_data, synthetic_data, ignore_dtype_columns=None):
+    def compute(cls, real_data, synthetic_data):
         """Compute the table format metric score.
 
         Args:
             real_data (pandas.DataFrame):
                 The real data.
             synthetic_data (pandas.DataFrame):
                 The synthetic data.
-            ignore_dtype_columns (list[str]):
-                List of column names to ignore when comparing data types.
-                Defaults to ``None``.
 
         Returns:
             float:
                 The metric score.
         """
-        return cls.compute_breakdown(real_data, synthetic_data, ignore_dtype_columns)['score']
+        return cls.compute_breakdown(real_data, synthetic_data)['score']
diff --git a/tests/unit/reports/single_table/_properties/test_structure.py b/tests/unit/reports/single_table/_properties/test_structure.py
@@ -38,7 +38,7 @@ def test__generate_details(self, table_format_mock):
 
         # Assert
         table_format_mock.assert_called_once_with(
-            real_data, synthetic_data, ignore_dtype_columns=[]
+            real_data, synthetic_data,
         )
 
         expected_details = pd.DataFrame({
@@ -78,7 +78,7 @@ def test__generate_details_with_id_column(self, table_format_mock):
 
         # Assert
         table_format_mock.assert_called_once_with(
-            real_data, synthetic_data, ignore_dtype_columns=['id']
+            real_data, synthetic_data
         )
 
         expected_details = pd.DataFrame({

diff --git a/tests/unit/single_table/test_table_structure.py b/tests/unit/single_table/test_table_structure.py
@@ -109,30 +109,6 @@ def test_compute_breakdown_with_invalid_dtypes(self, real_data):
         expected_result = {'score': 0.6}
         assert result == expected_result
 
-    def test_compute_breakdown_ignore_dtype_columns(self, real_data):
-        """Test the ``compute_breakdown`` method when ignore_dtype_columns is set."""
-        # Setup
-        synthetic_data = pd.DataFrame({
-            'col_1': [3.0, 2.0, 1.0, 4.0, 5.0],
-            'col_2': ['A', 'B', 'C', 'D', 'E'],
-            'col_3': [True, False, True, False, True],
-            'col_4': [
-                '2020-01-11', '2020-01-02', '2020-01-03', '2020-01-04', '2020-01-05'
-            ],
-            'col_5': [4.0, 2.0, 3.0, 4.0, 5.0],
-        })
-
-        metric = TableStructure()
-
-        # Run
-        result = metric.compute_breakdown(
-            real_data, synthetic_data, ignore_dtype_columns=['col_4']
-        )
-
-        # Assert
-        expected_result = {'score': 0.8}
-        assert result == expected_result
-
     def test_compute_breakdown_multiple_error(self, real_data):
         """Test the ``compute_breakdown`` method with the different failure modes."""
         synthetic_data = pd.DataFrame({
@@ -174,5 +150,5 @@ def test_compute(self, compute_breakdown_mock, real_data):
         result = metric.compute(real_data, synthetic_data)
 
         # Assert
-        compute_breakdown_mock.assert_called_once_with(real_data, synthetic_data, None)
+        compute_breakdown_mock.assert_called_once_with(real_data, synthetic_data)
         assert result == 0.6