From 463b8187d4637fcdff65e3de112b4ed92559d393 Mon Sep 17 00:00:00 2001 From: R-Palazzo <116157184+R-Palazzo@users.noreply.github.com> Date: Mon, 13 Nov 2023 14:48:40 -0600 Subject: [PATCH] KeyUniqueness metric should only be applied to primary and alternate keys (#511) --- .../single_table/_properties/data_validity.py | 1 - .../_properties/test_data_validity.py | 4 +- .../multi_table/test_diagnostic_report.py | 40 +++++++++---------- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/sdmetrics/reports/single_table/_properties/data_validity.py b/sdmetrics/reports/single_table/_properties/data_validity.py index 3cd05e2f..8d96a12d 100644 --- a/sdmetrics/reports/single_table/_properties/data_validity.py +++ b/sdmetrics/reports/single_table/_properties/data_validity.py @@ -23,7 +23,6 @@ class DataValidity(BaseSingleTableProperty): 'datetime': BoundaryAdherence, 'categorical': CategoryAdherence, 'boolean': CategoryAdherence, - 'id': KeyUniqueness, } def _generate_details(self, real_data, synthetic_data, metadata, progress_bar=None): diff --git a/tests/integration/reports/multi_table/_properties/test_data_validity.py b/tests/integration/reports/multi_table/_properties/test_data_validity.py index 00074ec9..3f5ec7cc 100644 --- a/tests/integration/reports/multi_table/_properties/test_data_validity.py +++ b/tests/integration/reports/multi_table/_properties/test_data_validity.py @@ -18,7 +18,7 @@ def test_end_to_end(self): result = column_shapes.get_score(real_data, synthetic_data, metadata) # Assert - assert result == 0.9444444444444445 + assert result == 0.9636363636363637 def test_with_progress_bar(self): """Test that the progress bar is correctly updated.""" @@ -35,5 +35,5 @@ def test_with_progress_bar(self): result = column_shapes.get_score(real_data, synthetic_data, metadata, progress_bar) # Assert - assert result == 0.9444444444444445 + assert result == 0.9636363636363637 assert mock_update.call_count == num_columns diff --git a/tests/integration/reports/multi_table/test_diagnostic_report.py b/tests/integration/reports/multi_table/test_diagnostic_report.py index 7e2b8923..e329a8c8 100644 --- a/tests/integration/reports/multi_table/test_diagnostic_report.py +++ b/tests/integration/reports/multi_table/test_diagnostic_report.py @@ -17,7 +17,7 @@ def test_end_to_end(self): results = report.get_score() # Assert - assert results == 0.9814814814814815 + assert results == 0.9878787878787879 def test_end_to_end_with_object_datetimes(self): """Test the ``DiagnosticReport`` report with object datetimes.""" @@ -38,9 +38,9 @@ def test_end_to_end_with_object_datetimes(self): # Assert expected_dataframe = pd.DataFrame({ 'Property': ['Data Validity', 'Data Structure', 'Relationship Validity'], - 'Score': [0.9444444444444445, 1.0, 1.0] + 'Score': [0.9636363636363637, 1.0, 1.0] }) - assert results == 0.9814814814814815 + assert results == 0.9878787878787879 pd.testing.assert_frame_equal(properties, expected_dataframe) def test_end_to_end_with_metrics_failing(self): @@ -62,37 +62,36 @@ def test_end_to_end_with_metrics_failing(self): # Assert expected_properties = pd.DataFrame({ 'Property': ['Data Validity', 'Data Structure', 'Relationship Validity'], - 'Score': [0.9677777777777777, 0.7833333333333333, 1.0] + 'Score': [1.0, 0.7833333333333333, 1.0] }) expected_details = pd.DataFrame({ 'Table': [ 'users', 'users', 'users', 'users', 'sessions', 'sessions', 'sessions', - 'sessions', 'transactions', 'transactions', 'transactions', 'transactions', - 'transactions' + 'transactions', 'transactions', 'transactions', 'transactions', ], 'Column': [ - 'user_id', 'country', 'gender', 'age', 'session_id', 'user_id', 'device', - 'os', 'transaction_id', 'session_id', 'timestamp', 'amount', 'approved' + 'user_id', 'country', 'gender', 'age', 'session_id', 'device', + 'os', 'transaction_id', 'timestamp', 'amount', 'approved' ], 'Metric': [ 'KeyUniqueness', 'CategoryAdherence', 'CategoryAdherence', 'BoundaryAdherence', - 'KeyUniqueness', 'KeyUniqueness', 'CategoryAdherence', 'CategoryAdherence', - 'KeyUniqueness', 'KeyUniqueness', 'BoundaryAdherence', 'BoundaryAdherence', + 'KeyUniqueness', 'CategoryAdherence', 'CategoryAdherence', + 'KeyUniqueness', 'BoundaryAdherence', 'BoundaryAdherence', 'CategoryAdherence' ], 'Score': [ - 1.0, 1.0, 1.0, np.nan, 1.0, 0.7777777777777778, 1.0, 1.0, 1.0, 0.9, + 1.0, 1.0, 1.0, np.nan, 1.0, 1.0, 1.0, 1.0, np.nan, np.nan, 1.0 ], 'Error': [ None, None, None, "TypeError: '<=' not supported between instances of 'str' and 'int'", - None, None, None, None, None, None, + None, None, None, None, "TypeError: '<=' not supported between instances of 'str' and 'Timestamp'", "TypeError: '<=' not supported between instances of 'str' and 'float'", None ] }) - assert results == 0.9170370370370371 + assert results == 0.9277777777777777 pd.testing.assert_frame_equal( report.get_properties(), expected_properties, check_exact=False, atol=2e-2 ) @@ -111,7 +110,7 @@ def test_get_properties(self): # Assert expected_dataframe = pd.DataFrame({ 'Property': ['Data Validity', 'Data Structure', 'Relationship Validity'], - 'Score': [0.9444444444444445, 1.0, 1.0] + 'Score': [0.9636363636363637, 1.0, 1.0] }) pd.testing.assert_frame_equal(properties, expected_dataframe) @@ -130,21 +129,20 @@ def test_get_details(self): expected_dataframe = pd.DataFrame({ 'Table': [ 'users', 'users', 'users', 'users', 'sessions', 'sessions', 'sessions', - 'sessions', 'transactions', 'transactions', 'transactions', 'transactions', - 'transactions' + 'transactions', 'transactions', 'transactions', 'transactions', ], 'Column': [ - 'user_id', 'country', 'gender', 'age', 'session_id', 'user_id', 'device', - 'os', 'transaction_id', 'session_id', 'timestamp', 'amount', 'approved' + 'user_id', 'country', 'gender', 'age', 'session_id', 'device', + 'os', 'transaction_id', 'timestamp', 'amount', 'approved' ], 'Metric': [ 'KeyUniqueness', 'CategoryAdherence', 'CategoryAdherence', 'BoundaryAdherence', - 'KeyUniqueness', 'KeyUniqueness', 'CategoryAdherence', 'CategoryAdherence', - 'KeyUniqueness', 'KeyUniqueness', 'BoundaryAdherence', 'BoundaryAdherence', + 'KeyUniqueness', 'CategoryAdherence', 'CategoryAdherence', + 'KeyUniqueness', 'BoundaryAdherence', 'BoundaryAdherence', 'CategoryAdherence' ], 'Score': [ - 1.0, 1.0, 1.0, 0.9, 1.0, 0.7777777777777778, 1.0, 1.0, 1.0, 0.9, 0.9, + 1.0, 1.0, 1.0, 0.9, 1.0, 1.0, 1.0, 1.0, 0.9, 0.8, 1.0 ] })