diff --git a/tests/integration/reports/multi_table/_properties/test_cardinality.py b/tests/integration/reports/multi_table/_properties/test_cardinality.py index 7b45d778..dbcd3381 100644 --- a/tests/integration/reports/multi_table/_properties/test_cardinality.py +++ b/tests/integration/reports/multi_table/_properties/test_cardinality.py @@ -1,4 +1,5 @@ """Test multi-table cardinality properties.""" +import pandas as pd from plotly.graph_objs._figure import Figure from sdmetrics.demos import load_multi_table_demo @@ -18,3 +19,74 @@ def test_cardinality_property(): # Assert assert score == 0.8 assert isinstance(figure, Figure) + + +def test_with_multi_foreign_key(): + """Test the ``Cardinality`` with multiple foreign keys.""" + # Setup + real_data = { + 'bank': pd.DataFrame({ + 'primary_key': [1, 2, 3, 4, 5], + 'category': ['a', 'b', 'c', 'd', 'e'], + 'numerical': [1, 2, 3, 4, 5], + }), + 'transactions': pd.DataFrame({ + 'f_key_1': [1, 2, 3, 2, 1], + 'f_key_2': [1, 5, 3, 2, 4], + }) + } + + synthetic_data = { + 'bank': pd.DataFrame({ + 'primary_key': [1, 2, 3, 4, 5], + 'category': ['a', 'b', 'c', 'd', 'e'], + 'numerical': [1, 2, 3, 4, 5] + }), + 'transactions': pd.DataFrame({ + 'f_key_1': [5, 2, 3, 4, 1], + 'f_key_2': [1, 5, 5, 2, 4], + }) + } + + metadata = { + 'tables': { + 'bank': { + 'primary_key': 'primary_key', + 'columns': { + 'primary_key': {'sdtype': 'id'}, + 'category': {'sdtype': 'categorical'}, + 'numerical': {'sdtype': 'numerical'} + } + }, + 'transactions': { + 'columns': { + 'f_key_1': {'sdtype': 'id'}, + 'f_key_2': {'sdtype': 'id'} + } + } + }, + 'relationships': [ + { + 'parent_table_name': 'bank', + 'child_table_name': 'transactions', + 'parent_primary_key': 'primary_key', + 'child_foreign_key': 'f_key_1' + }, + { + 'parent_table_name': 'bank', + 'child_table_name': 'transactions', + 'parent_primary_key': 'primary_key', + 'child_foreign_key': 'f_key_2' + } + ] + } + + cardinality_property = Cardinality() + + # Run + cardinality_property.get_score(real_data, synthetic_data, metadata) + fig = cardinality_property.get_visualization('bank') + + # Assert + expected_labels = ['transactions (f_key_1) → bank', 'transactions (f_key_2) → bank'] + assert fig.data[0].x.tolist() == expected_labels diff --git a/tests/integration/reports/multi_table/test_quality_report.py b/tests/integration/reports/multi_table/test_quality_report.py index eec51cea..f1408b88 100644 --- a/tests/integration/reports/multi_table/test_quality_report.py +++ b/tests/integration/reports/multi_table/test_quality_report.py @@ -138,6 +138,7 @@ def test_multi_table_quality_report(): expected_df_2 = pd.DataFrame({ 'Child Table': ['table2'], 'Parent Table': ['table1'], + 'Foreign Key': ['col6'], 'Metric': ['CardinalityShapeSimilarity'], 'Score': [0.75], }) diff --git a/tests/unit/reports/multi_table/_properties/test_cardinality.py b/tests/unit/reports/multi_table/_properties/test_cardinality.py index 56e55e2c..d00550a2 100644 --- a/tests/unit/reports/multi_table/_properties/test_cardinality.py +++ b/tests/unit/reports/multi_table/_properties/test_cardinality.py @@ -58,8 +58,16 @@ def test_get_score(self, mock_cardinalityshapesimilarity): cardinality._compute_average = mock_compute_average progress_bar = Mock() relationships = [ - {'child_table_name': 'users_child', 'parent_table_name': 'users_parent'}, - {'child_table_name': 'sessions_child', 'parent_table_name': 'sessions_parent'} + { + 'child_table_name': 'users_child', + 'parent_table_name': 'users_parent', + 'child_foreign_key': 'users_child_id', + }, + { + 'child_table_name': 'sessions_child', + 'parent_table_name': 'sessions_parent', + 'child_foreign_key': 'sessions_child_id', + } ] metadata = {'relationships': relationships} @@ -83,8 +91,16 @@ def test_get_score_raises_errors(self, mock_cardinalityshapesimilarity): cardinality = Cardinality() progress_bar = Mock() relationships = [ - {'child_table_name': 'users_child', 'parent_table_name': 'users_parent'}, - {'child_table_name': 'sessions_child', 'parent_table_name': 'sessions_parent'} + { + 'child_table_name': 'users_child', + 'parent_table_name': 'users_parent', + 'child_foreign_key': 'users_child_id', + }, + { + 'child_table_name': 'sessions_child', + 'parent_table_name': 'sessions_parent', + 'child_foreign_key': 'sessions_child_id', + } ] metadata = {'relationships': relationships} @@ -95,6 +111,7 @@ def test_get_score_raises_errors(self, mock_cardinalityshapesimilarity): expected_details_property = pd.DataFrame({ 'Child Table': ['users_child', 'sessions_child'], 'Parent Table': ['users_parent', 'sessions_parent'], + 'Foreign Key': ['users_child_id', 'sessions_child_id'], 'Metric': ['CardinalityShapeSimilarity', 'CardinalityShapeSimilarity'], 'Score': [np.nan, np.nan], 'Error': ['ValueError: Users error', 'ValueError: Sessions error'] @@ -105,8 +122,8 @@ def test_get_score_raises_errors(self, mock_cardinalityshapesimilarity): progress_bar.update.assert_called() assert progress_bar.update.call_count == 2 - def test_get_details_for_table_name(self): - """Test the ``_get_details_for_table_name`` method. + def test_get_details_with_table_name(self): + """Test the ``get_details`` method. Test that the method returns the correct details for the given table name, either from the child or parent table. @@ -116,19 +133,21 @@ def test_get_details_for_table_name(self): cardinality.details = pd.DataFrame({ 'Child Table': ['users_child', 'sessions_child'], 'Parent Table': ['users_parent', 'sessions_parent'], + 'Foreign Key': ['users_child_id', 'sessions_child_id'], 'Metric': ['CardinalityShapeSimilarity', 'SomeOtherMetric'], 'Score': [1.0, 0.5], 'Error': [None, 'Some error'] }) # Run - details_users_child = cardinality._get_details_for_table_name('users_child') - details_sessions_parent = cardinality._get_details_for_table_name('sessions_parent') + details_users_child = cardinality.get_details('users_child') + details_sessions_parent = cardinality.get_details('sessions_parent') # Assert for child table assert details_users_child.equals(pd.DataFrame({ 'Child Table': ['users_child'], 'Parent Table': ['users_parent'], + 'Foreign Key': ['users_child_id'], 'Metric': ['CardinalityShapeSimilarity'], 'Score': [1.0], 'Error': [None] @@ -138,31 +157,12 @@ def test_get_details_for_table_name(self): assert details_sessions_parent.equals(pd.DataFrame({ 'Child Table': ['sessions_child'], 'Parent Table': ['sessions_parent'], + 'Foreign Key': ['sessions_child_id'], 'Metric': ['SomeOtherMetric'], 'Score': [0.5], 'Error': ['Some error'] }, index=[1])) - def test_get_details(self): - """Test the ``get_details`` method. - - Test that the method returns the correct details for the given property and table name. - """ - # Setup - mock__get_details_for_table_name = Mock(return_value='Details for table name') - cardinality = Cardinality() - cardinality.details = pd.DataFrame({'a': ['b']}) - cardinality._get_details_for_table_name = mock__get_details_for_table_name - - # Run - details = cardinality.get_details('table_name') - entire_details = cardinality.get_details() - - # Assert - assert details == 'Details for table name' - pd.testing.assert_frame_equal(entire_details, pd.DataFrame({'a': ['b']})) - mock__get_details_for_table_name.assert_called_once_with('table_name') - def test_get_table_relationships_plot(self): """Test the ``_get_table_relationships_plot`` method. @@ -173,6 +173,7 @@ def test_get_table_relationships_plot(self): instance.details = pd.DataFrame({ 'Child Table': ['users_child', 'sessions_child'], 'Parent Table': ['users_parent', 'sessions_parent'], + 'Foreign Key': ['users_child_id', 'sessions_child_id'], 'Metric': ['CardinalityShapeSimilarity', 'SomeOtherMetric'], 'Score': [1.0, 0.5], 'Error': [None, 'Some error'] @@ -184,7 +185,7 @@ def test_get_table_relationships_plot(self): # Assert assert isinstance(fig, Figure) - expected_x = ['users_child → users_parent'] + expected_x = ['users_child (users_child_id) → users_parent'] expected_y = [1.0] expected_title = 'Table Relationships (Average Score=1.0)' @@ -195,7 +196,7 @@ def test_get_table_relationships_plot(self): def test_get_visualization(self): """Test the ``get_visualization`` method.""" # Setup - mock__get_table_relationships_plot = Mock(return_value='Table relationships plot') + mock__get_table_relationships_plot = Mock(side_effect=[Figure()]) cardinality = Cardinality() cardinality._get_table_relationships_plot = mock__get_table_relationships_plot @@ -203,4 +204,5 @@ def test_get_visualization(self): fig = cardinality.get_visualization('table_name') # Assert - assert fig == 'Table relationships plot' + assert isinstance(fig, Figure) + mock__get_table_relationships_plot.assert_called_once_with('table_name')