Skip to content

Commit

Permalink
Add alternate keys to dropped columns
Browse files — browse the repository at this point in the history
  • Loading branch information
lajohn4747 committed Nov 22, 2023
1 parent 150f3a5 commit 33ee1c3
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 5 deletions.
3 changes: 2 additions & 1 deletion sdmetrics/single_table/detection/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sdmetrics.errors import IncomputableMetricError
from sdmetrics.goal import Goal
from sdmetrics.single_table.base import SingleTableMetric
from sdmetrics.utils import HyperTransformer
from sdmetrics.utils import HyperTransformer, get_alternate_keys

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -73,6 +73,7 @@ def compute(cls, real_data, synthetic_data, metadata=None):

if metadata is not None:
drop_columns = []
drop_columns.extend(get_alternate_keys(metadata))
if 'columns' in metadata:
for column in metadata['columns']:
if ('primary_key' in metadata and
Expand Down
12 changes: 8 additions & 4 deletions tests/unit/single_table/detection/test_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ def test_ignore_keys_detection_metrics(self, fit_transform_mock, transform_mock)
'col3': [5, 6, 7, 8, 9],
'ID_3': ['a', 'b', 'c', 'd', 'e'],
'blob': ['Hello world!', 'Hello world!', 'This is SDV', 'This is SDV', 'Hello world!'],
'col4': [1, 3, 9, 2, 1]
'col4': [1, 3, 9, 2, 1],
'col5': [10, 20, 30, 40, 50]
})
synthetic_data = pd.DataFrame({
'ID_1': [1, 3, 4, 2, 2],
Expand All @@ -94,7 +95,8 @@ def test_ignore_keys_detection_metrics(self, fit_transform_mock, transform_mock)
'col3': [55, 66, 77, 88, 99],
'ID_3': ['a', 'b', 'e', 'd', 'c'],
'blob': ['Hello world!', 'Hello world!', 'This is SDV', 'This is SDV', 'Hello world!'],
'col4': [4, 1, 3, 1, 9]
'col4': [4, 1, 3, 1, 9],
'col5': [10, 20, 30, 40, 50]
})
metadata = {
'columns': {
Expand All @@ -105,9 +107,11 @@ def test_ignore_keys_detection_metrics(self, fit_transform_mock, transform_mock)
'col3': {'sdtype': 'numerical'},
'ID_3': {'sdtype': 'id'},
'blob': {'sdtype': 'text'},
'col4': {'sdtype': 'numerical', 'pii': False}
'col4': {'sdtype': 'numerical', 'pii': False},
'col5': {'sdtype': 'numerical'}
},
'primary_key': {'ID_1', 'ID_2'}
'primary_key': {'ID_1', 'ID_2'},
'alternate_keys': ['col5']
}

expected_real_dataframe = pd.DataFrame({
Expand Down

0 comments on commit 33ee1c3

Please sign in to comment.