Skip to content

Commit

Permalink
Update DiagnosticReport to calculate base correctness of synthetic …
Browse files Browse the repository at this point in the history
…data (#496)
  • Loading branch information
R-Palazzo authored Nov 6, 2023
1 parent 383551b commit 340b8fe
Show file tree
Hide file tree
Showing 15 changed files with 252 additions and 854 deletions.
151 changes: 0 additions & 151 deletions sdmetrics/reports/_results_handler.py

This file was deleted.

32 changes: 30 additions & 2 deletions sdmetrics/reports/base_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def __init__(self):
self._overall_score = None
self.is_generated = False
self._properties = {}
self._results_handler = None
self.report_info = {
'report_type': self.__class__.__name__,
'generated_date': None,
Expand Down Expand Up @@ -91,6 +90,25 @@ def convert_datetimes(real_data, synthetic_data, metadata):
except Exception:
continue

def _print_results(self, verbose):
"""Print the results.
Args:
verbose (bool):
Whether or not to print results to std.out.
"""
if verbose:
sys.stdout.write(
f'\nOverall Score: {round(self._overall_score * 100, 2)}%\n\n'
)
sys.stdout.write('Properties:\n')

for property_name, property_instance in self._properties.items():
property_score = round(property_instance._compute_average() * 100, 2)
sys.stdout.write(
f'- {property_name}: {property_score}%\n'
)

def generate(self, real_data, synthetic_data, metadata, verbose=True):
"""Generate report.
Expand Down Expand Up @@ -152,7 +170,7 @@ def generate(self, real_data, synthetic_data, metadata, verbose=True):
end_time = time.time()
self.report_info['generation_time'] = end_time - start_time

self._handle_results(verbose)
self._print_results(verbose)

def _check_property_name(self, property_name):
"""Check that the given property name is valid.
Expand All @@ -168,6 +186,16 @@ def _check_property_name(self, property_name):
f" Valid property names are '{valid_property_names}'."
)

def get_score(self):
    """Return the report's overall score.

    ``self._check_report_generated()`` is invoked first.
    NOTE(review): presumably that helper raises when ``generate`` has not
    been run yet; its body is not visible here, so confirm.

    Returns:
        float
            The value stored in ``self._overall_score``.
    """
    self._check_report_generated()
    overall_score = self._overall_score
    return overall_score

def get_info(self):
    """Return a deep copy of the report's ``report_info`` dictionary.

    Because the result is a copy, mutating it does not affect the report.
    """
    info_snapshot = deepcopy(self.report_info)
    return info_snapshot
Expand Down
27 changes: 5 additions & 22 deletions sdmetrics/reports/multi_table/diagnostic_report.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,19 @@
"""Multi table diagnostic report."""
from copy import deepcopy

from sdmetrics.reports._results_handler import DiagnosticReportResultsHandler
from sdmetrics.reports.multi_table._properties import Boundary, Coverage, Synthesis
from sdmetrics.reports.multi_table._properties import DataValidity, RelationshipValidity, Structure
from sdmetrics.reports.multi_table.base_multi_table_report import BaseMultiTableReport


class DiagnosticReport(BaseMultiTableReport):
"""Multi table diagnostic report.
This class creates a diagnostic report for multi-table data. It calculates the diagnostic
score along three properties - Synthesis, Coverage, and Boundary.
score along three properties - Relationship Validity, Data Structure, and Data Validity.
"""

def __init__(self):
super().__init__()
self._properties = {
'Coverage': Coverage(),
'Boundary': Boundary(),
'Synthesis': Synthesis()
'Data Validity': DataValidity(),
'Data Structure': Structure(),
'Relationship Validity': RelationshipValidity()
}
self._results_handler = DiagnosticReportResultsHandler()

def _handle_results(self, verbose):
self._results_handler.print_results(self._properties, verbose)

def get_results(self):
"""Return the diagnostic results.
Returns:
dict
The diagnostic results.
"""
self._check_report_generated()
return deepcopy(self._results_handler.results)
15 changes: 0 additions & 15 deletions sdmetrics/reports/multi_table/quality_report.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""Multi table quality report."""
from sdmetrics.reports._results_handler import QualityReportResultsHandler
from sdmetrics.reports.multi_table._properties import (
Cardinality, ColumnPairTrends, ColumnShapes, InterTableTrends)
from sdmetrics.reports.multi_table.base_multi_table_report import BaseMultiTableReport
Expand All @@ -20,17 +19,3 @@ def __init__(self):
'Cardinality': Cardinality(),
'Intertable Trends': InterTableTrends()
}
self._results_handler = QualityReportResultsHandler()

def _handle_results(self, verbose):
self._results_handler.print_results(self._properties, self._overall_score, verbose)

def get_score(self):
"""Return the overall quality score.
Returns:
float
The overall quality score.
"""
self._check_report_generated()
return self._overall_score
29 changes: 4 additions & 25 deletions sdmetrics/reports/single_table/diagnostic_report.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,18 @@
"""Single table diagnostic report."""
import logging
from copy import deepcopy

from sdmetrics.reports._results_handler import DiagnosticReportResultsHandler
from sdmetrics.reports.base_report import BaseReport
from sdmetrics.reports.single_table._properties import Boundary, Coverage, Synthesis

LOGGER = logging.getLogger(__name__)
from sdmetrics.reports.single_table._properties import DataValidity, Structure


class DiagnosticReport(BaseReport):
"""Single table diagnostic report.
This class creates a diagnostic report for single-table data. It calculates the diagnostic
score along three properties - Synthesis, Coverage, and Boundary.
score along two properties - Data Structure and Data Validity.
"""

def __init__(self):
super().__init__()
self._properties = {
'Coverage': Coverage(),
'Boundary': Boundary(),
'Synthesis': Synthesis()
'Data Validity': DataValidity(),
'Data Structure': Structure(),
}
self._results_handler = DiagnosticReportResultsHandler()

def _handle_results(self, verbose):
self._results_handler.print_results(self._properties, verbose)

def get_results(self):
"""Return the diagnostic results.
Returns:
dict
The diagnostic results.
"""
self._check_report_generated()
return deepcopy(self._results_handler.results)
15 changes: 0 additions & 15 deletions sdmetrics/reports/single_table/quality_report.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""Single table quality report."""
from sdmetrics.reports._results_handler import QualityReportResultsHandler
from sdmetrics.reports.base_report import BaseReport
from sdmetrics.reports.single_table._properties import ColumnPairTrends, ColumnShapes

Expand All @@ -17,17 +16,3 @@ def __init__(self):
'Column Shapes': ColumnShapes(),
'Column Pair Trends': ColumnPairTrends()
}
self._results_handler = QualityReportResultsHandler()

def _handle_results(self, verbose):
self._results_handler.print_results(self._properties, self._overall_score, verbose)

def get_score(self):
"""Return the overall quality score.
Returns:
float
The overall quality score.
"""
self._check_report_generated()
return self._overall_score
Loading

0 comments on commit 340b8fe

Please sign in to comment.