Rename implementation classes with an -Impl suffix and drop the -Info suffix from info classes

sodadata · Feb 15, 2025 · 4a92875 · 4a92875
1 parent ac0740b
commit 4a92875
Show file tree

Hide file tree

Showing 12 changed files with 353 additions and 398 deletions.
diff --git a/soda-core/src/soda_core/cli/soda.py b/soda-core/src/soda_core/cli/soda.py
@@ -31,7 +31,7 @@ def configure_logging():
         force=True,  # Override any previously set handlers.
         # https://docs.python.org/3/library/logging.html#logrecord-attributes
         # %(name)s
-        format="%(asctime)s [%(levelname)s] %(message)s",
+        format="%(message)s",
         handlers=[logging.StreamHandler(sys.stdout)],
     )
 
@@ -115,7 +115,8 @@ def create_data_source(data_source_file_path: str, data_source_type: str):
     print(f"\u2705 Created data source file '{data_source_file_path}'")
 
 
-def test_data_source(data_source_file_path: str):
+# Name has a leading underscore; otherwise PyCharm thinks this is a unit test file.
+def _test_data_source(data_source_file_path: str):
     print(f"Testing data source configuration file {data_source_file_path}")
     from soda_core.common.data_source import DataSource
     data_source: DataSource = DataSource.from_file(data_source_file_path)
@@ -144,7 +145,8 @@ def create_soda_cloud(soda_cloud_file_path: str):
     print(f"\u2705 Created Soda Cloud configuration file '{soda_cloud_file_path}'")
 
 
-def test_soda_cloud(soda_cloud_file_path: str):
+# Name has a leading underscore; otherwise PyCharm thinks this is a unit test file.
+def _test_soda_cloud(soda_cloud_file_path: str):
     from soda_core.common.soda_cloud import SodaCloud
     print(f"Testing soda cloud file {soda_cloud_file_path}")
     soda_cloud_yaml_source: YamlSource = YamlSource.from_file_path(soda_cloud_file_path)
@@ -263,11 +265,11 @@ def main():
         elif args.command == "create-data-source":
             create_data_source(args.file, args.type)
         elif args.command == "test-data-source":
-            test_data_source(args.data_source)
+            _test_data_source(args.data_source)
         elif args.command == "create-soda-cloud":
             create_soda_cloud(args.file)
         elif args.command == "test-soda-cloud":
-            test_soda_cloud(args.soda_cloud)
+            _test_soda_cloud(args.soda_cloud)
         else:
             cli_parser.print_help()
     except Exception as e:

diff --git a/soda-core/src/soda_core/common/soda_cloud.py b/soda-core/src/soda_core/common/soda_cloud.py
@@ -18,7 +18,7 @@
 from soda_core.common.version import SODA_CORE_VERSION
 from soda_core.common.yaml import YamlFileContent, YamlObject
 from soda_core.contracts.contract_verification import ContractResult, \
-    CheckResult, CheckOutcome, ThresholdInfo
+    CheckResult, CheckOutcome, Threshold
 
 
 class SodaCloud:
@@ -229,7 +229,7 @@ def build_check_result_cloud_dict(self, check_result: CheckResult) -> dict:
             "source": "soda-contract"
         }
         if check_result.metric_value is not None and check_result.check.threshold is not None:
-            t: ThresholdInfo = check_result.check.threshold
+            t: Threshold = check_result.check.threshold
             fail_threshold: dict = {}
             if t.must_be_less_than_or_equal is not None:
                 fail_threshold["greaterThan"] = t.must_be_less_than_or_equal

diff --git a/soda-core/src/soda_core/contracts/contract_verification.py b/soda-core/src/soda_core/contracts/contract_verification.py
@@ -196,7 +196,7 @@ class YamlFileContentInfo:
 
 
 @dataclass
-class ContractInfo:
+class Contract:
     data_source_name: str
     dataset_prefix: list[str]
     dataset_name: str
@@ -206,20 +206,21 @@ class ContractInfo:
 
 @dataclass
 class DataSourceInfo:
+    # TODO rename to DataSource (but first rename DataSource to DataSourceImpl?)
     name: str
     type: str
 
 
 @dataclass
-class ThresholdInfo:
+class Threshold:
     must_be_greater_than: Number | None = None
     must_be_greater_than_or_equal: Number | None = None
     must_be_less_than: Number | None = None
     must_be_less_than_or_equal: Number | None = None
 
 
 @dataclass
-class CheckInfo:
+class Check:
     column_name: str | None
     type: str
     name: str # Short description used in UI. Required. Between 1 and 4000 chars.  User defined with key 'name' or auto-generated.
@@ -228,21 +229,21 @@ class CheckInfo:
     column_name: str | None
     contract_file_line: int
     contract_file_column: int
-    threshold: ThresholdInfo | None
+    threshold: Threshold | None
 
 
 class CheckResult(ABC):
 
     def __init__(
         self,
-        contract: ContractInfo,
-        check: CheckInfo,
+        contract: Contract,
+        check: Check,
         metric_value: Number | None,
         outcome: CheckOutcome,
         diagnostic_lines: list[str]
     ):
-        self.contract: ContractInfo = contract
-        self.check: CheckInfo = check
+        self.contract: Contract = contract
+        self.check: Check = check
         self.metric_value: Number | None = metric_value
         self.outcome: CheckOutcome = outcome
         self.diagnostic_lines: list[str] = diagnostic_lines
@@ -278,7 +279,7 @@ class ContractResult:
 
     def __init__(
             self,
-            contract_info: ContractInfo,
+            contract_info: Contract,
             data_source_info: DataSourceInfo,
             data_timestamp: datetime | None,
             started_timestamp: datetime,
@@ -290,7 +291,7 @@ def __init__(
             check_results: list[CheckResult],
             logs: Logs
     ):
-        self.contract_info: ContractInfo = contract_info
+        self.contract_info: Contract = contract_info
         # TODO move to contract info or use the data_source_info
         self.data_source_name: str = data_source_name
         # TODO move to contract info
@@ -312,7 +313,7 @@ def failed(self) -> bool:
         Returns true if there are checks that have failed.
         Ignores execution errors in the logs.
         """
-        return any(check.outcome == CheckOutcome.FAILED for check in self.check_results)
+        return any(check_result.outcome == CheckOutcome.FAILED for check_result in self.check_results)
 
     def passed(self) -> bool:
         """

diff --git a/soda-core/src/soda_core/contracts/impl/check_types/invalidity_check.py b/soda-core/src/soda_core/contracts/impl/check_types/invalidity_check.py
@@ -3,13 +3,13 @@
 from soda_core.common.data_source import DataSource
 from soda_core.common.data_source_results import QueryResult
 from soda_core.common.sql_dialect import *
-from soda_core.contracts.contract_verification import CheckResult, CheckOutcome, Measurement, CheckInfo, ContractInfo
+from soda_core.contracts.contract_verification import CheckResult, CheckOutcome, Measurement, Check, Contract
 from soda_core.contracts.impl.check_types.invalidity_check_yaml import InvalidCheckYaml
 from soda_core.contracts.impl.check_types.missing_check_yaml import MissingCheckYaml
 from soda_core.contracts.impl.check_types.row_count_check import RowCountMetric
-from soda_core.contracts.impl.contract_verification_impl import MetricsResolver, Check, AggregationMetric, Threshold, \
-    ThresholdType, DerivedPercentageMetric, CheckParser, Contract, Column, MissingAndValidity, MissingAndValidityCheck, \
-    Metric, Query, ValidReferenceData, MeasurementValues
+from soda_core.contracts.impl.contract_verification_impl import MetricsResolver, CheckImpl, AggregationMetricImpl, ThresholdImpl, \
+    ThresholdType, DerivedPercentageMetricImpl, CheckParser, ContractImpl, ColumnImpl, MissingAndValidity, MissingAndValidityCheckImpl, \
+    MetricImpl, Query, ValidReferenceData, MeasurementValues
 from soda_core.contracts.impl.contract_yaml import ColumnYaml, CheckYaml
 
 
@@ -20,79 +20,79 @@ def get_check_type_names(self) -> list[str]:
 
     def parse_check(
         self,
-        contract: Contract,
-        column: Column | None,
+        contract_impl: ContractImpl,
+        column_impl: ColumnImpl | None,
         check_yaml: MissingCheckYaml,
         metrics_resolver: MetricsResolver,
-    ) -> Check | None:
+    ) -> CheckImpl | None:
         return InvalidCheck(
-            contract=contract,
-            column=column,
+            contract_impl=contract_impl,
+            column_impl=column_impl,
             check_yaml=check_yaml,
             metrics_resolver=metrics_resolver,
         )
 
 
-class InvalidCheck(MissingAndValidityCheck):
+class InvalidCheck(MissingAndValidityCheckImpl):
 
     def __init__(
         self,
-        contract: Contract,
-        column: Column,
+        contract_impl: ContractImpl,
+        column_impl: ColumnImpl,
         check_yaml: InvalidCheckYaml,
         metrics_resolver: MetricsResolver,
     ):
         super().__init__(
-            contract=contract,
-            column=column,
+            contract_impl=contract_impl,
+            column_impl=column_impl,
             check_yaml=check_yaml,
         )
-        self.threshold = Threshold.create(
+        self.threshold = ThresholdImpl.create(
             check_yaml=check_yaml,
-            default_threshold=Threshold(type=ThresholdType.SINGLE_COMPARATOR,must_be=0)
+            default_threshold=ThresholdImpl(type=ThresholdType.SINGLE_COMPARATOR, must_be=0)
         )
 
         # TODO create better support in class hierarchy for common vs specific stuff.  name is common.  see other check type impls
-        metric_name: str = Threshold.get_metric_name(check_yaml.type, column=column)
+        metric_name: str = ThresholdImpl.get_metric_name(check_yaml.type, column_impl=column_impl)
         self.name = check_yaml.name if check_yaml.name else (
             self.threshold.get_assertion_summary(metric_name=metric_name) if self.threshold
             else f"{check_yaml.type} (invalid threshold)"
         )
 
-        self.invalid_count_metric: Metric | None = None
+        self.invalid_count_metric_impl: MetricImpl | None = None
         if self.missing_and_validity.has_reference_data():
             # noinspection PyTypeChecker
-            self.invalid_count_metric = self._resolve_metric(InvalidReferenceCountMetric(
-                contract=contract,
-                column=column,
+            self.invalid_count_metric_impl = self._resolve_metric(InvalidReferenceCountMetricImpl(
+                contract_impl=contract_impl,
+                column_impl=column_impl,
                 missing_and_validity=self.missing_and_validity
             ))
             self.queries.append(InvalidReferenceCountQuery(
-                metric=self.invalid_count_metric,
-                data_source=contract.data_source
+                metric_impl=self.invalid_count_metric_impl,
+                data_source=contract_impl.data_source
             ))
         else:
-            self.invalid_count_metric = self._resolve_metric(InvalidCountMetric(
-                contract=contract,
-                column=column,
-                check=self
+            self.invalid_count_metric_impl = self._resolve_metric(InvalidCountMetric(
+                contract_impl=contract_impl,
+                column_impl=column_impl,
+                check_impl=self
             ))
 
         if self.type == "invalid_percent":
             self.row_count_metric = self._resolve_metric(RowCountMetric(
-                contract=contract,
+                contract_impl=contract_impl,
             ))
 
-            self.invalid_percent_metric = self._resolve_metric(DerivedPercentageMetric(
+            self.invalid_percent_metric = self._resolve_metric(DerivedPercentageMetricImpl(
                 metric_type="invalid_percent",
-                fraction_metric=self.invalid_count_metric,
-                total_metric=self.row_count_metric
+                fraction_metric_impl=self.invalid_count_metric_impl,
+                total_metric_impl=self.row_count_metric
             ))
 
-    def evaluate(self, measurement_values: MeasurementValues, contract_info: ContractInfo) -> CheckResult:
+    def evaluate(self, measurement_values: MeasurementValues, contract_info: Contract) -> CheckResult:
         outcome: CheckOutcome = CheckOutcome.NOT_EVALUATED
 
-        invalid_count: int = measurement_values.get_value(self.invalid_count_metric)
+        invalid_count: int = measurement_values.get_value(self.invalid_count_metric_impl)
         diagnostic_lines = [
             f"Actual invalid_count was {invalid_count}"
         ]
@@ -123,23 +123,23 @@ def evaluate(self, measurement_values: MeasurementValues, contract_info: Contrac
         )
 
 
-class InvalidCountMetric(AggregationMetric):
+class InvalidCountMetric(AggregationMetricImpl):
 
     def __init__(
         self,
-        contract: Contract,
-        column: Column,
-        check: MissingAndValidityCheck,
+        contract_impl: ContractImpl,
+        column_impl: ColumnImpl,
+        check_impl: MissingAndValidityCheckImpl,
     ):
         super().__init__(
-            contract=contract,
-            column=column,
+            contract_impl=contract_impl,
+            column_impl=column_impl,
             metric_type="invalid_count",
         )
-        self.missing_and_validity: MissingAndValidity = check.missing_and_validity
+        self.missing_and_validity: MissingAndValidity = check_impl.missing_and_validity
 
     def sql_expression(self) -> SqlExpression:
-        column_name: str = self.column.column_yaml.name
+        column_name: str = self.column_impl.column_yaml.name
         return self.missing_and_validity.get_sum_invalid_count_expr(column_name)
 
     def convert_db_value(self, value) -> any:
@@ -149,18 +149,18 @@ def convert_db_value(self, value) -> any:
         return int(value)
 
 
-class InvalidReferenceCountMetric(Metric):
+class InvalidReferenceCountMetricImpl(MetricImpl):
 
     def __init__(
         self,
-        contract: Contract,
-        column: Column,
+        contract_impl: ContractImpl,
+        column_impl: ColumnImpl,
         missing_and_validity: MissingAndValidity
     ):
         super().__init__(
-            contract=contract,
+            contract_impl=contract_impl,
             metric_type="invalid_count",
-            column=column,
+            column_impl=column_impl,
         )
         self.missing_and_validity = missing_and_validity
 
@@ -169,27 +169,27 @@ class InvalidReferenceCountQuery(Query):
 
     def __init__(
         self,
-        metric: InvalidReferenceCountMetric,
+        metric_impl: InvalidReferenceCountMetricImpl,
         data_source: DataSource
     ):
         super().__init__(
             data_source=data_source,
-            metrics=[metric]
+            metrics=[metric_impl]
         )
 
-        valid_reference_data: ValidReferenceData = metric.missing_and_validity.valid_reference_data
+        valid_reference_data: ValidReferenceData = metric_impl.missing_and_validity.valid_reference_data
 
-        referencing_dataset_name: str = metric.contract.dataset_name
-        referencing_dataset_prefix: str | None = metric.contract.dataset_prefix
-        referencing_column_name: str = metric.column.column_yaml.name
+        referencing_dataset_name: str = metric_impl.contract_impl.dataset_name
+        referencing_dataset_prefix: str | None = metric_impl.contract_impl.dataset_prefix
+        referencing_column_name: str = metric_impl.column_impl.column_yaml.name
         # C stands for the 'C'ontract dataset
         referencing_alias: str = "C"
 
         referenced_dataset_name: str = valid_reference_data.dataset_name
         referenced_dataset_prefix: list[str] | None = (
             valid_reference_data.dataset_prefix
             if valid_reference_data.dataset_prefix is not None
-            else metric.contract.dataset_prefix
+            else metric_impl.contract_impl.dataset_prefix
         )
         referenced_column: str = valid_reference_data.column
         # R stands for the 'R'eference dataset
@@ -213,9 +213,9 @@ def __init__(
     def execute(self) -> list[Measurement]:
         query_result: QueryResult = self.data_source.execute_query(self.sql)
         metric_value = query_result.rows[0][0]
-        metric: Metric = self.metrics[0]
+        metric_impl: MetricImpl = self.metrics[0]
         return [Measurement(
-            metric_id=metric.id,
+            metric_id=metric_impl.id,
             value=metric_value,
-            metric_name=metric.type
+            metric_name=metric_impl.type
         )]