Skip to content

Commit

Permalink
Renaming implementation classes to -Impl and info classes to actual name
Browse files Browse the repository at this point in the history
  • Loading branch information
tombaeyens committed Feb 15, 2025
1 parent ac0740b commit 4a92875
Show file tree
Hide file tree
Showing 12 changed files with 353 additions and 398 deletions.
12 changes: 7 additions & 5 deletions soda-core/src/soda_core/cli/soda.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def configure_logging():
force=True, # Override any previously set handlers.
# https://docs.python.org/3/library/logging.html#logrecord-attributes
# %(name)s
format="%(asctime)s [%(levelname)s] %(message)s",
format="%(message)s",
handlers=[logging.StreamHandler(sys.stdout)],
)

Expand Down Expand Up @@ -115,7 +115,8 @@ def create_data_source(data_source_file_path: str, data_source_type: str):
print(f"\u2705 Created data source file '{data_source_file_path}'")


def test_data_source(data_source_file_path: str):
# The name has a leading underscore; otherwise PyCharm thinks this is a unit test file.
def _test_data_source(data_source_file_path: str):
print(f"Testing data source configuration file {data_source_file_path}")
from soda_core.common.data_source import DataSource
data_source: DataSource = DataSource.from_file(data_source_file_path)
Expand Down Expand Up @@ -144,7 +145,8 @@ def create_soda_cloud(soda_cloud_file_path: str):
print(f"\u2705 Created Soda Cloud configuration file '{soda_cloud_file_path}'")


def test_soda_cloud(soda_cloud_file_path: str):
# The name has a leading underscore; otherwise PyCharm thinks this is a unit test file.
def _test_soda_cloud(soda_cloud_file_path: str):
from soda_core.common.soda_cloud import SodaCloud
print(f"Testing soda cloud file {soda_cloud_file_path}")
soda_cloud_yaml_source: YamlSource = YamlSource.from_file_path(soda_cloud_file_path)
Expand Down Expand Up @@ -263,11 +265,11 @@ def main():
elif args.command == "create-data-source":
create_data_source(args.file, args.type)
elif args.command == "test-data-source":
test_data_source(args.data_source)
_test_data_source(args.data_source)
elif args.command == "create-soda-cloud":
create_soda_cloud(args.file)
elif args.command == "test-soda-cloud":
test_soda_cloud(args.soda_cloud)
_test_soda_cloud(args.soda_cloud)
else:
cli_parser.print_help()
except Exception as e:
Expand Down
4 changes: 2 additions & 2 deletions soda-core/src/soda_core/common/soda_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from soda_core.common.version import SODA_CORE_VERSION
from soda_core.common.yaml import YamlFileContent, YamlObject
from soda_core.contracts.contract_verification import ContractResult, \
CheckResult, CheckOutcome, ThresholdInfo
CheckResult, CheckOutcome, Threshold


class SodaCloud:
Expand Down Expand Up @@ -229,7 +229,7 @@ def build_check_result_cloud_dict(self, check_result: CheckResult) -> dict:
"source": "soda-contract"
}
if check_result.metric_value is not None and check_result.check.threshold is not None:
t: ThresholdInfo = check_result.check.threshold
t: Threshold = check_result.check.threshold
fail_threshold: dict = {}
if t.must_be_less_than_or_equal is not None:
fail_threshold["greaterThan"] = t.must_be_less_than_or_equal
Expand Down
23 changes: 12 additions & 11 deletions soda-core/src/soda_core/contracts/contract_verification.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ class YamlFileContentInfo:


@dataclass
class ContractInfo:
class Contract:
data_source_name: str
dataset_prefix: list[str]
dataset_name: str
Expand All @@ -206,20 +206,21 @@ class ContractInfo:

@dataclass
class DataSourceInfo:
# TODO rename to DataSource (but first rename DataSource to DataSourceImpl?)
name: str
type: str


@dataclass
class ThresholdInfo:
class Threshold:
must_be_greater_than: Number | None = None
must_be_greater_than_or_equal: Number | None = None
must_be_less_than: Number | None = None
must_be_less_than_or_equal: Number | None = None


@dataclass
class CheckInfo:
class Check:
column_name: str | None
type: str
name: str # Short description used in UI. Required. Between 1 and 4000 chars. User defined with key 'name' or auto-generated.
Expand All @@ -228,21 +229,21 @@ class CheckInfo:
column_name: str | None
contract_file_line: int
contract_file_column: int
threshold: ThresholdInfo | None
threshold: Threshold | None


class CheckResult(ABC):

def __init__(
self,
contract: ContractInfo,
check: CheckInfo,
contract: Contract,
check: Check,
metric_value: Number | None,
outcome: CheckOutcome,
diagnostic_lines: list[str]
):
self.contract: ContractInfo = contract
self.check: CheckInfo = check
self.contract: Contract = contract
self.check: Check = check
self.metric_value: Number | None = metric_value
self.outcome: CheckOutcome = outcome
self.diagnostic_lines: list[str] = diagnostic_lines
Expand Down Expand Up @@ -278,7 +279,7 @@ class ContractResult:

def __init__(
self,
contract_info: ContractInfo,
contract_info: Contract,
data_source_info: DataSourceInfo,
data_timestamp: datetime | None,
started_timestamp: datetime,
Expand All @@ -290,7 +291,7 @@ def __init__(
check_results: list[CheckResult],
logs: Logs
):
self.contract_info: ContractInfo = contract_info
self.contract_info: Contract = contract_info
# TODO move to contract info or use the data_source_info
self.data_source_name: str = data_source_name
# TODO move to contract info
Expand All @@ -312,7 +313,7 @@ def failed(self) -> bool:
Returns True if any check has failed.
Ignores execution errors in the logs.
"""
return any(check.outcome == CheckOutcome.FAILED for check in self.check_results)
return any(check_result.outcome == CheckOutcome.FAILED for check_result in self.check_results)

def passed(self) -> bool:
"""
Expand Down
112 changes: 56 additions & 56 deletions soda-core/src/soda_core/contracts/impl/check_types/invalidity_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
from soda_core.common.data_source import DataSource
from soda_core.common.data_source_results import QueryResult
from soda_core.common.sql_dialect import *
from soda_core.contracts.contract_verification import CheckResult, CheckOutcome, Measurement, CheckInfo, ContractInfo
from soda_core.contracts.contract_verification import CheckResult, CheckOutcome, Measurement, Check, Contract
from soda_core.contracts.impl.check_types.invalidity_check_yaml import InvalidCheckYaml
from soda_core.contracts.impl.check_types.missing_check_yaml import MissingCheckYaml
from soda_core.contracts.impl.check_types.row_count_check import RowCountMetric
from soda_core.contracts.impl.contract_verification_impl import MetricsResolver, Check, AggregationMetric, Threshold, \
ThresholdType, DerivedPercentageMetric, CheckParser, Contract, Column, MissingAndValidity, MissingAndValidityCheck, \
Metric, Query, ValidReferenceData, MeasurementValues
from soda_core.contracts.impl.contract_verification_impl import MetricsResolver, CheckImpl, AggregationMetricImpl, ThresholdImpl, \
ThresholdType, DerivedPercentageMetricImpl, CheckParser, ContractImpl, ColumnImpl, MissingAndValidity, MissingAndValidityCheckImpl, \
MetricImpl, Query, ValidReferenceData, MeasurementValues
from soda_core.contracts.impl.contract_yaml import ColumnYaml, CheckYaml


Expand All @@ -20,79 +20,79 @@ def get_check_type_names(self) -> list[str]:

def parse_check(
self,
contract: Contract,
column: Column | None,
contract_impl: ContractImpl,
column_impl: ColumnImpl | None,
check_yaml: MissingCheckYaml,
metrics_resolver: MetricsResolver,
) -> Check | None:
) -> CheckImpl | None:
return InvalidCheck(
contract=contract,
column=column,
contract_impl=contract_impl,
column_impl=column_impl,
check_yaml=check_yaml,
metrics_resolver=metrics_resolver,
)


class InvalidCheck(MissingAndValidityCheck):
class InvalidCheck(MissingAndValidityCheckImpl):

def __init__(
self,
contract: Contract,
column: Column,
contract_impl: ContractImpl,
column_impl: ColumnImpl,
check_yaml: InvalidCheckYaml,
metrics_resolver: MetricsResolver,
):
super().__init__(
contract=contract,
column=column,
contract_impl=contract_impl,
column_impl=column_impl,
check_yaml=check_yaml,
)
self.threshold = Threshold.create(
self.threshold = ThresholdImpl.create(
check_yaml=check_yaml,
default_threshold=Threshold(type=ThresholdType.SINGLE_COMPARATOR,must_be=0)
default_threshold=ThresholdImpl(type=ThresholdType.SINGLE_COMPARATOR, must_be=0)
)

# TODO: Create better support in the class hierarchy for common vs. check-specific logic. The name is common; see the other check type implementations.
metric_name: str = Threshold.get_metric_name(check_yaml.type, column=column)
metric_name: str = ThresholdImpl.get_metric_name(check_yaml.type, column_impl=column_impl)
self.name = check_yaml.name if check_yaml.name else (
self.threshold.get_assertion_summary(metric_name=metric_name) if self.threshold
else f"{check_yaml.type} (invalid threshold)"
)

self.invalid_count_metric: Metric | None = None
self.invalid_count_metric_impl: MetricImpl | None = None
if self.missing_and_validity.has_reference_data():
# noinspection PyTypeChecker
self.invalid_count_metric = self._resolve_metric(InvalidReferenceCountMetric(
contract=contract,
column=column,
self.invalid_count_metric_impl = self._resolve_metric(InvalidReferenceCountMetricImpl(
contract_impl=contract_impl,
column_impl=column_impl,
missing_and_validity=self.missing_and_validity
))
self.queries.append(InvalidReferenceCountQuery(
metric=self.invalid_count_metric,
data_source=contract.data_source
metric_impl=self.invalid_count_metric_impl,
data_source=contract_impl.data_source
))
else:
self.invalid_count_metric = self._resolve_metric(InvalidCountMetric(
contract=contract,
column=column,
check=self
self.invalid_count_metric_impl = self._resolve_metric(InvalidCountMetric(
contract_impl=contract_impl,
column_impl=column_impl,
check_impl=self
))

if self.type == "invalid_percent":
self.row_count_metric = self._resolve_metric(RowCountMetric(
contract=contract,
contract_impl=contract_impl,
))

self.invalid_percent_metric = self._resolve_metric(DerivedPercentageMetric(
self.invalid_percent_metric = self._resolve_metric(DerivedPercentageMetricImpl(
metric_type="invalid_percent",
fraction_metric=self.invalid_count_metric,
total_metric=self.row_count_metric
fraction_metric_impl=self.invalid_count_metric_impl,
total_metric_impl=self.row_count_metric
))

def evaluate(self, measurement_values: MeasurementValues, contract_info: ContractInfo) -> CheckResult:
def evaluate(self, measurement_values: MeasurementValues, contract_info: Contract) -> CheckResult:
outcome: CheckOutcome = CheckOutcome.NOT_EVALUATED

invalid_count: int = measurement_values.get_value(self.invalid_count_metric)
invalid_count: int = measurement_values.get_value(self.invalid_count_metric_impl)
diagnostic_lines = [
f"Actual invalid_count was {invalid_count}"
]
Expand Down Expand Up @@ -123,23 +123,23 @@ def evaluate(self, measurement_values: MeasurementValues, contract_info: Contrac
)


class InvalidCountMetric(AggregationMetric):
class InvalidCountMetric(AggregationMetricImpl):

def __init__(
self,
contract: Contract,
column: Column,
check: MissingAndValidityCheck,
contract_impl: ContractImpl,
column_impl: ColumnImpl,
check_impl: MissingAndValidityCheckImpl,
):
super().__init__(
contract=contract,
column=column,
contract_impl=contract_impl,
column_impl=column_impl,
metric_type="invalid_count",
)
self.missing_and_validity: MissingAndValidity = check.missing_and_validity
self.missing_and_validity: MissingAndValidity = check_impl.missing_and_validity

def sql_expression(self) -> SqlExpression:
column_name: str = self.column.column_yaml.name
column_name: str = self.column_impl.column_yaml.name
return self.missing_and_validity.get_sum_invalid_count_expr(column_name)

def convert_db_value(self, value) -> any:
Expand All @@ -149,18 +149,18 @@ def convert_db_value(self, value) -> any:
return int(value)


class InvalidReferenceCountMetric(Metric):
class InvalidReferenceCountMetricImpl(MetricImpl):

def __init__(
self,
contract: Contract,
column: Column,
contract_impl: ContractImpl,
column_impl: ColumnImpl,
missing_and_validity: MissingAndValidity
):
super().__init__(
contract=contract,
contract_impl=contract_impl,
metric_type="invalid_count",
column=column,
column_impl=column_impl,
)
self.missing_and_validity = missing_and_validity

Expand All @@ -169,27 +169,27 @@ class InvalidReferenceCountQuery(Query):

def __init__(
self,
metric: InvalidReferenceCountMetric,
metric_impl: InvalidReferenceCountMetricImpl,
data_source: DataSource
):
super().__init__(
data_source=data_source,
metrics=[metric]
metrics=[metric_impl]
)

valid_reference_data: ValidReferenceData = metric.missing_and_validity.valid_reference_data
valid_reference_data: ValidReferenceData = metric_impl.missing_and_validity.valid_reference_data

referencing_dataset_name: str = metric.contract.dataset_name
referencing_dataset_prefix: str | None = metric.contract.dataset_prefix
referencing_column_name: str = metric.column.column_yaml.name
referencing_dataset_name: str = metric_impl.contract_impl.dataset_name
referencing_dataset_prefix: str | None = metric_impl.contract_impl.dataset_prefix
referencing_column_name: str = metric_impl.column_impl.column_yaml.name
# C stands for the 'C'ontract dataset
referencing_alias: str = "C"

referenced_dataset_name: str = valid_reference_data.dataset_name
referenced_dataset_prefix: list[str] | None = (
valid_reference_data.dataset_prefix
if valid_reference_data.dataset_prefix is not None
else metric.contract.dataset_prefix
else metric_impl.contract_impl.dataset_prefix
)
referenced_column: str = valid_reference_data.column
# R stands for the 'R'eference dataset
Expand All @@ -213,9 +213,9 @@ def __init__(
def execute(self) -> list[Measurement]:
query_result: QueryResult = self.data_source.execute_query(self.sql)
metric_value = query_result.rows[0][0]
metric: Metric = self.metrics[0]
metric_impl: MetricImpl = self.metrics[0]
return [Measurement(
metric_id=metric.id,
metric_id=metric_impl.id,
value=metric_value,
metric_name=metric.type
metric_name=metric_impl.type
)]
Loading

0 comments on commit 4a92875

Please sign in to comment.