Skip to content

Commit

Permalink
Refactored diagnostics and cleaned up emoticon refs
Browse files Browse the repository at this point in the history
  • Loading branch information
tombaeyens committed Feb 20, 2025
1 parent 7b03654 commit 4e59bf5
Show file tree
Hide file tree
Showing 13 changed files with 185 additions and 141 deletions.
25 changes: 14 additions & 11 deletions soda-core/src/soda_core/cli/soda.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from textwrap import dedent
from typing import Optional

from soda_core.common.logs import Logs
from soda_core.common.logs import Logs, Emoticons
from soda_core.common.yaml import YamlFileContent, YamlSource
from soda_core.contracts.contract_verification import ContractVerification, ContractVerificationBuilder, \
ContractVerificationResult
Expand Down Expand Up @@ -72,10 +72,11 @@ def publish_contract(contract_file_paths: list[str] | None):
def create_data_source(data_source_file_path: str, data_source_type: str):
print(f"Creating {data_source_type} data source YAML file '{data_source_file_path}'")
if exists(data_source_file_path):
print(f"\U0001F92F I'm suppose to create data source file '{data_source_file_path}' but it already exists")
print(f"Could not create data source file '{data_source_file_path}'. "
f"File already exists {Emoticons.POLICE_CAR_LIGHT}.")
return
if data_source_type!= "postgres":
print(f"\U0001F92F Only type postgres is supported atm")
if data_source_type != "postgres":
print(f"{Emoticons.POLICE_CAR_LIGHT} Only type postgres is supported atm")
return
dir: str = dirname(data_source_file_path)
Path(dir).mkdir(parents=True, exist_ok=True)
Expand All @@ -94,7 +95,7 @@ def create_data_source(data_source_file_path: str, data_source_type: str):
single_digit_test_format: ^[0-9]$
"""
).strip())
print(f"\u2705 Created data source file '{data_source_file_path}'")
print(f"{Emoticons.WHITE_CHECK_MARK} Created data source file '{data_source_file_path}'")


# The name has a leading underscore; otherwise PyCharm thinks this is a unit test file
Expand All @@ -104,16 +105,18 @@ def _test_data_source(data_source_file_path: str):
data_source: DataSource = DataSource.from_file(data_source_file_path)
error_message: Optional[str] = data_source.test_connection_error_message()
if error_message:
print(f"\U0001F92F Error: Connection configured in data source file '{data_source_file_path}' failed: {error_message}")
print(f"Could not connect {Emoticons.POLICE_CAR_LIGHT} using data source '{data_source_file_path}': "
f"{error_message}")
exit(1)
else:
print(f"\u2705 Success! Tested data source connection in '{data_source_file_path}'")
print(f"Success! Connection in '{data_source_file_path}' tested ok. {Emoticons.WHITE_CHECK_MARK}")


def create_soda_cloud(soda_cloud_file_path: str):
print(f"Creating Soda Cloud YAML file '{soda_cloud_file_path}'")
if exists(soda_cloud_file_path):
print(f"\U0001F92F I'm suppose to create soda cloud file '{soda_cloud_file_path}' but it already exists")
print(f"Could not create soda cloud file '{soda_cloud_file_path}'. "
f"File already exists {Emoticons.POLICE_CAR_LIGHT}")
dir: str = dirname(soda_cloud_file_path)
Path(dir).mkdir(parents=True, exist_ok=True)
with open(soda_cloud_file_path, "w") as text_file:
Expand All @@ -125,7 +128,7 @@ def create_soda_cloud(soda_cloud_file_path: str):
api_key_secret: ${SODA_CLOUD_API_KEY_SECRET}
"""
).strip())
print(f"\u2705 Created Soda Cloud configuration file '{soda_cloud_file_path}'")
print(f"{Emoticons.WHITE_CHECK_MARK} Created Soda Cloud configuration file '{soda_cloud_file_path}'")


# The name has a leading underscore; otherwise PyCharm thinks this is a unit test file
Expand All @@ -139,10 +142,10 @@ def _test_soda_cloud(soda_cloud_file_path: str):
soda_cloud: SodaCloud = SodaCloud.from_file(soda_cloud_file_content)
error_msg = soda_cloud.test_connection()
if error_msg:
print(f"\U0001F92F Could not connect to Soda Cloud: {error_msg}")
print(f"{Emoticons.POLICE_CAR_LIGHT} Could not connect to Soda Cloud: {error_msg}")
exit(1)
else:
print(f"\u2705 Success! Tested Soda Cloud credentials in '{soda_cloud_file_path}'")
print(f"{Emoticons.WHITE_CHECK_MARK} Success! Tested Soda Cloud credentials in '{soda_cloud_file_path}'")


def main():
Expand Down
4 changes: 3 additions & 1 deletion soda-core/src/soda_core/common/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ class Emoticons:
OK_HAND: str = "\U0001F44C"
SCROLL: str = "\U0001F4DC"
FINGERS_CROSSED: str = "\U0001F91E"
POLICE_CAR_LIGHT: str = "\U0001F92F"
EXPLODING_HEAD: str = "\U0001F92F"
POLICE_CAR_LIGHT: str = "\U0001F6A8"
SEE_NO_EVIL: str = "\U0001F648"


class Location:
Expand Down
20 changes: 10 additions & 10 deletions soda-core/src/soda_core/common/soda_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,15 @@ def __init__(
self._organization_configuration = None

def send_contract_result(self, contract_result: ContractResult, skip_publish: bool):
contract_yaml_source_str = contract_result.contract_info.source.source_content_str
contract_yaml_source_str = contract_result.contract.source.source_content_str
self.logs.debug(f"Sending results to Soda Cloud {Emoticons.CLOUD}")
soda_cloud_file_path : str = f"{contract_result.contract_info.soda_qualified_dataset_name.lower()}.yml"
soda_cloud_file_path : str = f"{contract_result.contract.soda_qualified_dataset_name.lower()}.yml"
file_id: str | None = self._upload_contract(
yaml_str_source=contract_yaml_source_str,
soda_cloud_file_path=soda_cloud_file_path
)
if file_id:
contract_result.contract_info.source.soda_cloud_file_id = file_id
contract_result.contract.source.soda_cloud_file_id = file_id
contract_result = self._build_contract_result_json(
contract_result=contract_result, skip_publish=skip_publish
)
Expand Down Expand Up @@ -173,8 +173,8 @@ def _build_contract_result_json(self, contract_result: ContractResult, skip_publ

return self.to_jsonnable( # type: ignore
{
"definitionName": contract_result.soda_qualified_dataset_name,
"defaultDataSource": contract_result.data_source_name,
"definitionName": contract_result.contract.soda_qualified_dataset_name,
"defaultDataSource": contract_result.data_source_info.name,
"defaultDataSourceProperties": {
"type": contract_result.data_source_info.type
},
Expand All @@ -201,16 +201,16 @@ def _build_contract_result_json(self, contract_result: ContractResult, skip_publ
"logs": log_cloud_json_dicts,
"sourceOwner": "soda-core",
"contract": {
"fileId": contract_result.contract_info.source.soda_cloud_file_id,
"fileId": contract_result.contract.source.soda_cloud_file_id,
"dataset": {
"datasource": contract_result.contract_info.data_source_name,
"prefixes": contract_result.contract_info.dataset_prefix,
"name": contract_result.contract_info.dataset_name
"datasource": contract_result.contract.data_source_name,
"prefixes": contract_result.contract.dataset_prefix,
"name": contract_result.contract.dataset_name
},
"metadata": {
"source": {
"type": "local",
"filePath": contract_result.contract_info.source.local_file_path
"filePath": contract_result.contract.source.local_file_path
}
}
},
Expand Down
77 changes: 43 additions & 34 deletions soda-core/src/soda_core/contracts/contract_verification.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from __future__ import annotations

from abc import ABC
from abc import ABC, abstractmethod
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from logging import ERROR
from numbers import Number
from typing import Optional

from soda_core.common.logs import Logs
from soda_core.common.logs import Logs, Emoticons
from soda_core.common.yaml import YamlSource


Expand All @@ -31,7 +31,7 @@ def with_contract_yaml_file(self, contract_yaml_file_path: str) -> ContractVerif
self.contract_yaml_sources.append(YamlSource.from_file_path(yaml_file_path=contract_yaml_file_path))
else:
self.logs.error(
f"\U0001F92F ...ignoring invalid contract yaml file '{contract_yaml_file_path}'. "
f"{Emoticons.POLICE_CAR_LIGHT} ...ignoring invalid contract yaml file '{contract_yaml_file_path}'. "
f"Expected string, but was {contract_yaml_file_path.__class__.__name__}."
)
return self
Expand All @@ -42,7 +42,7 @@ def with_contract_yaml_str(self, contract_yaml_str: str) -> ContractVerification
self.contract_yaml_sources.append(YamlSource.from_str(yaml_str=contract_yaml_str))
else:
self.logs.error(
f"\U0001F92F ...ignoring invalid contract_yaml_str '{contract_yaml_str}'. "
f"{Emoticons.POLICE_CAR_LIGHT} ...ignoring invalid contract_yaml_str '{contract_yaml_str}'. "
f"Expected string, but was {contract_yaml_str.__class__.__name__}"
)
return self
Expand All @@ -53,13 +53,13 @@ def with_data_source_yaml_file(self, data_source_yaml_file_path: str) -> Contrac
self.logs.debug(f" ...with data_source_yaml_file_path '{data_source_yaml_file_path}'")
else:
self.logs.debug(
f"\U0001F92F ...with data_source_yaml_file_path '{data_source_yaml_file_path}'. "
f"{Emoticons.POLICE_CAR_LIGHT} ...with data_source_yaml_file_path '{data_source_yaml_file_path}'. "
f"Ignoring previously configured data source '{self.data_source_yaml_source}'"
)
self.data_source_yaml_source = YamlSource.from_file_path(yaml_file_path=data_source_yaml_file_path)
else:
self.logs.error(
f"\U0001F92F ...ignoring invalid data_source_yaml_file_path '{data_source_yaml_file_path}'. "
f"{Emoticons.POLICE_CAR_LIGHT} ...ignoring invalid data_source_yaml_file_path '{data_source_yaml_file_path}'. "
f"Expected string, but was {data_source_yaml_file_path.__class__.__name__}"
)
return self
Expand All @@ -70,13 +70,13 @@ def with_data_source_yaml_str(self, data_source_yaml_str: str) -> ContractVerifi
self.logs.debug(f" ...with data_source_yaml_str '{data_source_yaml_str}'")
else:
self.logs.debug(
f"\U0001F92F ...with data_source_yaml_str '{data_source_yaml_str}'. "
f"{Emoticons.POLICE_CAR_LIGHT} ...with data_source_yaml_str '{data_source_yaml_str}'. "
f"Ignoring previously configured data source '{self.data_source_yaml_source}'"
)
self.data_source_yaml_source = YamlSource.from_str(yaml_str=data_source_yaml_str)
else:
self.logs.error(
f"\U0001F92F ...ignoring invalid data_source_yaml_str '{data_source_yaml_str}'. "
f"{Emoticons.POLICE_CAR_LIGHT} ...ignoring invalid data_source_yaml_str '{data_source_yaml_str}'. "
f"Expected string, but was {data_source_yaml_str.__class__.__name__}"
)
return self
Expand All @@ -92,13 +92,13 @@ def with_soda_cloud_yaml_file(self, soda_cloud_yaml_file_path: str) -> ContractV
self.logs.debug(f" ...with soda_cloud_yaml_file_path '{soda_cloud_yaml_file_path}'")
else:
self.logs.debug(
f"\U0001F92F ...with soda_cloud_yaml_file_path '{soda_cloud_yaml_file_path}'. "
f"{Emoticons.POLICE_CAR_LIGHT} ...with soda_cloud_yaml_file_path '{soda_cloud_yaml_file_path}'. "
f"Ignoring previously configured soda cloud '{self.soda_cloud_yaml_source}'"
)
self.soda_cloud_yaml_source = YamlSource.from_file_path(yaml_file_path=soda_cloud_yaml_file_path)
else:
self.logs.error(
f"\U0001F92F ...ignoring invalid soda_cloud_yaml_file_path '{soda_cloud_yaml_file_path}'. "
f"{Emoticons.POLICE_CAR_LIGHT} ...ignoring invalid soda_cloud_yaml_file_path '{soda_cloud_yaml_file_path}'. "
f"Expected string, but was {soda_cloud_yaml_file_path.__class__.__name__}"
)
return self
Expand All @@ -109,13 +109,13 @@ def with_soda_cloud_yaml_str(self, soda_cloud_yaml_str: str) -> ContractVerifica
self.logs.debug(f" ...with soda_cloud_yaml_str '{soda_cloud_yaml_str}'")
else:
self.logs.debug(
f"\U0001F92F ...with soda_cloud_yaml_str '{soda_cloud_yaml_str}'. "
f"{Emoticons.POLICE_CAR_LIGHT} ...with soda_cloud_yaml_str '{soda_cloud_yaml_str}'. "
f"Ignoring previously configured soda cloud '{self.soda_cloud_yaml_source}'"
)
self.soda_cloud_yaml_source = YamlSource.from_str(yaml_str=soda_cloud_yaml_str)
else:
self.logs.error(
f"\U0001F92F ...ignoring invalid soda_cloud_yaml_str '{soda_cloud_yaml_str}'. "
f"{Emoticons.POLICE_CAR_LIGHT} ...ignoring invalid soda_cloud_yaml_str '{soda_cloud_yaml_str}'. "
f"Expected string, but was {soda_cloud_yaml_str.__class__.__name__}"
)
return self
Expand All @@ -131,7 +131,7 @@ def with_variable(self, key: str, value: str) -> ContractVerificationBuilder:
self.variables[key] = value
else:
self.logs.error(
f"\U0001F92F ...ignoring invalid variable '{key}'. "
f"{Emoticons.POLICE_CAR_LIGHT} ...ignoring invalid variable '{key}'. "
f"Expected key str and value string"
)
return self
Expand All @@ -146,7 +146,7 @@ def with_variables(self, variables: dict[str, str]) -> ContractVerificationBuild
self.variables = None
else:
self.logs.error(
f"\U0001F92F ...ignoring invalid variables '{variables}'. "
f"{Emoticons.POLICE_CAR_LIGHT} ...ignoring invalid variables '{variables}'. "
f"Expected dict, but was {variables.__class__.__name__}"
)
return self
Expand Down Expand Up @@ -300,23 +300,23 @@ def __init__(
check: Check,
metric_value: Number | None,
outcome: CheckOutcome,
diagnostic_lines: list[str]
diagnostics: list[Diagnostic]
):
self.contract: Contract = contract
self.check: Check = check
self.metric_value: Number | None = metric_value
self.outcome: CheckOutcome = outcome
self.diagnostic_lines: list[str] = diagnostic_lines
self.diagnostics: list[Diagnostic] = diagnostics

def log_summary(self, logs: Logs):
def log_summary(self, logs: Logs) -> None:
outcome_emoticon: str = (
"\u2705" if self.outcome == CheckOutcome.PASSED
else "\U0001F6A8" if self.outcome == CheckOutcome.FAILED
else "\U0001F6A9"
Emoticons.WHITE_CHECK_MARK if self.outcome == CheckOutcome.PASSED
else Emoticons.POLICE_CAR_LIGHT if self.outcome == CheckOutcome.FAILED
else Emoticons.SEE_NO_EVIL
)
logs.info(f"{outcome_emoticon} Check {self.outcome.name} {self.check.name}")
for diagnostic_line in self.diagnostic_lines:
logs.info(diagnostic_line)
for diagnostic in self.diagnostics:
logs.info(f" {diagnostic.log_line()}")


class Measurement:
Expand All @@ -327,6 +327,25 @@ def __init__(self, metric_id: str, value: any, metric_name: str | None):
self.value: any = value


@dataclass
class Diagnostic(ABC):
    """Abstract base for a named diagnostic that can render itself as a log line.

    Deriving from ABC is what makes the @abstractmethod marker on log_line
    effective: without it the base class could be instantiated directly and
    log_line() would silently return None instead of a string.
    """

    # Label of the diagnostic, e.g. the name of the metric that was measured.
    name: str

    @abstractmethod
    def log_line(self) -> str:
        """Return the text that describes this diagnostic in the logs."""


@dataclass
class NumericDiagnostic(Diagnostic):
    """Diagnostic that carries a single numeric value next to its name."""

    # The number reported for this diagnostic.
    value: float

    def log_line(self) -> str:
        """Render this diagnostic as 'Actual <name> was <value>'."""
        line: str = f"Actual {self.name} was {self.value}"
        return line


class ContractResult:
"""
This is the immutable data structure containing all the results from a single contract verification.
Expand All @@ -335,26 +354,16 @@ class ContractResult:

def __init__(
self,
contract_info: Contract,
contract: Contract,
data_source_info: DataSourceInfo,
data_timestamp: datetime | None,
started_timestamp: datetime,
ended_timestamp: datetime,
data_source_name: str,
soda_qualified_dataset_name: str,
sql_qualified_dataset_name: str,
measurements: list[Measurement],
check_results: list[CheckResult],
logs: Logs
):
self.contract_info: Contract = contract_info
# TODO move to contract info or use the data_source_info
self.data_source_name: str = data_source_name
# TODO move to contract info
self.soda_qualified_dataset_name: str = soda_qualified_dataset_name
# TODO move to contract info
self.sql_qualified_dataset_name: str = sql_qualified_dataset_name

self.contract: Contract = contract
self.data_source_info: DataSourceInfo = data_source_info

self.data_timestamp: datetime | None = data_timestamp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from soda_core.common.data_source import DataSource
from soda_core.common.data_source_results import QueryResult
from soda_core.common.sql_dialect import *
from soda_core.contracts.contract_verification import CheckResult, CheckOutcome, Measurement, Check, Contract
from soda_core.contracts.contract_verification import CheckResult, CheckOutcome, Measurement, Check, Contract, \
Diagnostic, NumericDiagnostic
from soda_core.contracts.impl.check_types.invalidity_check_yaml import InvalidCheckYaml
from soda_core.contracts.impl.check_types.missing_check_yaml import MissingCheckYaml
from soda_core.contracts.impl.check_types.row_count_check import RowCountMetric
Expand Down Expand Up @@ -93,20 +94,20 @@ def evaluate(self, measurement_values: MeasurementValues, contract_info: Contrac
outcome: CheckOutcome = CheckOutcome.NOT_EVALUATED

invalid_count: int = measurement_values.get_value(self.invalid_count_metric_impl)
diagnostic_lines = [
f" Actual invalid_count was {invalid_count}"
diagnostics: list[Diagnostic] = [
NumericDiagnostic(name="invalid_count", value=invalid_count)
]

threshold_value: Number | None = None
if self.type == "invalid_count":
threshold_value = invalid_count
else:
row_count: int = measurement_values.get_value(self.row_count_metric)
diagnostic_lines.append(f" Actual row_count was {row_count}")
diagnostics.append(NumericDiagnostic(name="row_count", value=row_count))
if row_count > 0:
missing_percent: float = measurement_values.get_value(self.invalid_percent_metric)
diagnostic_lines.append(f" Actual invalid_percent was {missing_percent}")
threshold_value = missing_percent
invalid_percent: float = measurement_values.get_value(self.invalid_percent_metric)
diagnostics.append(NumericDiagnostic(name="invalid_percent", value=invalid_percent))
threshold_value = invalid_percent

if self.threshold and isinstance(threshold_value, Number):
if self.threshold.passes(threshold_value):
Expand All @@ -119,7 +120,7 @@ def evaluate(self, measurement_values: MeasurementValues, contract_info: Contrac
check=self._build_check_info(),
metric_value=threshold_value,
outcome=outcome,
diagnostic_lines=diagnostic_lines,
diagnostics=diagnostics
)


Expand Down
Loading

0 comments on commit 4e59bf5

Please sign in to comment.