Skip to content

Commit

Permalink
refactor(configuration): update configuration structure
Browse files Browse the repository at this point in the history
  • Loading branch information
datageek00 committed Aug 17, 2023
1 parent ea3bb34 commit e3fb34d
Show file tree
Hide file tree
Showing 22 changed files with 620 additions and 430 deletions.
6 changes: 0 additions & 6 deletions datachecks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from datachecks.core.configuration.configuration import (
Configuration,
load_configuration,
)
from datachecks.core.inspect import Inspect
23 changes: 1 addition & 22 deletions datachecks/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,8 @@
import click
from loguru import logger

from datachecks import Inspect
from datachecks.__version__ import __version__
from datachecks.core.configuration.configuration import (
Configuration,
load_configuration,
)
from datachecks.core import Configuration, Inspect, load_configuration


@click.version_option(package_name="datachecks", prog_name="datachecks")
Expand All @@ -40,36 +36,19 @@ def main():
default=None,
help="Specify the file path for configuration",
)
@click.option(
"-A",
"--application-name",
required=False,
default="datachecks",
help="Specify the application name for logging",
)
@click.option(
"--time-format",
required=False,
default=None,
help="Specify the time format for logging",
)
@click.option(
"--auto-profile",
is_flag=True,
help="Specify if the inspection should do auto-profile of all data sources",
)
def inspect(
config_path: Union[str, None] = None,
application_name: str = "datachecks",
time_format: str = None,
auto_profile: bool = False,
):
"""
Starts the datachecks inspection
"""
configuration: Configuration = load_configuration(config_path)
if time_format is not None:
configuration.metric_logger.config["time_format"] = time_format

inspector = Inspect(configuration=configuration, auto_profile=auto_profile)

Expand Down
6 changes: 6 additions & 0 deletions datachecks/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from datachecks.core.configuration.configuration_parser import (
Configuration,
load_configuration,
)
from datachecks.core.inspect import Inspect
89 changes: 89 additions & 0 deletions datachecks/core/common/models/configuration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Optional, Union

from datachecks.core.common.models.data_source_resource import Field, Index, Table
from datachecks.core.common.models.metric import MetricsType


class DatasourceType(str, Enum):
    """
    Identifiers of the data source kinds known to the configuration

    Members are also ``str`` instances, so a member compares equal to its
    plain string value.
    """

    # Search engines
    OPENSEARCH = "opensearch"
    ELASTICSEARCH = "elasticsearch"

    # Relational databases
    POSTGRES = "postgres"
    MYSQL = "mysql"
    MSSQL = "mssql"

    # Warehouse / lakehouse platforms
    BIGQUERY = "bigquery"
    REDSHIFT = "redshift"
    SNOWFLAKE = "snowflake"
    DATABRICKS = "databricks"

    # Document stores
    MONGODB = "mongodb"


@dataclass
class DataSourceConnectionConfiguration:
    """
    Connection configuration for a data source

    ``host``, ``port`` and ``database`` must always be supplied (``database``
    may be passed explicitly as ``None``). ``username`` and ``password``
    default to ``None`` and ``schema`` defaults to ``"public"``.

    The password is excluded from the generated ``__repr__`` so that printing
    or logging a configuration object does not expose the credential. It is
    still a regular attribute and still takes part in equality comparison.
    """

    host: str
    port: int
    database: Optional[str]
    username: Optional[str] = None
    # repr=False keeps the secret out of repr()/str() output (logs, tracebacks).
    password: Optional[str] = field(default=None, repr=False)
    schema: Optional[str] = "public"


@dataclass
class DataSourceConfiguration:
    """
    Configuration of a single data source

    Bundles the data source's name, its kind and the connection settings
    used for it.
    """

    # Name given to this data source
    name: str
    # Kind of data source, one of the DatasourceType members
    type: DatasourceType
    # Host, port, credentials, etc. for this data source
    connection_config: DataSourceConnectionConfiguration


@dataclass
class MetricsFilterConfiguration:
    """
    Filter settings attached to a metric
    """

    # Optional filter expression; ``None`` when no filter is configured.
    # NOTE(review): presumably a SQL-style WHERE condition - confirm with the
    # code that consumes it.
    where: Optional[str] = None


@dataclass
class MetricConfiguration:
    """
    Configuration of a single metric

    Describes which metric type is evaluated against which resource, with
    an optional filter.
    """

    # Name given to this metric
    name: str
    # Type of the metric, one of the MetricsType values
    metric_type: MetricsType
    # Resource the metric targets: a table, an index, or a field
    resource: Union[Table, Index, Field]
    # Optional filter settings; ``None`` when the metric is unfiltered
    filters: Optional[MetricsFilterConfiguration] = None


@dataclass
class Configuration:
    """
    Top-level configuration for the data checks

    Holds every configured data source and every configured metric.
    """

    # Data source configurations, keyed by string
    # NOTE(review): key is presumably the data source name - confirm
    data_sources: Dict[str, DataSourceConfiguration]
    # Metric configurations, keyed by string
    # NOTE(review): key is presumably the metric name - confirm
    metrics: Dict[str, MetricConfiguration]
46 changes: 46 additions & 0 deletions datachecks/core/common/models/data_source_resource.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dataclasses import dataclass
from typing import Optional, Union


@dataclass
class Table:
    """
    A database table resource
    """

    # String reference to the data source holding the table
    data_source: str
    # Name of the table
    name: str


@dataclass
class Index:
    """
    A search index resource
    """

    # String reference to the data source holding the index
    data_source: str
    # Name of the index
    name: str


@dataclass
class Field:
    """
    A field resource that belongs to a table or a search index
    """

    # Parent resource that contains this field
    belongs_to: Union[Table, Index]
    # Name of the field
    name: str
Loading

0 comments on commit e3fb34d

Please sign in to comment.