Skip to content

AD fixes: add preprocessing steps, remove `raise e` (#1135)

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
7 commits merged (base and head branch names were not captured in this extract)
Apr 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@
# Copyright (c) 2023 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

from ..operator_config import AnomalyOperatorSpec
import pandas as pd

from ads.opctl import logger
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns
from ads.opctl.operator.lowcode.anomaly.utils import get_frequency_of_datetime
from ads.opctl.operator.lowcode.common.data import AbstractData
from ads.opctl.operator.lowcode.common.utils import (
default_signer,
merge_category_columns,
)
from ads.opctl.operator.lowcode.common.data import AbstractData
from ads.opctl.operator.lowcode.anomaly.utils import get_frequency_of_datetime
from ads.opctl import logger
import pandas as pd
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns

from ..operator_config import AnomalyOperatorSpec


class AnomalyData(AbstractData):
Expand Down
2 changes: 1 addition & 1 deletion ads/opctl/operator/lowcode/anomaly/model/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def __init__(self, config: AnomalyOperatorConfig, datasets: AnomalyDatasets):
self.config: AnomalyOperatorConfig = config
self.spec: AnomalyOperatorSpec = config.spec
self.datasets = datasets

if self.spec.validation_data is not None:
self.X_valid_dict = self.datasets.valid_data.X_valid_dict
self.y_valid_dict = self.datasets.valid_data.y_valid_dict
Expand All @@ -74,7 +75,6 @@ def generate_report(self):
logger.warning(f"Found exception: {e}")
if self.spec.datetime_column:
anomaly_output = self._fallback_build_model()
raise e

elapsed_time = time.time() - start_time

Expand Down
8 changes: 5 additions & 3 deletions ads/opctl/operator/lowcode/anomaly/operator_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,16 @@
from typing import Dict, List

from ads.common.serializer import DataClassSerializable
from ads.opctl.operator.common.utils import _load_yaml_from_uri
from ads.opctl.operator.common.operator_config import (
InputData,
OperatorConfig,
OutputDirectory,
InputData,
)
from .const import SupportedModels
from ads.opctl.operator.common.utils import _load_yaml_from_uri
from ads.opctl.operator.lowcode.common.utils import find_output_dirname

from .const import SupportedModels


@dataclass(repr=True)
class ValidationData(InputData):
Expand Down Expand Up @@ -61,6 +62,7 @@ class AnomalyOperatorSpec(DataClassSerializable):
test_data: TestData = field(default_factory=TestData)
validation_data: ValidationData = field(default_factory=ValidationData)
output_directory: OutputDirectory = field(default_factory=OutputDirectory)
preprocessing: DataPreprocessor = field(default_factory=DataPreprocessor)
report_file_name: str = None
report_title: str = None
report_theme: str = None
Expand Down
2 changes: 2 additions & 0 deletions ads/opctl/operator/lowcode/common/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ def run(self, data):
logger.info("Skipping outlier treatment because it is disabled")
elif self.name == "additional_data":
clean_df = self._missing_value_imputation_add(clean_df)
elif self.name == "input_data" and self.preprocessing.steps.missing_value_imputation:
clean_df = self._fill_na(clean_df)
else:
logger.info(
"Skipping all preprocessing steps because preprocessing is disabled"
Expand Down