oracle
diff --git a/‎.github/workflows/run-operators-unit-tests.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/run-operators-unit-tests.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎CODEOWNERS‎
Lines changed: 1 addition & 0 deletions b/‎CODEOWNERS‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎ads/common/serializer.py‎
Lines changed: 1 addition & 1 deletion b/‎ads/common/serializer.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ads/dataset/label_encoder.py‎
Lines changed: 1 addition & 1 deletion b/‎ads/dataset/label_encoder.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py‎
Lines changed: 1 addition & 1 deletion b/‎ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py‎
Lines changed: 8 additions & 15 deletions b/‎ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py‎
Lines changed: 8 additions & 15 deletions
diff --git a/‎ads/opctl/operator/lowcode/anomaly/model/automlx.py‎
Lines changed: 2 additions & 1 deletion b/‎ads/opctl/operator/lowcode/anomaly/model/automlx.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎ads/opctl/operator/lowcode/anomaly/model/base_model.py‎
Lines changed: 2 additions & 2 deletions b/‎ads/opctl/operator/lowcode/anomaly/model/base_model.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎ads/opctl/operator/lowcode/anomaly/operator_config.py‎
Lines changed: 18 additions & 1 deletion b/‎ads/opctl/operator/lowcode/anomaly/operator_config.py‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎ads/opctl/operator/lowcode/anomaly/schema.yaml‎
Lines changed: 16 additions & 4 deletions b/‎ads/opctl/operator/lowcode/anomaly/schema.yaml‎
Lines changed: 16 additions & 4 deletions
@@ -31,7 +31,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.10.8"]
+        python-version: ["3.8"]
 
     steps:
       - uses: actions/checkout@v4
 
@@ -0,0 +1 @@
+* @darenr @mayoor @mrDzurb @VipulMascarenhas @qiuosier
@@ -464,7 +464,7 @@ def from_dict(
             )
 
         obj = cls(
-            **{key: obj_dict.get(key) for key in allowed_fields if key in obj_dict}
+            **{key: obj_dict.get(key) for key in allowed_fields}
         )
 
         for key, value in obj_dict.items():
 
@@ -52,7 +52,7 @@ def fit(self, X: "pandas.DataFrame"):
 
         """
         for column in X.columns:
-            if X[column].dtype.name in ["object", "category"]:
+            if X[column].dtype.name in ["object", "category", "bool"]:
                 X[column] = X[column].astype(str)
                 self.label_encoders[column] = LabelEncoder()
                 self.label_encoders[column].fit(X[column])
 
@@ -29,7 +29,7 @@
 TESTS_PATH = os.path.join(_cwd, "resources", "tests.yaml")
 HTML_PATH = os.path.join(_cwd, "resources", "template.html")
 CONFIG_PATH = os.path.join(_cwd, "resources", "config.yaml")
-PYTHON_VER_PATTERN = "^([3])(\.[6-9])(\.\d+)?$"
+PYTHON_VER_PATTERN = "^([3])(\.([6-9]|1[0-2]))(\.\d+)?$"
 PAR_URL = "https://objectstorage.us-ashburn-1.oraclecloud.com/p/WyjtfVIG0uda-P3-2FmAfwaLlXYQZbvPZmfX1qg0-sbkwEQO6jpwabGr2hMDBmBp/n/ociodscdev/b/service-conda-packs/o/service_pack/index.json"
 
 TESTS = {
 
@@ -10,7 +10,6 @@
     merge_category_columns,
 )
 from ads.opctl.operator.lowcode.common.data import AbstractData
-from ads.opctl.operator.lowcode.common.data import AbstractData
 from ads.opctl.operator.lowcode.anomaly.utils import get_frequency_of_datetime
 from ads.opctl import logger
 import pandas as pd
@@ -56,6 +55,10 @@ def __init__(self, spec: AnomalyOperatorSpec):
             self.X_valid_dict = self.valid_data.X_valid_dict
             self.y_valid_dict = self.valid_data.y_valid_dict
 
+    # Returns raw data based on the series_id, i.e., the merged target_category_column value
+    def get_raw_data_by_cat(self, category):
+        return self._data.get_raw_data_by_cat(category)
+
 
 class AnomalyOutput:
     def __init__(self, date_column):
@@ -94,38 +97,28 @@ def get_outliers_by_cat(self, category: str, data: pd.DataFrame):
             outliers = pd.merge(outliers, scores, on=self.date_column, how="inner")
         return outliers
 
-    def get_inliers(self, data):
+    def get_inliers(self, datasets):
         inliers = pd.DataFrame()
 
         for category in self.list_categories():
             inliers = pd.concat(
                 [
                     inliers,
-                    self.get_inliers_by_cat(
-                        category,
-                        data[data[OutputColumns.Series] == category]
-                        .reset_index(drop=True)
-                        .drop(OutputColumns.Series, axis=1),
-                    ),
+                    self.get_inliers_by_cat(category, datasets.get_raw_data_by_cat(category)),
                 ],
                 axis=0,
                 ignore_index=True,
             )
         return inliers
 
-    def get_outliers(self, data):
+    def get_outliers(self, datasets):
         outliers = pd.DataFrame()
 
         for category in self.list_categories():
             outliers = pd.concat(
                 [
                     outliers,
-                    self.get_outliers_by_cat(
-                        category,
-                        data[data[OutputColumns.Series] == category]
-                        .reset_index(drop=True)
-                        .drop(OutputColumns.Series, axis=1),
-                    ),
+                    self.get_outliers_by_cat(category, datasets.get_raw_data_by_cat(category)),
                 ],
                 axis=0,
                 ignore_index=True,
 
@@ -26,8 +26,9 @@ class AutoMLXOperatorModel(AnomalyOperatorBaseModel):
     )
     def _build_model(self) -> pd.DataFrame:
         from automlx import init
+        import logging
         try:
-            init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}})
+            init(engine="ray", engine_opts={"ray_setup": {"_temp_dir": "/tmp/ray-temp"}}, loglevel=logging.CRITICAL)
         except Exception as e:
             logger.info("Ray already initialized")
         date_column = self.spec.datetime_column.name
 
@@ -272,15 +272,15 @@ def _save_report(
                     f2.write(f1.read())
 
         if self.spec.generate_inliers:
-            inliers = anomaly_output.get_inliers(self.datasets.data)
+            inliers = anomaly_output.get_inliers(self.datasets)
             write_data(
                 data=inliers,
                 filename=os.path.join(unique_output_dir, self.spec.inliers_filename),
                 format="csv",
                 storage_options=storage_options,
             )
 
-        outliers = anomaly_output.get_outliers(self.datasets.data)
+        outliers = anomaly_output.get_outliers(self.datasets)
         write_data(
             data=outliers,
             filename=os.path.join(unique_output_dir, self.spec.outliers_filename),
 
@@ -36,6 +36,21 @@ class TestData(InputData):
     """Class representing operator specification test data details."""
 
 
+@dataclass(repr=True)
+class PreprocessingSteps(DataClassSerializable):
+    """Class representing preprocessing steps for operator."""
+
+    missing_value_imputation: bool = True
+    outlier_treatment: bool = False
+
+
+@dataclass(repr=True)
+class DataPreprocessor(DataClassSerializable):
+    """Class representing operator specification preprocessing details."""
+
+    enabled: bool = True
+    steps: PreprocessingSteps = field(default_factory=PreprocessingSteps)
+
 @dataclass(repr=True)
 class AnomalyOperatorSpec(DataClassSerializable):
     """Class representing operator specification."""
@@ -74,7 +89,9 @@ def __post_init__(self):
             self.generate_inliers if self.generate_inliers is not None else False
         )
         self.model_kwargs = self.model_kwargs or dict()
-
+        self.preprocessing = (
+            self.preprocessing if self.preprocessing is not None else DataPreprocessor(enabled=True)
+        )
 
 @dataclass(repr=True)
 class AnomalyOperatorConfig(OperatorConfig):
 
@@ -307,11 +307,23 @@ spec:
         description: "When provided, target_category_columns [list] indexes the data into multiple related datasets for anomaly detection"
 
     preprocessing:
-      type: boolean
+      type: dict
       required: false
-      default: true
-      meta:
-        description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
+      schema:
+        enabled:
+          type: boolean
+          required: false
+          default: true
+          meta:
+            description: "preprocessing and feature engineering can be disabled using this flag, Defaults to true"
+        steps:
+          type: dict
+          required: false
+          schema:
+            missing_value_imputation:
+              type: boolean
+              required: false
+              default: true
 
     generate_report:
       type: boolean
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+* @darenr @mayoor @mrDzurb @VipulMascarenhas @qiuosier`
Original file line number	Diff line number	Diff line change
`@@ -464,7 +464,7 @@ def from_dict(`
`464`	`464`	`)`
`465`	`465`
`466`	`466`	`obj = cls(`
`467`		`- **{key: obj_dict.get(key) for key in allowed_fields if key in obj_dict}`
	`467`	`+ **{key: obj_dict.get(key) for key in allowed_fields}`
`468`	`468`	`)`
`469`	`469`
`470`	`470`	`for key, value in obj_dict.items():`