fix raising error in dataset
DawidPludowski committed Dec 5, 2023
1 parent b0d853b commit 96bdd6b
Showing 1 changed file with 32 additions and 10 deletions.
42 changes: 32 additions & 10 deletions liltab/data/datasets.py
@@ -25,17 +25,25 @@ def __init__(
         preprocess_data: bool,
         encode_categorical_target: bool,
     ):
-        if response_columns and len(response_columns) > 1 and encode_categorical_target:
+        if (
+            response_columns is not None
+            and len(response_columns) > 1
+            and encode_categorical_target
+        ):
             raise ValueError("One-hot encoding is supported only for single target")
 
         self.data_path = data_path
         self.df = pd.read_csv(data_path)
 
         self.attribute_columns = np.array(
-            attribute_columns if attribute_columns is not None else self.df.columns.tolist()[:-1]
+            attribute_columns
+            if attribute_columns is not None
+            else self.df.columns.tolist()[:-1]
         )
         self.response_columns = np.array(
-            response_columns if response_columns is not None else [self.df.columns.tolist()[-1]]
+            response_columns
+            if response_columns is not None
+            else [self.df.columns.tolist()[-1]]
         )
         self.n_attributes = len(self.attribute_columns)
         self.n_responses = len(self.response_columns)
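
The reworked guard above replaces a bare truthiness test on response_columns with an explicit is not None check. A minimal standalone sketch of why that distinction matters, using invented values rather than anything from the repository (truth-testing a multi-element NumPy array raises a ValueError of its own, which is plausibly the kind of spurious error the commit title refers to):

    import numpy as np

    # Hypothetical target-column specifications, not taken from the repository.
    as_list = ["y1", "y2"]
    as_array = np.array(["y1", "y2"])

    # Truthiness is fine for a list, but a multi-element array refuses it:
    # "The truth value of an array with more than one element is ambiguous ..."
    try:
        if as_array and len(as_array) > 1:
            pass
    except ValueError as err:
        print(err)

    # The explicit check behaves the same for None, lists, and arrays.
    for cols in (None, as_list, as_array):
        if cols is not None and len(cols) > 1:
            print(f"multiple targets specified ({len(cols)})")
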
@@ -57,12 +65,15 @@ def _preprocess_data(self):
         """
         self.preprocessing_pipeline = get_preprocessing_pipeline()
         if self.encode_categorical_target:
-            self.df.loc[:, self.attribute_columns] = self.preprocessing_pipeline.fit_transform(
+            self.df.loc[
+                :, self.attribute_columns
+            ] = self.preprocessing_pipeline.fit_transform(
                 self.df[self.attribute_columns]
             )
         else:
             self.df = pd.DataFrame(
-                self.preprocessing_pipeline.fit_transform(self.df), columns=self.df.columns
+                self.preprocessing_pipeline.fit_transform(self.df),
+                columns=self.df.columns,
             )
 
     def _encode_categorical_target(self):
@@ -119,7 +130,9 @@ def __init__(
             preprocess_data=preprocess_data,
         )
 
-        self.X = torch.from_numpy(self.df[self.attribute_columns].to_numpy()).type(torch.float32)
+        self.X = torch.from_numpy(self.df[self.attribute_columns].to_numpy()).type(
+            torch.float32
+        )
         self.y = torch.from_numpy(self.y).type(torch.float32)
 
     def __getitem__(self, idx: list[int]) -> tuple[Tensor, Tensor]:
@@ -181,7 +194,9 @@ def __init__(
             preprocess_data=preprocess_data,
         )
         if total_random_feature_sampling and (
-            attribute_columns or response_columns or encode_categorical_target
+            attribute_columns is not None
+            or response_columns
+            or encode_categorical_target
         ):
             raise ValueError(
                 "total_random_feature_sampling doesn't support feature or encoding specification"
@@ -206,7 +221,10 @@ def __getitem__(self, idx: list[int]) -> tuple[Tensor, Tensor]:
                 self.columns[responses_idx],
             )
         else:
-            attributes_idx, responses_idx = self._get_features_from_selected_columns()
+            (
+                attributes_idx,
+                responses_idx,
+            ) = self._get_features_from_selected_columns()
             self.attributes, self.responses = (
                 self.attribute_columns[attributes_idx],
                 self.response_columns[responses_idx],
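
Looking back at the @@ -181,7 +194,9 @@ hunk, only the attribute_columns term of the total_random_feature_sampling guard was switched to an explicit is not None test; response_columns and encode_categorical_target are still truthiness-tested. A small sketch of how the new condition evaluates for a few hypothetical argument combinations (all values invented for illustration; note that an empty attribute list now counts as specified, because [] is not None):

    # Hypothetical argument combinations, for illustration only:
    # (total_random_feature_sampling, attribute_columns, response_columns, encode_categorical_target)
    cases = [
        (True, None, None, False),     # ok: nothing else specified
        (True, ["x1"], None, False),   # raises: attributes given explicitly
        (True, [], None, False),       # raises: an empty list is still not None
        (True, None, ["y"], False),    # raises: responses given explicitly
        (False, ["x1"], ["y"], True),  # ok: the flag is off, so the guard never fires
    ]

    for trfs, attrs, resps, encode in cases:
        rejected = trfs and (attrs is not None or resps or encode)
        print(trfs, attrs, resps, encode, "->", "raise" if rejected else "ok")
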
@@ -224,8 +242,12 @@ def __getitem__(self, idx: list[int]) -> tuple[Tensor, Tensor]:
     def _get_features_from_selected_columns(self) -> tuple[int, int]:
         attributes_size = np.random.randint(low=1, high=self.n_attributes + 1)
         responses_size = np.random.randint(low=1, high=self.n_responses + 1)
-        attributes_idx = np.random.choice(len(self.attribute_columns), attributes_size).tolist()
-        responses_idx = np.random.choice(len(self.response_columns), responses_size).tolist()
+        attributes_idx = np.random.choice(
+            len(self.attribute_columns), attributes_size
+        ).tolist()
+        responses_idx = np.random.choice(
+            len(self.response_columns), responses_size
+        ).tolist()
 
         return attributes_idx, responses_idx
 
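The wrapped calls in _get_features_from_selected_columns are a pure reformatting, but the sampling itself is worth seeing in isolation. A standalone sketch with invented column names (note that np.random.choice samples with replacement unless replace=False is passed, so repeated indices are possible):

    import numpy as np

    # Hypothetical column layout, for illustration only.
    attribute_columns = np.array(["x1", "x2", "x3", "x4"])
    n_attributes = len(attribute_columns)

    # Mirrors the attribute-side sampling done per __getitem__ call.
    attributes_size = np.random.randint(low=1, high=n_attributes + 1)
    attributes_idx = np.random.choice(len(attribute_columns), attributes_size).tolist()

    print(attributes_size, attributes_idx, attribute_columns[attributes_idx])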
