fix raising error in dataset
DawidPludowski committed Dec 5, 2023
1 parent b0d853b commit 96bdd6b
Showing 1 changed file with 32 additions and 10 deletions.
42 changes: 32 additions & 10 deletions liltab/data/datasets.py
@@ -25,17 +25,25 @@ def __init__(
         preprocess_data: bool,
         encode_categorical_target: bool,
     ):
-        if response_columns and len(response_columns) > 1 and encode_categorical_target:
+        if (
+            response_columns is not None
+            and len(response_columns) > 1
+            and encode_categorical_target
+        ):
             raise ValueError("One-hot encoding is supported only for single target")
 
         self.data_path = data_path
         self.df = pd.read_csv(data_path)
 
         self.attribute_columns = np.array(
-            attribute_columns if attribute_columns is not None else self.df.columns.tolist()[:-1]
+            attribute_columns
+            if attribute_columns is not None
+            else self.df.columns.tolist()[:-1]
         )
         self.response_columns = np.array(
-            response_columns if response_columns is not None else [self.df.columns.tolist()[-1]]
+            response_columns
+            if response_columns is not None
+            else [self.df.columns.tolist()[-1]]
         )
         self.n_attributes = len(self.attribute_columns)
         self.n_responses = len(self.response_columns)
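
The reworked guard above replaces a bare truthiness test on response_columns with an explicit is not None check. A minimal standalone sketch of why that distinction matters, using invented values rather than anything from the repository (truth-testing a multi-element NumPy array raises a ValueError of its own, which is plausibly the kind of spurious error the commit title refers to):

    import numpy as np

    # Hypothetical target-column specifications, not taken from the repository.
    as_list = ["y1", "y2"]
    as_array = np.array(["y1", "y2"])

    # Truthiness is fine for a list, but a multi-element array refuses it:
    # "The truth value of an array with more than one element is ambiguous ..."
    try:
        if as_array and len(as_array) > 1:
            pass
    except ValueError as err:
        print(err)

    # The explicit check behaves the same for None, lists, and arrays.
    for cols in (None, as_list, as_array):
        if cols is not None and len(cols) > 1:
            print(f"multiple targets specified ({len(cols)})")
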
@@ -57,12 +65,15 @@ def _preprocess_data(self):
         """
         self.preprocessing_pipeline = get_preprocessing_pipeline()
         if self.encode_categorical_target:
-            self.df.loc[:, self.attribute_columns] = self.preprocessing_pipeline.fit_transform(
+            self.df.loc[
+                :, self.attribute_columns
+            ] = self.preprocessing_pipeline.fit_transform(
                 self.df[self.attribute_columns]
             )
         else:
             self.df = pd.DataFrame(
-                self.preprocessing_pipeline.fit_transform(self.df), columns=self.df.columns
+                self.preprocessing_pipeline.fit_transform(self.df),
+                columns=self.df.columns,
             )
 
     def _encode_categorical_target(self):
@@ -119,7 +130,9 @@ def __init__(
             preprocess_data=preprocess_data,
         )
 
-        self.X = torch.from_numpy(self.df[self.attribute_columns].to_numpy()).type(torch.float32)
+        self.X = torch.from_numpy(self.df[self.attribute_columns].to_numpy()).type(
+            torch.float32
+        )
         self.y = torch.from_numpy(self.y).type(torch.float32)
 
     def __getitem__(self, idx: list[int]) -> tuple[Tensor, Tensor]:
@@ -181,7 +194,9 @@ def __init__(
             preprocess_data=preprocess_data,
         )
         if total_random_feature_sampling and (
-            attribute_columns or response_columns or encode_categorical_target
+            attribute_columns is not None
+            or response_columns
+            or encode_categorical_target
         ):
             raise ValueError(
                 "total_random_feature_sampling doesn't support feature or encoding specification"
@@ -206,7 +221,10 @@ def __getitem__(self, idx: list[int]) -> tuple[Tensor, Tensor]:
                 self.columns[responses_idx],
             )
         else:
-            attributes_idx, responses_idx = self._get_features_from_selected_columns()
+            (
+                attributes_idx,
+                responses_idx,
+            ) = self._get_features_from_selected_columns()
             self.attributes, self.responses = (
                 self.attribute_columns[attributes_idx],
                 self.response_columns[responses_idx],
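
Looking back at the @@ -181,7 +194,9 @@ hunk, only the attribute_columns term of the total_random_feature_sampling guard was switched to an explicit is not None test; response_columns and encode_categorical_target are still truthiness-tested. A small sketch of how the new condition evaluates for a few hypothetical argument combinations (all values invented for illustration; note that an empty attribute list now counts as specified, because [] is not None):

    # Hypothetical argument combinations, for illustration only:
    # (total_random_feature_sampling, attribute_columns, response_columns, encode_categorical_target)
    cases = [
        (True, None, None, False),     # ok: nothing else specified
        (True, ["x1"], None, False),   # raises: attributes given explicitly
        (True, [], None, False),       # raises: an empty list is still not None
        (True, None, ["y"], False),    # raises: responses given explicitly
        (False, ["x1"], ["y"], True),  # ok: the flag is off, so the guard never fires
    ]

    for trfs, attrs, resps, encode in cases:
        rejected = trfs and (attrs is not None or resps or encode)
        print(trfs, attrs, resps, encode, "->", "raise" if rejected else "ok")
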
@@ -224,8 +242,12 @@ def __getitem__(self, idx: list[int]) -> tuple[Tensor, Tensor]:
     def _get_features_from_selected_columns(self) -> tuple[int, int]:
         attributes_size = np.random.randint(low=1, high=self.n_attributes + 1)
         responses_size = np.random.randint(low=1, high=self.n_responses + 1)
-        attributes_idx = np.random.choice(len(self.attribute_columns), attributes_size).tolist()
-        responses_idx = np.random.choice(len(self.response_columns), responses_size).tolist()
+        attributes_idx = np.random.choice(
+            len(self.attribute_columns), attributes_size
+        ).tolist()
+        responses_idx = np.random.choice(
+            len(self.response_columns), responses_size
+        ).tolist()
 
         return attributes_idx, responses_idx
 
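The wrapped calls in _get_features_from_selected_columns are a pure reformatting, but the sampling itself is worth seeing in isolation. A standalone sketch with invented column names (note that np.random.choice samples with replacement unless replace=False is passed, so repeated indices are possible):

    import numpy as np

    # Hypothetical column layout, for illustration only.
    attribute_columns = np.array(["x1", "x2", "x3", "x4"])
    n_attributes = len(attribute_columns)

    # Mirrors the attribute-side sampling done per __getitem__ call.
    attributes_size = np.random.randint(low=1, high=n_attributes + 1)
    attributes_idx = np.random.choice(len(attribute_columns), attributes_size).tolist()

    print(attributes_size, attributes_idx, attribute_columns[attributes_idx])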
