Skip to content

Commit

Permalink
Add new methods for dropping output parameters and columns
Browse files Browse the repository at this point in the history
  • Loading branch information
mpvanderschelling committed Mar 6, 2024
1 parent 1dcdb40 commit 77ad679
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 9 deletions.
45 changes: 41 additions & 4 deletions src/f3dasm/_src/design/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ def add(self, name: str,
f"Unknown type {type}!"
f"Possible types are: 'float', 'int', 'category', 'constant'.")

def add_output(self, name: str, to_disk: bool):
def add_output(self, name: str, to_disk: bool, exist_ok=False):
"""Add a new output parameter to the domain.
Parameters
Expand All @@ -446,9 +446,11 @@ def add_output(self, name: str, to_disk: bool):
{'param1': OutputParameter(to_disk=True)}
"""
if name in self.output_space:
raise KeyError(
f"Parameter {name} already exists in the domain! \
Choose a different name.")
if not exist_ok:
raise KeyError(
f"Parameter {name} already exists in the domain! \
Choose a different name.")
return

self.output_space[name] = _OutputParameter(to_disk)
# Getters
Expand Down Expand Up @@ -724,6 +726,41 @@ def select(self, names: str | Iterable[str]) -> Domain:

return Domain(space={key: self.space[key] for key in names})

def drop_output(self, names: str | Iterable[str]) -> Domain:
    """Drop a subset of output parameters from the domain.

    Parameters
    ----------
    names : str or Iterable[str]
        The names of the output parameters to drop. A single string is
        treated as one name. Any iterable of names is accepted,
        including one-shot iterators such as generators.

    Returns
    -------
    Domain
        A new domain with the same input space and with the named output
        parameters removed from the output space.

    Example
    -------
    >>> domain = Domain()
    >>> domain.output_space = {
    ...     'param1': _OutputParameter(to_disk=True),
    ...     'param2': _OutputParameter(to_disk=True),
    ...     'param3': _OutputParameter(to_disk=True)
    ... }
    >>> domain.drop_output(['param1', 'param3'])
    Domain({'param2': _OutputParameter(to_disk=True)})
    """
    # Materialise the names once. The membership test below runs for every
    # output parameter, so a one-shot iterable would be exhausted after the
    # first test and later names would silently be kept.
    if isinstance(names, str):
        names_to_drop = {names}
    else:
        names_to_drop = set(names)

    remaining_outputs = {
        key: parameter
        for key, parameter in self.output_space.items()
        if key not in names_to_drop
    }

    return Domain(space=self.space, output_space=remaining_outputs)

# Miscellaneous
# =============================================================================

Expand Down
42 changes: 41 additions & 1 deletion src/f3dasm/_src/experimentdata/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,29 @@ def select_columns(self, columns: Iterable[str] | str) -> _Data:
return _Data(
self.data[self.columns.iloc(columns)], columns=_selected_columns)

def drop(self, columns: Iterable[str] | str) -> _Data:
    """Drop the selected columns from the data.

    Parameters
    ----------
    columns : Iterable[str] | str
        The columns to drop. A single string is treated as one column
        name. Any iterable of names is accepted, including one-shot
        iterators such as generators.

    Returns
    -------
    _Data
        The data without the selected columns
    """
    # Materialise once: the names are needed twice below (to filter the
    # column names and to look up the positions to drop), so a one-shot
    # iterable would be exhausted before the second use.
    if isinstance(columns, str):
        columns = [columns]
    else:
        columns = list(columns)

    _remaining_columns = _Columns(
        {
            name: None for name in self.columns.columns
            if name not in columns})

    return _Data(
        data=self.data.drop(columns=self.columns.iloc(columns)),
        columns=_remaining_columns)

# Append and remove data
# =============================================================================

Expand Down Expand Up @@ -381,7 +404,14 @@ def add_empty_rows(self, number_of_rows: int):
np.nan, index=new_indices, columns=self.data.columns)
self.data = pd.concat([self.data, empty_data], ignore_index=False)

def add_column(self, name: str):
def add_column(self, name: str, exist_ok: bool = False):
if name in self.columns.names:
if not exist_ok:
raise ValueError(
f"Column {name} already exists in the data. "
"Set exist_ok to True to allow skipping existing columns.")
return

if self.data.columns.empty:
new_columns_index = 0
else:
Expand Down Expand Up @@ -454,6 +484,16 @@ def is_empty(self) -> bool:
"""Check if the data is empty."""
return self.data.empty

def get_index_with_nan(self) -> pd.Index:
    """Return the row indices whose data contains at least one NaN.

    Returns
    -------
    pd.Index
        The entries of ``self.indices`` for which any column of
        ``self.data`` is NaN.
    """
    all_indices = self.indices
    # Boolean mask with one entry per row: True when any column is NaN.
    row_has_nan = self.data.isna().any(axis=1)
    return all_indices[row_has_nan]

def has_columnnames(self, names: Iterable[str]) -> bool:
return set(names).issubset(self.names)

Expand Down
43 changes: 39 additions & 4 deletions src/f3dasm/_src/experimentdata/experimentdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,12 +329,17 @@ def from_yaml(cls, config: DictConfig) -> ExperimentData:
ExperimentData
ExperimentData object containing the loaded data.
"""
# Option 0: Both existing and sampling
if 'from_file' in config and 'from_sampling' in config:
return cls.from_file(config.from_file) + cls.from_sampling(
**config.from_sampling)

# Option 1: From existing ExperimentData files
if 'from_file' in config:
return cls.from_file(config.from_file)

# Option 2: Sample from the domain
elif 'from_sampling' in config:
if 'from_sampling' in config:
return cls.from_sampling(**config.from_sampling)

else:
Expand Down Expand Up @@ -395,6 +400,25 @@ def select(self, indices: int | Iterable[int]) -> ExperimentData:
jobs=self._jobs[indices],
domain=self.domain, project_dir=self.project_dir)

def drop_output(self, names: Iterable[str] | str) -> ExperimentData:
    """Drop one or more columns from the output data.

    Parameters
    ----------
    names : Iterable[str] | str
        The names of the output columns to drop. A single string is
        treated as one name. Any iterable of names is accepted,
        including one-shot iterators such as generators.

    Returns
    -------
    ExperimentData
        A new ExperimentData object with the same input data, jobs and
        project directory, and with the named columns dropped from both
        the output data and the domain.
    """
    # Materialise once: the same names go to both the output data and the
    # domain, so a one-shot iterable would be exhausted by the first call
    # and leave the two out of sync.
    if not isinstance(names, str):
        names = list(names)

    return ExperimentData(
        input_data=self._input_data,
        output_data=self._output_data.drop(names),
        jobs=self._jobs,
        domain=self.domain.drop_output(names),
        project_dir=self.project_dir)

def select_with_status(self, status: Literal['open', 'in progress',
'finished', 'error']
) -> ExperimentData:
Expand Down Expand Up @@ -763,7 +787,8 @@ def add_input_parameter(
self._input_data.add_column(name)
self.domain.add(name=name, type=type, **kwargs)

def add_output_parameter(self, name: str, is_disk: bool) -> None:
def add_output_parameter(
self, name: str, is_disk: bool, exist_ok: bool = False) -> None:
"""Add a new output column to the ExperimentData object.
Parameters
Expand All @@ -772,9 +797,12 @@ def add_output_parameter(self, name: str, is_disk: bool) -> None:
name of the new output column
is_disk
Whether the output column will be stored on disk or not
exist_ok
If True, it will not raise an error if the output column already
exists, by default False
"""
self._output_data.add_column(name)
self.domain.add_output(name, is_disk)
self._output_data.add_column(name, exist_ok=exist_ok)
self.domain.add_output(name=name, to_disk=is_disk, exist_ok=exist_ok)

def remove_rows_bottom(self, number_of_rows: int):
"""
Expand Down Expand Up @@ -1018,6 +1046,13 @@ def mark_all_in_progress_open(self) -> None:
Mark all the experiments that have the status 'in progress' open
"""
self._jobs.mark_all_in_progress_open()

def mark_all_nan_open(self) -> None:
    """
    Set the status to 'open' for every experiment whose output data
    contains a NaN value.
    """
    self.mark(
        indices=self._output_data.get_index_with_nan(),
        status='open')
# Datageneration
# =========================================================================

Expand Down

0 comments on commit 77ad679

Please sign in to comment.