Skip to content

Commit

Permalink
Improve validation of design matrix
Browse files Browse the repository at this point in the history
This commit adds validation to the design matrix, and also makes the active_realizations_field
in the GUI accept only realizations that are actually found in the design matrix.
The design matrix validation added in this commit:
* parameter names cannot contain multiple words
* parameter names cannot be numerical
  • Loading branch information
jonathan-eq committed Feb 6, 2025
1 parent fb8fd2a commit 912dbc8
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 12 deletions.
19 changes: 11 additions & 8 deletions src/ert/config/design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def read_design_matrix(

if error_list := DesignMatrix._validate_design_matrix(design_matrix_df):
error_msg = "\n".join(error_list)
raise ValueError(f"Design matrix is not valid, error:\n{error_msg}")
raise ValueError(f"Design matrix is not valid, error(s):\n{error_msg}")

defaults_to_use = DesignMatrix._read_defaultssheet(
self.xls_filename, self.default_sheet, design_matrix_df.columns.to_list()
Expand Down Expand Up @@ -259,13 +259,6 @@ def _validate_design_matrix(design_matrix: pd.DataFrame) -> list[str]:
return []
errors = []
column_na_mask = design_matrix.columns.isna()
column_indexes_unnamed = [
index for index, value in enumerate(column_na_mask) if value
]
if len(column_indexes_unnamed) > 0:
errors.append(
f"Column headers not present in column {column_indexes_unnamed}"
)
if not design_matrix.columns[~column_na_mask].is_unique:
errors.append("Duplicate parameter names found in design sheet")
empties = [
Expand All @@ -274,6 +267,16 @@ def _validate_design_matrix(design_matrix: pd.DataFrame) -> list[str]:
]
if len(empties) > 0:
errors.append(f"Design matrix contains empty cells {empties}")

for column_num, param_name in enumerate(design_matrix.columns):
if pd.isna(param_name) or len(param_name.split()) == 0:
errors.append(f"Empty parameter name found in column {column_num}.")
elif len(param_name.split()) > 1:
errors.append(
f"Multiple words in parameter name found in column {column_num} ({param_name})."
)
elif param_name.isnumeric():
errors.append(f"Numeric parameter name found in column {column_num}.")
return errors

@staticmethod
Expand Down
7 changes: 6 additions & 1 deletion src/ert/gui/simulation/ensemble_experiment_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from ert.gui.tools.design_matrix.design_matrix_panel import DesignMatrixPanel
from ert.mode_definitions import ENSEMBLE_EXPERIMENT_MODE
from ert.run_models import EnsembleExperiment
from ert.validation import ActiveRange, RangeStringArgument
from ert.validation import ActiveRange, RangeStringArgument, RangeSubsetStringArgument
from ert.validation.proper_name_argument import ExperimentValidation, ProperNameArgument

from .experiment_config_panel import ExperimentConfigPanel
Expand Down Expand Up @@ -88,6 +88,11 @@ def __init__(
self._active_realizations_field.setText(
ActiveRange(design_matrix.active_realizations).rangestring
)
self._active_realizations_field.setValidator(
RangeSubsetStringArgument(
ActiveRange(design_matrix.active_realizations)
)
)
show_dm_param_button = QPushButton("Show parameters")
show_dm_param_button.setObjectName("show-dm-parameters")
show_dm_param_button.setMinimumWidth(50)
Expand Down
3 changes: 2 additions & 1 deletion src/ert/validation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .number_list_string_argument import NumberListStringArgument
from .proper_name_argument import ExperimentValidation, ProperNameArgument
from .proper_name_format_argument import ProperNameFormatArgument
from .range_string_argument import RangeStringArgument
from .range_string_argument import RangeStringArgument, RangeSubsetStringArgument
from .rangestring import mask_to_rangestring, rangestring_to_list, rangestring_to_mask
from .runpath_argument import RunPathArgument
from .string_definition import StringDefinition
Expand All @@ -21,6 +21,7 @@
"ProperNameArgument",
"ProperNameFormatArgument",
"RangeStringArgument",
"RangeSubsetStringArgument",
"RunPathArgument",
"StringDefinition",
"ValidationStatus",
Expand Down
11 changes: 11 additions & 0 deletions src/ert/validation/active_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ def __init__(
rangestring: str | None = None,
length: int | None = None,
):
self.length = length
if mask is None and rangestring is None and length is None:
raise ValueError("Supply mask or rangestring and length to IndexRange.")
if mask is None:
Expand Down Expand Up @@ -74,3 +75,13 @@ def validate_rangestring_vs_length(
f"for size {length}"
)
return (rangestring, length)

def validate_range_is_subset(self, other: "ActiveRange") -> None:
    """Check that every position active in ``other`` is also active here.

    The two masks are compared position by position. A position that
    ``other`` has beyond the end of this range's mask counts as inactive
    on this side, so ``other`` may be longer than this range as long as
    it activates nothing out there.

    Args:
        other: The range that is expected to be a subset of this one.

    Raises:
        ValueError: If ``other`` activates a position that is not active
            in this range.
    """
    own_mask = self.mask
    own_length = len(own_mask)
    # A position is "stray" when other selects it but this range does not
    # (either because it is inactive here or because it lies past the end).
    has_stray_position = any(
        wanted and not (position < own_length and own_mask[position])
        for position, wanted in enumerate(other.mask)
    )
    if has_stray_position:
        raise ValueError(
            f"Specified rangestring ({other.rangestring}) is not a subset "
            f"of the active realizations ({self.rangestring})"
        )
21 changes: 21 additions & 0 deletions src/ert/validation/range_string_argument.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,24 @@ def validate(self, token: str) -> ValidationStatus:
validation_status.setValue(token)

return validation_status


class RangeSubsetStringArgument(RangeStringArgument):
    """Range-string validator that also requires a subset of a source range."""

    def __init__(self, source_active_range: ActiveRange) -> None:
        """Store the range that every validated token is compared against."""
        super().__init__()
        self._source_range = source_active_range

    def validate(self, token: str) -> ValidationStatus:
        """Run the parent validation, then the subset check on ``token``.

        If the parent validation fails, its status is returned unchanged.
        Otherwise ``token`` is turned into an ``ActiveRange`` of the same
        length as the source range and checked with
        ``validate_range_is_subset``; a ``ValueError`` from either step
        marks the status as failed and adds the error text to its message.
        """
        status = super().validate(token)
        if not status:
            return status

        source = self._source_range
        try:
            source.validate_range_is_subset(
                ActiveRange(rangestring=token, length=len(source))
            )
        except ValueError as error:
            status.setFailed()
            status.addToMessage(str(error))

        # The token is stored on the status whether or not the subset check passed.
        status.setValue(token)
        return status
15 changes: 14 additions & 1 deletion tests/ert/unit_tests/gui/ide/test_range_string_argument.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from ert.validation import RangeStringArgument
from ert.validation import RangeStringArgument, RangeSubsetStringArgument
from ert.validation.active_range import ActiveRange


def test_proper_name_argument():
Expand All @@ -23,3 +24,15 @@ def test_proper_name_argument():

assert argument.validate("1-5, 9")
assert not argument.validate("10")


def test_range_subset():
    """Tokens inside the source range validate; tokens outside it do not."""
    validator = RangeSubsetStringArgument(
        source_active_range=ActiveRange(rangestring="0-3,5,7-9", length=10)
    )

    # Selections made up only of realizations active in the source range.
    for accepted in ("1", "0-3", "0-3,5,7-9"):
        assert validator.validate(accepted)

    # "10" and "1-10" reach past length 10; "0-4" includes the inactive 4.
    for rejected in ("10", "1-10", "0-4"):
        assert not validator.validate(rejected)
18 changes: 17 additions & 1 deletion tests/ert/unit_tests/sensitivity_analysis/test_design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,9 +238,25 @@ def test_reading_design_matrix_validate_reals(tmp_path, real_column, error_msg):
),
pytest.param(
["a", "b ", ""],
r"Column headers not present in column \[2\]",
r"Empty parameter name found in column 2",
id="missing entries",
),
pytest.param(
["a", "b", "parameter name with spaces"],
"Multiple words in parameter name found in column 2.",
id="multiple words in parameter name",
),
pytest.param(
["a", "b", " "],
"Empty parameter name found in column 2",
id="dataframe loads parameter name as whitespace",
),
pytest.param(["a", "b", "3"], "Numeric parameter name found in column 2"),
pytest.param(
["a", "b c d e", 33],
r"Multiple words in parameter name found in column 1 \(b c d e\)\.\nNumeric parameter name found in column 2",
id="multiple errors",
),
],
)
def test_reading_design_matrix_validate_headers(tmp_path, column_names, error_msg):
Expand Down

0 comments on commit 912dbc8

Please sign in to comment.