From dd3b664b61ba050f8c44586f2668ed143afcf986 Mon Sep 17 00:00:00 2001 From: Frode Aarstad Date: Wed, 10 Jan 2024 15:32:28 +0100 Subject: [PATCH] Improve plotting performance --- src/ert/dark_storage/common.py | 3 +- src/ert/dark_storage/endpoints/ensembles.py | 20 +++++ src/ert/dark_storage/endpoints/records.py | 11 ++- src/ert/gui/tools/plot/plot_api.py | 20 +++-- src/ert/gui/tools/plot/plot_window.py | 7 +- src/ert/storage/local_ensemble.py | 83 +++++++++++++++----- tests/performance_tests/performance_utils.py | 8 +- 7 files changed, 110 insertions(+), 42 deletions(-) diff --git a/src/ert/dark_storage/common.py b/src/ert/dark_storage/common.py index 0cf4bfee031..485a9c8b23c 100644 --- a/src/ert/dark_storage/common.py +++ b/src/ert/dark_storage/common.py @@ -36,10 +36,11 @@ def data_for_key( given case. The row index is the realization number, and the columns are an index over the indexes/dates""" + if key.startswith("LOG10_"): key = key[6:] if key in ensemble.get_summary_keyset(): - data = ensemble.load_all_summary_data([key], realization_index) + data = ensemble.load_summary(key, realization_index) data = data[key].unstack(level="Date") elif key in ensemble.get_gen_kw_keyset(): data = ensemble.load_all_gen_kw_data(key.split(":")[0], realization_index) diff --git a/src/ert/dark_storage/endpoints/ensembles.py b/src/ert/dark_storage/endpoints/ensembles.py index 8d0a53fc02c..d0739d89557 100644 --- a/src/ert/dark_storage/endpoints/ensembles.py +++ b/src/ert/dark_storage/endpoints/ensembles.py @@ -42,6 +42,26 @@ def get_ensemble( ) +@router.get("/ensembles/{ensemble_id}/small", response_model=js.EnsembleOut) +def get_ensemble_small( + *, + storage: StorageAccessor = DEFAULT_STORAGE, + ensemble_id: UUID, +) -> js.EnsembleOut: + ensemble = storage.get_ensemble(ensemble_id) + return js.EnsembleOut( + id=ensemble_id, + children=[], + parent=None, + experiment_id=ensemble.experiment_id, + userdata={"name": ensemble.name}, + size=ensemble.ensemble_size, + 
parameter_names=[], + response_names=[], + child_ensemble_ids=[], + ) + + @router.put("/ensembles/{ensemble_id}/userdata") async def replace_ensemble_userdata( *, diff --git a/src/ert/dark_storage/endpoints/records.py b/src/ert/dark_storage/endpoints/records.py index 5de1cf58247..19dd4e8509e 100644 --- a/src/ert/dark_storage/endpoints/records.py +++ b/src/ert/dark_storage/endpoints/records.py @@ -174,7 +174,7 @@ async def get_ensemble_record( name: str, ensemble_id: UUID, accept: Annotated[Union[str, None], Header()] = None, - realization_index: Optional[int] = None, + realization_index: Optional[int] = None, # TODO: remove this? Is it used anywhere? label: Optional[str] = None, ) -> Any: dataframe = data_for_key(db.get_ensemble(ensemble_id), name, realization_index) @@ -249,15 +249,18 @@ def get_ensemble_responses( ensemble_id: UUID, ) -> Mapping[str, js.RecordOut]: response_map: Dict[str, js.RecordOut] = {} - ens = db.get_ensemble(ensemble_id) + name_dict = {} + + for obs in res.get_observations(): + name_dict[obs.observation_key] = obs.observation_type + for name in ens.get_summary_keyset(): - obs_keys = res.observation_keys(name) response_map[str(name)] = js.RecordOut( id=UUID(int=0), name=name, userdata={"data_origin": "Summary"}, - has_observations=len(obs_keys) != 0, + has_observations=name in name_dict, ) for name in res.get_gen_data_keys(): diff --git a/src/ert/gui/tools/plot/plot_api.py b/src/ert/gui/tools/plot/plot_api.py index 20e9ddbabe2..efa3eebae95 100644 --- a/src/ert/gui/tools/plot/plot_api.py +++ b/src/ert/gui/tools/plot/plot_api.py @@ -43,7 +43,7 @@ def _get_all_cases(self) -> List[dict]: for experiment in experiments: for ensemble_id in experiment["ensemble_ids"]: response = client.get( - f"/ensembles/{ensemble_id}", timeout=self._timeout + f"/ensembles/{ensemble_id}/small", timeout=self._timeout ) self._check_response(response) response_json = response.json() @@ -68,14 +68,6 @@ def _check_response(response: requests.Response): f"{response.text} from 
url: {response.url}." ) - def _get_experiments(self) -> dict: - with StorageService.session() as client: - response: requests.Response = client.get( - "/experiments", timeout=self._timeout - ) - self._check_response(response) - return response.json() - def _get_ensembles(self, experiement_id) -> List: with StorageService.session() as client: response: requests.Response = client.get( @@ -94,8 +86,14 @@ def all_data_type_keys(self) -> List: the key""" all_keys = {} + with StorageService.session() as client: - for experiment in self._get_experiments(): + response: requests.Response = client.get( + "/experiments", timeout=self._timeout + ) + self._check_response(response) + + for experiment in response.json(): for ensemble in self._get_ensembles(experiment["id"]): response: requests.Response = client.get( f"/ensembles/{ensemble['id']}/responses", timeout=self._timeout @@ -133,7 +131,7 @@ def get_all_cases_not_running(self) -> List: info about the case is returned""" # Currently, the ensemble information from the storage API does not contain any # hint if a case is running or not for now we return all the cases, running or - # not + # not return self._get_all_cases() def data_for_key(self, case_name, key) -> pd.DataFrame: diff --git a/src/ert/gui/tools/plot/plot_window.py b/src/ert/gui/tools/plot/plot_window.py index 1fcd777edb5..6cbd5de34d3 100644 --- a/src/ert/gui/tools/plot/plot_window.py +++ b/src/ert/gui/tools/plot/plot_window.py @@ -1,4 +1,5 @@ import logging +import time from typing import List from httpx import RequestError @@ -42,12 +43,10 @@ class PlotWindow(QMainWindow): def __init__(self, config_file, parent): QMainWindow.__init__(self, parent) - + t = time.perf_counter() logger.info("PlotWindow __init__") - self.setMinimumWidth(850) self.setMinimumHeight(650) - self.setWindowTitle(f"Plotting - {config_file}") self.activateWindow() @@ -109,6 +108,8 @@ def __init__(self, config_file, parent): self._data_type_keys_widget.selectDefault() 
self._updateCustomizer(current_plot_widget) + logger.info(f"PlotWindow __init__ done. time={time.perf_counter() -t}") + def currentPlotChanged(self): key_def = self.getSelectedKey() if key_def is None: diff --git a/src/ert/storage/local_ensemble.py b/src/ert/storage/local_ensemble.py index 5176b70f946..8dbf3c6cd17 100644 --- a/src/ert/storage/local_ensemble.py +++ b/src/ert/storage/local_ensemble.py @@ -105,8 +105,10 @@ def get_realization_mask_without_parent_failure(self) -> npt.NDArray[np.bool_]: def get_realization_mask_with_parameters(self) -> npt.NDArray[np.bool_]: return np.array([self._get_parameter(i) for i in range(self.ensemble_size)]) - def get_realization_mask_with_responses(self) -> npt.NDArray[np.bool_]: - return np.array([self._get_response(i) for i in range(self.ensemble_size)]) + def get_realization_mask_with_responses( + self, key: Optional[str] = None + ) -> npt.NDArray[np.bool_]: + return np.array([self._get_response(i, key) for i in range(self.ensemble_size)]) def _get_parameter(self, realization: int) -> bool: if not self.experiment.parameter_configuration: @@ -117,10 +119,14 @@ def _get_parameter(self, realization: int) -> bool: for parameter in self.experiment.parameter_configuration ) - def _get_response(self, realization: int) -> bool: + def _get_response(self, realization: int, key: Optional[str] = None) -> bool: if not self.experiment.response_configuration: return False path = self.mount_point / f"realization-{realization}" + + if key: + return (path / f"{key}.nc").exists() + return all( (path / f"{response}.nc").exists() for response in self._filter_response_configuration() @@ -180,9 +186,13 @@ def realizations_initialized(self, realizations: List[int]) -> bool: return all((responses[real] or parameters[real]) for real in realizations) - def get_realization_list_with_responses(self) -> List[int]: + def get_realization_list_with_responses( + self, key: Optional[str] = None + ) -> List[int]: return [ - idx for idx, b in 
enumerate(self.get_realization_mask_with_responses()) if b + idx + for idx, b in enumerate(self.get_realization_mask_with_responses(key)) + if b ] def set_failure( @@ -253,20 +263,14 @@ def _get_gen_data_config(self, key: str) -> GenDataConfig: @deprecated("Check the experiment for registered responses") def get_gen_data_keyset(self) -> List[str]: - keylist = [ - k - for k, v in self.experiment.response_info.items() - if "_ert_kind" in v and v["_ert_kind"] == "GenDataConfig" - ] - gen_data_list = [] - for key in keylist: - gen_data_config = self._get_gen_data_config(key) - if gen_data_config.report_steps is None: - gen_data_list.append(f"{key}@0") - else: - for report_step in gen_data_config.report_steps: - gen_data_list.append(f"{key}@{report_step}") + for k, v in self.experiment.response_configuration.items(): + if isinstance(v, GenDataConfig): + if v.report_steps is None: + gen_data_list.append(f"{k}@0") + else: + for report_step in v.report_steps: + gen_data_list.append(f"{k}@{report_step}") return sorted(gen_data_list, key=lambda k: k.lower()) @deprecated("Check the experiment for registered parameters") @@ -293,7 +297,7 @@ def load_gen_data( report_step: int, realization_index: Optional[int] = None, ) -> pd.DataFrame: - realizations = self.get_realization_list_with_responses() + realizations = self.get_realization_list_with_responses(key) if realization_index is not None: if realization_index not in realizations: raise IndexError(f"No such realization {realization_index}") @@ -368,6 +372,46 @@ def load_responses( assert isinstance(response, xr.Dataset) return response + def load_responses_summary( + self, key: str, realizations: npt.NDArray[np.int_] + ) -> xr.Dataset: + loaded = [] + for realization in realizations: + input_path = self.mount_point / f"realization-{realization}" / "summary.nc" + if input_path.exists(): + ds = xr.open_dataset(input_path, engine="scipy") + ds = ds.query(name=f'name=="{key}"') + loaded.append(ds) + response = 
xr.combine_nested(loaded, concat_dim="realization") + assert isinstance(response, xr.Dataset) + return response + + def load_summary( + self, + key: str, + realization_index: Optional[int] = None, + ) -> pd.DataFrame: + # realizations = self.get_realization_list_with_responses("summary") + # realizations = self.get_realization_list_with_responses() + realizations = [i for i in range(self.ensemble_size)] + + if realization_index is not None: + if realization_index not in realizations: + raise IndexError(f"No such realization {realization_index}") + realizations = [realization_index] + + try: + df = self.load_responses_summary(key, tuple(realizations)).to_dataframe() + except (ValueError, KeyError): + return pd.DataFrame() + + df = df.unstack(level="name") + df.columns = [col[1] for col in df.columns.values] + df.index = df.index.rename( + {"time": "Date", "realization": "Realization"} + ).reorder_levels(["Realization", "Date"]) + return df + @deprecated("Use load_responses") def load_all_summary_data( self, @@ -386,6 +430,7 @@ def load_all_summary_data( df = self.load_responses("summary", tuple(realizations)).to_dataframe() except (ValueError, KeyError): return pd.DataFrame() + # df= df.query(f'name == "{key}"') df = df.unstack(level="name") df.columns = [col[1] for col in df.columns.values] df.index = df.index.rename( diff --git a/tests/performance_tests/performance_utils.py b/tests/performance_tests/performance_utils.py index 772a84c204a..b30b4968549 100644 --- a/tests/performance_tests/performance_utils.py +++ b/tests/performance_tests/performance_utils.py @@ -152,9 +152,9 @@ def dark_storage_app(monkeypatch): folder = py.path.local(tempfile.mkdtemp()) make_poly_example( folder, - "../../test-data/poly_template", - gen_data_count=34, - gen_data_entries=15, + "test-data/poly_template", + gen_data_count=3400, + gen_data_entries=150, summary_data_entries=100, reals=200, summary_data_count=4000, @@ -163,7 +163,7 @@ def dark_storage_app(monkeypatch): sum_obs_every=10, 
gen_obs_every=1, parameter_entries=10, - parameter_count=8, + parameter_count=10, update_steps=1, ) print(folder)