Skip to content

Commit

Permalink
Improve plotting performance
Browse files Browse the repository at this point in the history
  • Loading branch information
frode-aarstad committed Jan 16, 2024
1 parent 7ef6fd6 commit dd3b664
Show file tree
Hide file tree
Showing 7 changed files with 110 additions and 42 deletions.
3 changes: 2 additions & 1 deletion src/ert/dark_storage/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,11 @@ def data_for_key(
given case. The row index is the realization number, and the columns are an
index over the indexes/dates"""


if key.startswith("LOG10_"):
key = key[6:]
if key in ensemble.get_summary_keyset():
data = ensemble.load_all_summary_data([key], realization_index)
data = ensemble.load_summary(key, realization_index)
data = data[key].unstack(level="Date")
elif key in ensemble.get_gen_kw_keyset():
data = ensemble.load_all_gen_kw_data(key.split(":")[0], realization_index)
Expand Down
20 changes: 20 additions & 0 deletions src/ert/dark_storage/endpoints/ensembles.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,26 @@ def get_ensemble(
)


@router.get("/ensembles/{ensemble_id}/small", response_model=js.EnsembleOut)
def get_ensemble_small(
    *,
    storage: StorageAccessor = DEFAULT_STORAGE,
    ensemble_id: UUID,
) -> js.EnsembleOut:
    """Return a reduced description of a single ensemble.

    Only the experiment id, name and size are read from the stored ensemble.
    The parent, children, parameter-name and response-name fields are
    returned as ``None`` / empty lists.
    """
    ens = storage.get_ensemble(ensemble_id)
    summary = js.EnsembleOut(
        id=ensemble_id,
        experiment_id=ens.experiment_id,
        userdata={"name": ens.name},
        size=ens.ensemble_size,
        # Nothing below is looked up; these fields are always empty here.
        parent=None,
        children=[],
        child_ensemble_ids=[],
        parameter_names=[],
        response_names=[],
    )
    return summary


@router.put("/ensembles/{ensemble_id}/userdata")
async def replace_ensemble_userdata(
*,
Expand Down
11 changes: 7 additions & 4 deletions src/ert/dark_storage/endpoints/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ async def get_ensemble_record(
name: str,
ensemble_id: UUID,
accept: Annotated[Union[str, None], Header()] = None,
realization_index: Optional[int] = None,
realization_index: Optional[int] = None, # remove this?? Is it used anywhere?
label: Optional[str] = None,
) -> Any:
dataframe = data_for_key(db.get_ensemble(ensemble_id), name, realization_index)
Expand Down Expand Up @@ -249,15 +249,18 @@ def get_ensemble_responses(
ensemble_id: UUID,
) -> Mapping[str, js.RecordOut]:
response_map: Dict[str, js.RecordOut] = {}

ens = db.get_ensemble(ensemble_id)
name_dict = {}

for obs in res.get_observations():
name_dict[obs.observation_key] = obs.observation_type

for name in ens.get_summary_keyset():
obs_keys = res.observation_keys(name)
response_map[str(name)] = js.RecordOut(
id=UUID(int=0),
name=name,
userdata={"data_origin": "Summary"},
has_observations=len(obs_keys) != 0,
has_observations=name in name_dict,
)

for name in res.get_gen_data_keys():
Expand Down
20 changes: 9 additions & 11 deletions src/ert/gui/tools/plot/plot_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def _get_all_cases(self) -> List[dict]:
for experiment in experiments:
for ensemble_id in experiment["ensemble_ids"]:
response = client.get(
f"/ensembles/{ensemble_id}", timeout=self._timeout
f"/ensembles/{ensemble_id}/small", timeout=self._timeout
)
self._check_response(response)
response_json = response.json()
Expand All @@ -68,14 +68,6 @@ def _check_response(response: requests.Response):
f"{response.text} from url: {response.url}."
)

def _get_experiments(self) -> dict:
    """Fetch ``/experiments`` from the storage service and return its JSON body.

    The response is passed through ``self._check_response`` before decoding.
    """
    with StorageService.session() as session:
        reply: requests.Response = session.get(
            "/experiments", timeout=self._timeout
        )
        self._check_response(reply)
        payload = reply.json()
    return payload

def _get_ensembles(self, experiement_id) -> List:
with StorageService.session() as client:
response: requests.Response = client.get(
Expand All @@ -94,8 +86,14 @@ def all_data_type_keys(self) -> List:
the key"""

all_keys = {}

with StorageService.session() as client:
for experiment in self._get_experiments():
response: requests.Response = client.get(
"/experiments", timeout=self._timeout
)
self._check_response(response)

for experiment in response.json():
for ensemble in self._get_ensembles(experiment["id"]):
response: requests.Response = client.get(
f"/ensembles/{ensemble['id']}/responses", timeout=self._timeout
Expand Down Expand Up @@ -133,7 +131,7 @@ def get_all_cases_not_running(self) -> List:
info about the case is returned"""
# Currently, the ensemble information from the storage API does not contain any
# hint of whether a case is running, so for now we return all the cases, running
# or not.
return self._get_all_cases()

def data_for_key(self, case_name, key) -> pd.DataFrame:
Expand Down
7 changes: 4 additions & 3 deletions src/ert/gui/tools/plot/plot_window.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import time
from typing import List

from httpx import RequestError
Expand Down Expand Up @@ -42,12 +43,10 @@
class PlotWindow(QMainWindow):
def __init__(self, config_file, parent):
QMainWindow.__init__(self, parent)

t = time.perf_counter()
logger.info("PlotWindow __init__")

self.setMinimumWidth(850)
self.setMinimumHeight(650)

self.setWindowTitle(f"Plotting - {config_file}")
self.activateWindow()

Expand Down Expand Up @@ -109,6 +108,8 @@ def __init__(self, config_file, parent):
self._data_type_keys_widget.selectDefault()
self._updateCustomizer(current_plot_widget)

logger.info(f"PlotWindow __init__ done. time={time.perf_counter() -t}")

def currentPlotChanged(self):
key_def = self.getSelectedKey()
if key_def is None:
Expand Down
83 changes: 64 additions & 19 deletions src/ert/storage/local_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,10 @@ def get_realization_mask_without_parent_failure(self) -> npt.NDArray[np.bool_]:
def get_realization_mask_with_parameters(self) -> npt.NDArray[np.bool_]:
    """Return the ``_get_parameter`` result of every realization as an array.

    Position ``i`` of the array holds the result for realization ``i``.
    """
    flags = [self._get_parameter(real) for real in range(self.ensemble_size)]
    return np.array(flags)

def get_realization_mask_with_responses(
    self, key: Optional[str] = None
) -> npt.NDArray[np.bool_]:
    """Return the ``_get_response`` result of every realization as an array.

    Args:
        key: Forwarded unchanged to ``_get_response`` for each realization.
            Defaults to ``None``.

    Returns:
        An array whose position ``i`` holds the result for realization ``i``.
    """
    return np.array(
        [self._get_response(real, key) for real in range(self.ensemble_size)]
    )

def _get_parameter(self, realization: int) -> bool:
if not self.experiment.parameter_configuration:
Expand All @@ -117,10 +119,14 @@ def _get_parameter(self, realization: int) -> bool:
for parameter in self.experiment.parameter_configuration
)

def _get_response(self, realization: int) -> bool:
def _get_response(self, realization: int, key: Optional[str] = None) -> bool:
if not self.experiment.response_configuration:
return False
path = self.mount_point / f"realization-{realization}"

if key:
return (path / f"{key}.nc").exists()

return all(
(path / f"{response}.nc").exists()
for response in self._filter_response_configuration()
Expand Down Expand Up @@ -180,9 +186,13 @@ def realizations_initialized(self, realizations: List[int]) -> bool:

return all((responses[real] or parameters[real]) for real in realizations)

def get_realization_list_with_responses(
    self, key: Optional[str] = None
) -> List[int]:
    """Return the indices of the realizations flagged in the response mask.

    Args:
        key: Forwarded unchanged to ``get_realization_mask_with_responses``.
            Defaults to ``None``.

    Returns:
        The positions, in ascending order, at which the mask is truthy.
    """
    mask = self.get_realization_mask_with_responses(key)
    return [idx for idx, has_response in enumerate(mask) if has_response]

def set_failure(
Expand Down Expand Up @@ -253,20 +263,14 @@ def _get_gen_data_config(self, key: str) -> GenDataConfig:

@deprecated("Check the experiment for registered responses")
def get_gen_data_keyset(self) -> List[str]:
    """Return ``"<key>@<report_step>"`` names for all GenDataConfig responses.

    Walks ``self.experiment.response_configuration`` once. A config whose
    ``report_steps`` is ``None`` contributes a single ``"<key>@0"`` entry;
    otherwise it contributes one entry per report step.

    Returns:
        The names sorted case-insensitively.
    """
    gen_data_list: List[str] = []
    for name, config in self.experiment.response_configuration.items():
        if not isinstance(config, GenDataConfig):
            continue
        # No explicit report steps: expose the response under step 0.
        steps = [0] if config.report_steps is None else config.report_steps
        gen_data_list.extend(f"{name}@{step}" for step in steps)
    return sorted(gen_data_list, key=str.lower)

@deprecated("Check the experiment for registered parameters")
Expand All @@ -293,7 +297,7 @@ def load_gen_data(
report_step: int,
realization_index: Optional[int] = None,
) -> pd.DataFrame:
realizations = self.get_realization_list_with_responses()
realizations = self.get_realization_list_with_responses(key)
if realization_index is not None:
if realization_index not in realizations:
raise IndexError(f"No such realization {realization_index}")
Expand Down Expand Up @@ -368,6 +372,46 @@ def load_responses(
assert isinstance(response, xr.Dataset)
return response

def load_responses_summary(
    self, key: str, realizations: npt.NDArray[np.int_]
) -> xr.Dataset:
    """Load one summary vector for the given realizations.

    For each realization, ``realization-<n>/summary.nc`` under
    ``self.mount_point`` is opened and reduced to the entries whose ``name``
    equals ``key``. Realizations without that file are skipped. The pieces
    are combined along a ``realization`` dimension.

    Args:
        key: Summary vector name to select.
        realizations: Realization numbers to read.

    Returns:
        The combined dataset.
    """
    # NOTE(review): ``key`` is interpolated into the query unescaped; a key
    # containing a double quote would produce a malformed query - confirm
    # summary keys can never contain one.
    selection = f'name=="{key}"'
    loaded = []
    for realization in realizations:
        input_path = self.mount_point / f"realization-{realization}" / "summary.nc"
        if not input_path.exists():
            continue
        # open_dataset is lazy and keeps the file open. Select the key, pull
        # the selection into memory, then close the handle so that one file
        # descriptor per realization is not left open.
        with xr.open_dataset(input_path, engine="scipy") as ds:
            loaded.append(ds.query(name=selection).load())
    response = xr.combine_nested(loaded, concat_dim="realization")
    assert isinstance(response, xr.Dataset)
    return response

def load_summary(
    self,
    key: str,
    realization_index: Optional[int] = None,
) -> pd.DataFrame:
    """Return one summary vector as a DataFrame indexed by realization and date.

    Args:
        key: Summary vector name, passed to ``load_responses_summary``.
        realization_index: When given, only this realization is loaded;
            otherwise every index in ``range(self.ensemble_size)`` is
            requested.

    Returns:
        A frame with a ``(Realization, Date)`` index and one column per
        loaded name, or an empty frame when loading raises ``ValueError``
        or ``KeyError``.

    Raises:
        IndexError: If ``realization_index`` is not a valid realization
            number for this ensemble.
    """
    if realization_index is None:
        realizations = list(range(self.ensemble_size))
    elif realization_index in range(self.ensemble_size):
        realizations = [realization_index]
    else:
        raise IndexError(f"No such realization {realization_index}")

    try:
        df = self.load_responses_summary(key, tuple(realizations)).to_dataframe()
    except (ValueError, KeyError):
        return pd.DataFrame()

    df = df.unstack(level="name")
    # Columns are (variable, name) pairs after unstacking; keep only the name.
    df.columns = [col[1] for col in df.columns.values]
    df.index = df.index.rename(
        {"time": "Date", "realization": "Realization"}
    ).reorder_levels(["Realization", "Date"])
    return df

@deprecated("Use load_responses")
def load_all_summary_data(
self,
Expand All @@ -386,6 +430,7 @@ def load_all_summary_data(
df = self.load_responses("summary", tuple(realizations)).to_dataframe()
except (ValueError, KeyError):
return pd.DataFrame()
# df= df.query(f'name == "{key}"')
df = df.unstack(level="name")
df.columns = [col[1] for col in df.columns.values]
df.index = df.index.rename(
Expand Down
8 changes: 4 additions & 4 deletions tests/performance_tests/performance_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,9 @@ def dark_storage_app(monkeypatch):
folder = py.path.local(tempfile.mkdtemp())
make_poly_example(
folder,
"../../test-data/poly_template",
gen_data_count=34,
gen_data_entries=15,
"test-data/poly_template",
gen_data_count=3400,
gen_data_entries=150,
summary_data_entries=100,
reals=200,
summary_data_count=4000,
Expand All @@ -163,7 +163,7 @@ def dark_storage_app(monkeypatch):
sum_obs_every=10,
gen_obs_every=1,
parameter_entries=10,
parameter_count=8,
parameter_count=10,
update_steps=1,
)
print(folder)

0 comments on commit dd3b664

Please sign in to comment.