Implement adaptive batch size

dafeda · Dec 16, 2023 · 9d2ea14 · 9d2ea14
1 parent 38348cd
commit 9d2ea14
Show file tree

Hide file tree

Showing 2 changed files with 34 additions and 1 deletion.
diff --git a/pyproject.toml b/pyproject.toml
@@ -135,6 +135,7 @@ types = [
     "types-decorator",
     "types-docutils",
     "types-tqdm",
+    "types-psutil"
 ]
 
 [tool.setuptools]

diff --git a/src/ert/analysis/_es_update.py b/src/ert/analysis/_es_update.py
@@ -21,6 +21,7 @@
 
 import iterative_ensemble_smoother as ies
 import numpy as np
+import psutil
 import xarray as xr
 from iterative_ensemble_smoother.experimental import (
     AdaptiveESMDA,
@@ -535,7 +536,6 @@ def analysis_ES(
         truncation = module.enkf_truncation
 
         if module.localization:
-            batch_size: int = 1000
             smoother_adaptive_es = AdaptiveESMDA(
                 covariance=observation_errors**2,
                 observations=observation_values,
@@ -570,6 +570,38 @@ def analysis_ES(
             )
             if module.localization:
                 num_params = temp_storage[param_group.name].shape[0]
+
+                # Calculate adaptive batch size.
+                # Adaptive Localization calculates the cross-covariance between
+                # parameters and responses, a matrix of shape num_params x num_obs
+                # that may not fit in memory. Parameters are therefore processed in
+                # batches sized so one batch's float64 rows fit in available memory.
+
+                # From `psutil` documentation:
+                # - available:
+                # the memory that can be given instantly to processes without the
+                # system going into swap.
+                # This is calculated by summing different memory values depending
+                # on the platform and it is supposed to be used to monitor actual
+                # memory usage in a cross platform fashion.
+                available_memory_bytes = psutil.virtual_memory().available
+                memory_safety_factor = 0.8
+                bytes_in_float64 = 8
+                batch_size = min(
+                    int(
+                        np.floor(
+                            available_memory_bytes
+                            * memory_safety_factor
+                            / (num_obs * bytes_in_float64)
+                        )
+                    ),
+                    num_params,
+                )
+                _logger.info(
+                    f"Adaptive Localization parameter group {param_group.name} with {num_params} parameters"
+                )
+                _logger.info(f"Adaptive Localization batch size: {batch_size}")
+
                 batches = _split_by_batchsize(np.arange(0, num_params), batch_size)
 
                 progress_callback(