Support for Multiple GPUs #1495

Open

wants to merge 84 commits into master

Changes from all commits (84 commits)
41d3595
add multiple gpu support to backend
YigitElma Dec 25, 2024
860b1ff
fix if statement
YigitElma Dec 25, 2024
003eca9
fix stuff
YigitElma Dec 25, 2024
74f5f3d
try
YigitElma Dec 25, 2024
eed3f6d
fix issue
YigitElma Dec 25, 2024
b7e9435
update jac_chunk_size assignment
YigitElma Dec 25, 2024
ab7402e
try putting the grid across devices
YigitElma Dec 25, 2024
2a7ab0d
fix issue with none constants
YigitElma Dec 25, 2024
afa349c
revert jnp.asarrays in grid
YigitElma Dec 25, 2024
04af924
replicate state vector on all devices
YigitElma Dec 25, 2024
aa4f9aa
allow variable number of gpus, copy some data to every device
YigitElma Dec 25, 2024
d676888
not put back to one device for testing
YigitElma Dec 25, 2024
d3d2663
handle num_device=1 case
YigitElma Dec 25, 2024
ec05139
update
YigitElma Dec 25, 2024
ea0b584
fix typo
YigitElma Dec 25, 2024
f353649
fix issue
YigitElma Dec 25, 2024
a7847df
it was a stupid mistake
YigitElma Dec 25, 2024
5c0f811
I don't know why this was changed
YigitElma Dec 25, 2024
c976088
put the copying inside the jitted part
YigitElma Dec 26, 2024
e15f7b2
shard A, Z and D too
YigitElma Dec 26, 2024
36cd4e1
fix
YigitElma Dec 26, 2024
7e82f6d
fix
YigitElma Dec 26, 2024
c963c1a
don't shard A
YigitElma Dec 26, 2024
ebd8dd1
clean up
YigitElma Dec 26, 2024
172d211
shard tangents too
YigitElma Dec 26, 2024
bd986be
shard v in different way
YigitElma Dec 26, 2024
163801e
don't cover set_device for coverage
YigitElma Dec 26, 2024
528c17c
Merge branch 'master' into yge/multigpu
YigitElma Jan 24, 2025
33b7c0b
add getter for parallel force objective
YigitElma Jan 29, 2025
35dd7b0
add notebook for testing
YigitElma Jan 29, 2025
e9c6e63
build and distribute objectives in getter
YigitElma Jan 29, 2025
9f19885
maybe use same grid res
YigitElma Jan 30, 2025
57ab00c
add build flag to getter
YigitElma Jan 30, 2025
b28bc4e
do not jit the ObjectiveFunction because jax doesn't allow it
YigitElma Jan 30, 2025
c8f4826
move extra stuff
YigitElma Feb 5, 2025
c3a4803
move whole objective on gpu
YigitElma Feb 5, 2025
b599b91
add pconcat function, normal concatenate doesn't accept arrays from d…
YigitElma Feb 6, 2025
05f705a
use more pconcat
YigitElma Feb 6, 2025
7c36f3a
test not passing constants
YigitElma Feb 6, 2025
66a4f95
try something
YigitElma Feb 6, 2025
293b6f0
try something
YigitElma Feb 6, 2025
5088395
instead replicate eq every device
YigitElma Feb 6, 2025
2c93a6a
try something
YigitElma Feb 6, 2025
84179d1
return replicated eq and use that otherwise outer eq and obj eq are n…
YigitElma Feb 6, 2025
1ee3452
reorder steps
YigitElma Feb 6, 2025
088324f
copy params to device before passing to function
YigitElma Feb 10, 2025
97c3dec
add device_id for forcebalance
YigitElma Feb 11, 2025
2b7e007
update notebook
YigitElma Feb 11, 2025
856a115
delete old line
YigitElma Feb 11, 2025
27d0c73
add testing cell
YigitElma Feb 11, 2025
3015545
clean up
YigitElma Feb 11, 2025
c8481e1
move params to device for printing too
YigitElma Feb 11, 2025
e9ae2da
Merge branch 'master' into yge/multigpu
YigitElma Feb 11, 2025
a800fd4
update notebook to plot grid
YigitElma Feb 11, 2025
69161c2
made it WORK! pass all params on given device, merge arrays on cpu or…
Feb 11, 2025
8b044eb
Merge remote-tracking branch 'refs/remotes/origin/yge/multigpu' into …
YigitElma Feb 12, 2025
23f6612
fix formatting after cluster
YigitElma Feb 12, 2025
1e1dfeb
fix some problems for testing and docs
YigitElma Feb 12, 2025
dc34d8e
Merge branch 'master' into yge/multigpu
YigitElma Feb 12, 2025
fd7638b
ignore multidevice for notebook tests, add additional warnings for gp…
YigitElma Feb 12, 2025
0a77b1a
add changelog, fix notebook tests
YigitElma Feb 12, 2025
637c1c2
Merge branch 'master' into yge/multigpu
dpanici Feb 12, 2025
306ae44
add warning for deriv_mode blocked and moving array to CPU
YigitElma Feb 12, 2025
f5dd1fa
add option to suppress cpu warning
YigitElma Feb 12, 2025
3326426
make upper case
YigitElma Feb 12, 2025
fef9a90
clean up set_device
YigitElma Feb 12, 2025
7e09142
bunch of clean up
YigitElma Feb 12, 2025
927e8aa
Merge branch 'master' into yge/multigpu
YigitElma Feb 12, 2025
c32d7b4
clean up, fix issues
YigitElma Feb 13, 2025
e2c0f77
fix set_device config['device'] problem
YigitElma Feb 13, 2025
2052633
update notebook and add device_id to all objectives
YigitElma Feb 13, 2025
0d4cc47
fix missing docs
YigitElma Feb 13, 2025
12ba4db
initial test for proximal
YigitElma Feb 13, 2025
f446804
add obj._device attr for cleaner device_put
YigitElma Feb 13, 2025
e5ed5cb
jit what you can, use pconcat
YigitElma Feb 13, 2025
6dd7611
fix device jit issue
YigitElma Feb 13, 2025
b3e961f
make _device None for single device cases
YigitElma Feb 13, 2025
bfb371c
ok now it is fixed
YigitElma Feb 13, 2025
b6b4337
Merge branch 'master' into yge/multigpu
YigitElma Feb 13, 2025
62e827e
implement multicpu, add a test, need to make it work tho
YigitElma Feb 14, 2025
315b4ed
improve test
YigitElma Feb 14, 2025
3edf125
Merge branch 'master' into yge/multigpu
YigitElma Feb 14, 2025
b22fb50
add multiprocessing, for some reason jax.Device object is not picklabl…
YigitElma Feb 21, 2025
46ed909
Merge branch 'master' into yge/multigpu
YigitElma Feb 21, 2025
11 changes: 6 additions & 5 deletions .github/workflows/notebook_tests.yml
@@ -92,8 +92,9 @@ jobs:
source .venv-${{ env.version }}/bin/activate
export PYTHONPATH=$(pwd)
pytest -v --nbmake "./docs/notebooks" \
--nbmake-timeout=2000 \
--ignore=./docs/notebooks/zernike_eval.ipynb \
--splits 3 \
--group ${{ matrix.group }} \
--splitting-algorithm least_duration
--nbmake-timeout=2000 \
--ignore=./docs/notebooks/zernike_eval.ipynb \
--ignore=./docs/notebooks/tutorials/multi_device.ipynb \
--splits 3 \
--group ${{ matrix.group }} \
--splitting-algorithm least_duration
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -26,6 +26,7 @@ New Features
- Adds a new function ``desc.coils.initialize_helical_coils`` for creating an initial guess for stage 2 helical coil optimization.
- Adds ``desc.vmec_utils.make_boozmn_output`` for writing boozmn.nc style output files
for compatibility with other codes which expect such files from the Booz_Xform code.
- Adds initial support for multiple GPU optimization. This allows derivatives to be computed across multiple GPUs and enables more memory-intensive objectives. Note that at this stage, multi-device support is aimed at memory, not speed.
- Renames compute quantity ``sqrt(g)_B`` to ``sqrt(g)_Boozer_DESC`` to more accurately reflect what the quantity is (the jacobian from (rho,theta_B,zeta_B) to (rho,theta,zeta)), and adds a new function to compute ``sqrt(g)_Boozer`` which is the jacobian from (rho,theta_B,zeta_B) to (R,phi,Z).
- Allows specification of Nyquist spectrum maximum mode numbers when using ``VMECIO.save`` to save a DESC .h5 file as a VMEC-format wout file
- Adds a new objective ``desc.objectives.ExternalObjective`` for wrapping external codes with finite differences.
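For reference, the user-facing entry point for this feature is the ``num_device`` argument added to ``desc.set_device`` (see the desc/__init__.py diff below). A minimal usage sketch, assuming a machine with at least two visible CUDA GPUs:

from desc import set_device

# set_device must run before importing anything else from DESC or JAX,
# otherwise it has no effect (see the Notes in its docstring below).
set_device(kind="gpu", num_device=2)  # takes the first two available GPUs

from desc.equilibrium import Equilibrium  # noqa: E402  (imported only after set_device)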
173 changes: 129 additions & 44 deletions desc/__init__.py
@@ -2,10 +2,13 @@

import importlib
import os
import platform
import re
import subprocess
import warnings

import colorama
import psutil
from termcolor import colored

from ._version import get_versions
@@ -58,36 +61,111 @@ def __getattr__(name):
BANNER = colored(_BANNER, "magenta")


config = {"device": None, "avail_mem": None, "kind": None}
config = {"devices": None, "avail_mem": None, "kind": None, "num_device": None}


def set_device(kind="cpu", gpuid=None):
def _get_processor_name():
"""Get the processor name of the current system."""
if platform.system() == "Windows":
return platform.processor()
elif platform.system() == "Darwin":
os.environ["PATH"] = os.environ["PATH"] + os.pathsep + "/usr/sbin"
command = "sysctl -n machdep.cpu.brand_string"
return subprocess.check_output(command).strip()
elif platform.system() == "Linux":
command = "cat /proc/cpuinfo"
all_info = subprocess.check_output(command, shell=True).decode().strip()
for line in all_info.split("\n"):
if "model name" in line:
return re.sub(".*model name.*:", "", line, 1)
return ""


def _set_cpu_count(n):
"""Set the number of CPUs visible to JAX.

By default, JAX sees the whole CPU as a single device, regardless of the number of
cores or threads. It then uses multiple cores and threads for lower level
parallelism within individual operations.

Alternatively, you can force JAX to expose a given number of "virtual" CPUs that
can then be used manually for higher level parallelism (e.g. at the level of
multiple objective functions).

This function is mainly intended for testing the parallelism in DESC on CI.

Parameters
----------
n : int
Number of virtual CPUs for high level parallelism.

Notes
-----
This function must be called before importing anything else from DESC or JAX,
and before calling ``desc.set_device``, otherwise it will have no effect.
"""
xla_flags = os.getenv("XLA_FLAGS", "")
xla_flags = re.sub(
r"--xla_force_host_platform_device_count=\S+", "", xla_flags
).split()
os.environ["XLA_FLAGS"] = " ".join(
[f"--xla_force_host_platform_device_count={n}"] + xla_flags
)


def set_device(kind="cpu", gpuid=None, num_device=1): # noqa: C901
"""Sets the device to use for computation.

If kind==``'gpu'`` and a gpuid is specified, uses the specified GPU. If
gpuid==``None`` or a wrong GPU id is given, checks available GPUs and selects the
one with the most available memory.
Respects environment variable CUDA_VISIBLE_DEVICES for selecting from multiple
available GPUs
available GPUs.

Notes
-----
This function must be called before importing anything else from DESC or JAX,
otherwise it will have no effect.

Parameters
----------
kind : {``'cpu'``, ``'gpu'``}
whether to use CPU or GPU.
gpuid : int, optional
GPU id to use. Default is None. Supported only when num_device is 1.
num_device : int
number of devices to use. Default is 1.

"""
config["kind"] = kind
config["num_device"] = num_device

cpu_mem = psutil.virtual_memory().available / 1024**3 # RAM in GB
cpu_info = _get_processor_name()
config["cpu_info"] = f"{cpu_info} CPU"
config["cpu_mem"] = cpu_mem

if kind == "cpu":
os.environ["JAX_PLATFORMS"] = "cpu"
os.environ["CUDA_VISIBLE_DEVICES"] = ""
import psutil

cpu_mem = psutil.virtual_memory().available / 1024**3 # RAM in GB
config["device"] = "CPU"
config["avail_mem"] = cpu_mem
if num_device == 1:
config["devices"] = [f"{cpu_info} CPU"]
config["avail_mems"] = [cpu_mem]
else:
try:
import jax

jax_cpu = jax.devices("cpu")
assert len(jax_cpu) == num_device
config["devices"] = [f"{dev}" for dev in jax_cpu]
config["avail_mems"] = [cpu_mem for _ in range(num_device)]
except ModuleNotFoundError:
raise ValueError(
"JAX not installed. Please install JAX to use multiple CPUs. "
"Alternatively, set num_device=1 to use a single CPU."
)

if kind == "gpu":
# Set CUDA_DEVICE_ORDER so the IDs assigned by CUDA match those from nvidia-smi
elif kind == "gpu":
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
import nvgpu

@@ -100,55 +178,62 @@ def set_device(kind="cpu", gpuid=None):
set_device(kind="cpu")
return

maxmem = 0
selected_gpu = None
gpu_ids = [dev["index"] for dev in devices]
if "CUDA_VISIBLE_DEVICES" in os.environ:
cuda_ids = [
s for s in re.findall(r"\b\d+\b", os.environ["CUDA_VISIBLE_DEVICES"])
]
# check that the visible devices actually exist and are gpus
gpu_ids = [i for i in cuda_ids if i in gpu_ids]
if len(gpu_ids) == 0:
# cuda visible devices = '' -> don't use any gpu
warnings.warn(
colored(
(
"CUDA_VISIBLE_DEVICES={} ".format(
os.environ["CUDA_VISIBLE_DEVICES"]
)
+ "did not match any physical GPU "
+ "(id={}), falling back to CPU".format(
[dev["index"] for dev in devices]
)
),
f"CUDA_VISIBLE_DEVICES={os.environ['CUDA_VISIBLE_DEVICES']} did "
"not match any physical GPU "
f"(id={[dev['index'] for dev in devices]}), falling back to CPU",
"yellow",
)
)
set_device(kind="cpu")
return

devices = [dev for dev in devices if dev["index"] in gpu_ids]
memories = {dev["index"]: dev["mem_total"] - dev["mem_used"] for dev in devices}

if num_device == 1:
if gpuid is not None:
if str(gpuid) in gpu_ids:
selected_gpu = next(
dev for dev in devices if dev["index"] == str(gpuid)
)
else:
warnings.warn(
colored(
f"Specified gpuid {gpuid} not found, selecting GPU with "
"most memory",
"yellow",
)
)
else:
selected_gpu = max(
devices, key=lambda dev: dev["mem_total"] - dev["mem_used"]
)
devices = [selected_gpu]

if gpuid is not None and (str(gpuid) in gpu_ids):
selected_gpu = [dev for dev in devices if dev["index"] == str(gpuid)][0]
else:
for dev in devices:
mem = dev["mem_total"] - dev["mem_used"]
if mem > maxmem:
maxmem = mem
selected_gpu = dev
config["device"] = selected_gpu["type"] + " (id={})".format(
selected_gpu["index"]
)
if gpuid is not None and not (str(gpuid) in gpu_ids):
warnings.warn(
colored(
"Specified gpuid {} not found, falling back to ".format(str(gpuid))
+ config["device"],
"yellow",
if num_device > len(devices):
raise ValueError(
f"Requested {num_device} GPUs, but only {len(devices)} available"
)
)
config["avail_mem"] = (
selected_gpu["mem_total"] - selected_gpu["mem_used"]
) / 1024 # in GB
os.environ["CUDA_VISIBLE_DEVICES"] = str(selected_gpu["index"])
if gpuid is not None:
# TODO: implement multiple GPU selection
raise ValueError("Cannot specify `gpuid` when requesting multiple GPUs")

config["avail_mems"] = [
memories[dev["index"]] / 1024 for dev in devices[:num_device]
] # in GB
config["devices"] = [
f"{dev['type']} (id={dev['index']})" for dev in devices[:num_device]
]
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
str(dev["index"]) for dev in devices[:num_device]
)
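To exercise the new multi-CPU path (intended mainly for CI testing of the parallelism, per the ``_set_cpu_count`` docstring above), a rough sketch based on the functions in this diff:

from desc import _set_cpu_count, set_device

# Must run before JAX is imported and before set_device: exposes 4 "virtual"
# CPU devices via XLA_FLAGS=--xla_force_host_platform_device_count=4.
_set_cpu_count(4)
set_device(kind="cpu", num_device=4)

import jax  # noqa: E402

print(jax.devices("cpu"))  # expected to list 4 CPU devices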