From 4945700f63a79186b99b3090cb8b94e54b4aa77a Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Fri, 28 Apr 2023 16:27:45 -0500 Subject: [PATCH 01/20] first attempt to sketch out cpu affinity bindings --- balsam/platform/app_run/polaris.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index 72834f39..4da45bfd 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -1,3 +1,5 @@ +import os + from .app_run import SubprocessAppRun @@ -9,6 +11,18 @@ class PolarisRun(SubprocessAppRun): def _build_cmdline(self) -> str: node_ids = [h for h in self._node_spec.hostnames] cpu_bind = self._launch_params.get("cpu_bind", "none") + if ( + cpu_bind == "none" + and self._gpus_per_rank > 0 + and self.get_num_ranks() == 8 + and self.get_cpus_per_rank == 1 + ): + gpu_device = int(os.getenv("CUDA_VISIBLE_DEVICES")) + cpu_bind_list = ["list"] + start_cpu = 32 - 8 * (1 + gpu_device) + for i in range(8): + cpu_bind_list.append(":" + str(start_cpu + i)) + cpu_bind = "".join(cpu_bind_list) nid_str = ",".join(map(str, node_ids)) args = [ "mpiexec", From 5c85221db688af073946807a6a95324e67adf07e Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Fri, 19 May 2023 18:09:04 -0500 Subject: [PATCH 02/20] updates to polaris app run --- balsam/platform/app_run/polaris.py | 56 +++++++++++++++---- .../compute_node/alcf_polaris_node.py | 2 + 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index 4da45bfd..8eb806c0 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -1,7 +1,10 @@ +import logging import os from .app_run import SubprocessAppRun +logger = logging.getLogger(__name__) + class PolarisRun(SubprocessAppRun): """ @@ -10,19 +13,35 @@ class PolarisRun(SubprocessAppRun): def _build_cmdline(self) -> str: node_ids = [h for h in self._node_spec.hostnames] + cpu_bind = self._launch_params.get("cpu_bind", "none") - if ( - cpu_bind == "none" - and self._gpus_per_rank > 0 - and self.get_num_ranks() == 8 - and self.get_cpus_per_rank == 1 - ): - gpu_device = int(os.getenv("CUDA_VISIBLE_DEVICES")) - cpu_bind_list = ["list"] - start_cpu = 32 - 8 * (1 + gpu_device) - for i in range(8): - cpu_bind_list.append(":" + str(start_cpu + i)) + if cpu_bind == "none" and self._gpus_per_rank > 0: + gpu_device = self._envs["CUDA_VISIBLE_DEVICES"] + gpu_ids = gpu_device.split(",") + gpu_ids.reverse() + cpu_ids = self._node_spec.cpu_ids[0] + + cpu_bind_list = ["verbose,list"] + for gid in gpu_ids: + start_cpu = 32 - int(gid) * 8 - self.get_cpus_per_rank() + cpu_bind_list.append(":") + for icpu in range(self.get_cpus_per_rank()): + if icpu > 0: + cpu_bind_list.append(",") + cpu_bind_list.append(str(start_cpu + icpu)) + + # start_cpu = 32 - 8 * (1 + gpu_device) + # for i in range(8): + # cpu_bind_list.append(":" + str(start_cpu + i)) cpu_bind = "".join(cpu_bind_list) + logger.info(f"Polaris app_run: cpu_bind={cpu_bind} cpu_ids={cpu_ids} gpu_ids={gpu_ids}") + + launch_params = [] + for k in self._launch_params.keys(): + if k != "cpu_bind": + launch_params.append("--" + k) + launch_params.append(str(self._launch_params[k])) + nid_str = ",".join(map(str, node_ids)) args = [ "mpiexec", @@ -36,6 +55,21 @@ def _build_cmdline(self) -> str: cpu_bind, "-d", self._threads_per_rank, + *launch_params, self._cmdline, ] return " ".join(str(arg) for arg in args) + + # Overide default because sunspot does 
not use CUDA + def _set_envs(self) -> None: + + envs = os.environ.copy() + envs.update(self._envs) + # Check the assigned GPU ID list from the first compute node: + gpu_ids = self._node_spec.gpu_ids[0] + + if gpu_ids: + envs["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" + envs["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_ids)) + envs["OMP_NUM_THREADS"] = str(self._threads_per_rank) + self._envs = envs diff --git a/balsam/platform/compute_node/alcf_polaris_node.py b/balsam/platform/compute_node/alcf_polaris_node.py index b5283c3b..af5925fb 100644 --- a/balsam/platform/compute_node/alcf_polaris_node.py +++ b/balsam/platform/compute_node/alcf_polaris_node.py @@ -12,6 +12,8 @@ class PolarisNode(ComputeNode): # turam: confirm number of cpus cpu_ids = list(range(64)) + # cms21: recommended cpu affinity for polaris nodes is in reverse order to gpu ids + cpu_ids.reverse() gpu_ids: List[IntStr] = list(range(4)) @classmethod From 9f1b1ca717bb400c22e9d32c73cd39e2f7a96d8d Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Fri, 19 May 2023 18:27:51 -0500 Subject: [PATCH 03/20] updates to polaris app run --- balsam/platform/app_run/polaris.py | 1 - 1 file changed, 1 deletion(-) diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index 8eb806c0..6b5afdec 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -18,7 +18,6 @@ def _build_cmdline(self) -> str: if cpu_bind == "none" and self._gpus_per_rank > 0: gpu_device = self._envs["CUDA_VISIBLE_DEVICES"] gpu_ids = gpu_device.split(",") - gpu_ids.reverse() cpu_ids = self._node_spec.cpu_ids[0] cpu_bind_list = ["verbose,list"] From c811f374163ec4c6e164a9fe0c6e670790712e68 Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Wed, 24 May 2023 18:49:37 -0500 Subject: [PATCH 04/20] attempt to fix cpu affinity in Polaris app_run --- balsam/platform/app_run/polaris.py | 44 ++++++++++++------- .../compute_node/alcf_polaris_node.py | 9 ++-- 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index 6b5afdec..f04b6277 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -1,6 +1,8 @@ import logging import os +from balsam.platform.compute_node.alcf_polaris_node import PolarisNode + from .app_run import SubprocessAppRun logger = logging.getLogger(__name__) @@ -14,26 +16,31 @@ class PolarisRun(SubprocessAppRun): def _build_cmdline(self) -> str: node_ids = [h for h in self._node_spec.hostnames] + # cms21: currently this is broken for multinode jobs + cpu_bind = self._launch_params.get("cpu_bind", "none") if cpu_bind == "none" and self._gpus_per_rank > 0: - gpu_device = self._envs["CUDA_VISIBLE_DEVICES"] - gpu_ids = gpu_device.split(",") - cpu_ids = self._node_spec.cpu_ids[0] + polaris_node = PolarisNode() + # gpu_device = self._envs["CUDA_VISIBLE_DEVICES"] + # gpu_ids = gpu_device.split(",") + # cpu_ids = self._node_spec.cpu_ids[0] + cpu_ids = polaris_node.cpu_ids + gpu_ids = polaris_node.gpu_ids + cpus_per_rank = self.get_cpus_per_rank() + cpu_ids_ns = self._node_spec.cpu_ids cpu_bind_list = ["verbose,list"] - for gid in gpu_ids: - start_cpu = 32 - int(gid) * 8 - self.get_cpus_per_rank() + for irank in range(self._ranks_per_node): cpu_bind_list.append(":") - for icpu in range(self.get_cpus_per_rank()): - if icpu > 0: + for i in range(cpus_per_rank): + if i > 0: cpu_bind_list.append(",") - cpu_bind_list.append(str(start_cpu + icpu)) - - # start_cpu = 32 - 8 * (1 + gpu_device) - # for i 
in range(8): - # cpu_bind_list.append(":" + str(start_cpu + i)) + cid = str(cpu_ids[i + cpus_per_rank * irank]) + cpu_bind_list.append(cid) cpu_bind = "".join(cpu_bind_list) - logger.info(f"Polaris app_run: cpu_bind={cpu_bind} cpu_ids={cpu_ids} gpu_ids={gpu_ids}") + logger.info( + f"Polaris app_run: cpu_bind={cpu_bind} cpu_ids={cpu_ids} cpu_ids_ns={cpu_ids_ns} gpu_ids={gpu_ids}" + ) launch_params = [] for k in self._launch_params.keys(): @@ -65,9 +72,16 @@ def _set_envs(self) -> None: envs = os.environ.copy() envs.update(self._envs) # Check the assigned GPU ID list from the first compute node: - gpu_ids = self._node_spec.gpu_ids[0] + gpu_ids = self._node_spec.gpu_ids + cpu_ids = self._node_spec.cpu_ids + logger.info(f"Polaris set_envs: gpu_ids={gpu_ids} cpu_ids={cpu_ids}") + if gpu_ids[0] and len(self._node_spec.node_ids) == 1: + envs["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" + envs["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_ids)) + if not gpu_ids[0] and len(self._node_spec.node_ids) > 1 and self._gpus_per_rank > 0: + polaris_node = PolarisNode() + gpu_ids = polaris_node.gpu_ids - if gpu_ids: envs["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" envs["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_ids)) envs["OMP_NUM_THREADS"] = str(self._threads_per_rank) diff --git a/balsam/platform/compute_node/alcf_polaris_node.py b/balsam/platform/compute_node/alcf_polaris_node.py index af5925fb..c2788e6d 100644 --- a/balsam/platform/compute_node/alcf_polaris_node.py +++ b/balsam/platform/compute_node/alcf_polaris_node.py @@ -10,12 +10,13 @@ class PolarisNode(ComputeNode): - # turam: confirm number of cpus - cpu_ids = list(range(64)) - # cms21: recommended cpu affinity for polaris nodes is in reverse order to gpu ids - cpu_ids.reverse() + + cpu_ids = list(range(32)) gpu_ids: List[IntStr] = list(range(4)) + # cms21: optimal gpu/cpu binding on Polaris nodes goes in reverse order + gpu_ids.reverse() + @classmethod def get_job_nodelist(cls) -> List["PolarisNode"]: """ From 0936c686177f1c67c0e1980da60ab3b507bd604e Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Thu, 25 May 2023 03:16:41 -0500 Subject: [PATCH 05/20] added polaris gpu affinity script --- balsam/platform/app_run/app_run.py | 11 ++++ balsam/platform/app_run/polaris.py | 84 +++++++++++++++++++++++++----- 2 files changed, 81 insertions(+), 14 deletions(-) diff --git a/balsam/platform/app_run/app_run.py b/balsam/platform/app_run/app_run.py index ff9f2cf7..5d5af973 100644 --- a/balsam/platform/app_run/app_run.py +++ b/balsam/platform/app_run/app_run.py @@ -8,6 +8,7 @@ import psutil # type: ignore +from balsam.platform.compute_node import ComputeNode from balsam.site.launcher import NodeSpec logger = logging.getLogger(__name__) @@ -72,6 +73,16 @@ def get_cpus_per_rank(self) -> int: cpu_per_rank = max(1, int(self._threads_per_rank // self._threads_per_core)) return cpu_per_rank + def get_gpus_per_node_for_job(self) -> int: + gpus_per_node = self._gpus_per_rank * self._ranks_per_node + compute_node = ComputeNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) + total_gpus_per_node = len(compute_node.gpu_ids) + if gpus_per_node > total_gpus_per_node: + logger.warning( + f"You have too many gpus per node! 
Physical gpus={total_gpus_per_node} gpus_per_rank={self._gpus_per_rank} ranks_per_node={self._ranks_per_node}" + ) + return min(gpus_per_node, total_gpus_per_node) + @abstractmethod def start(self) -> None: pass diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index f04b6277..1a0bfae7 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -1,5 +1,6 @@ import logging import os +import stat from balsam.platform.compute_node.alcf_polaris_node import PolarisNode @@ -19,13 +20,66 @@ def _build_cmdline(self) -> str: # cms21: currently this is broken for multinode jobs cpu_bind = self._launch_params.get("cpu_bind", "none") + gpu_affinity_script = "" if cpu_bind == "none" and self._gpus_per_rank > 0: - polaris_node = PolarisNode() - # gpu_device = self._envs["CUDA_VISIBLE_DEVICES"] - # gpu_ids = gpu_device.split(",") - # cpu_ids = self._node_spec.cpu_ids[0] - cpu_ids = polaris_node.cpu_ids - gpu_ids = polaris_node.gpu_ids + if len(self._node_spec.node_ids) == 1 or self._ranks_per_node == 1: + cpu_ids = self._node_spec.cpu_ids[0] + gpu_ids = self._node_spec.gpu_ids[0] + else: + gpu_ids = self._envs["CUDA_VISIBLE_DEVICES"].split( + "," + ) # These should be distributed across local ranks + polaris_node = PolarisNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) + cpu_ids = polaris_node.cpu_ids + node_gpu_ids = polaris_node.gpu_ids + gpu_affinity_script = self._cwd.joinpath("set_affinity_gpu_polaris.sh") + with open(gpu_affinity_script, "w") as f: + f.write( + f"""#!/bin/bash -l + gpu_ids=( "{" ".join(gpu_ids)}" ) + num_gpus={len(node_gpu_ids)} + gpus_per_rank={self._gpus_per_rank} + ngpu=0 + gpu_string=""\n + """ + ) + f.write( + """while [ $ngpu -lt $gpus_per_rank ] + do + igpu=$(((${PMI_LOCAL_RANK} * ${gpus_per_rank}) + ${ngpu} % ${num_gpus})) + gpu=${gpu_ids[$igpu]} + ##gpu=$((${num_gpus} - 1 - ${ngpu} - (${PMI_LOCAL_RANK} * ${gpus_per_rank}) % ${num_gpus})) + sep="" + if [ $ngpu -gt 0 ] + then + sep="," + fi + gpu_string=$gpu_string$sep$gpu + ngpu=$((${igpu} + 1)) + done + export CUDA_VISIBLE_DEVICES=$gpu_string + echo “RANK= ${PMI_RANK} LOCAL_RANK= ${PMI_LOCAL_RANK} gpu= $gpu_string” + exec "$@" + """ + ) + st = os.stat(gpu_affinity_script) + os.chmod(gpu_affinity_script, st.st_mode | stat.S_IEXEC) + + # gpu_ids = polaris_node.gpu_ids + # num_gpus = len(gpu_ids) + # gpu_affinity_script = self._cwd.joinpath("set_affinity_gpu_polaris.sh") + # with open(gpu_affinity_script,"w") as f: + # f.write(f"""#!/bin/bash -l + # num_gpus={num_gpus} + # gpus_per_rank={self._gpus_per_rank}\n"""+ + # """gpu=$((${num_gpus} - 1 - ${PMI_LOCAL_RANK} % ${num_gpus}))\n + # export CUDA_VISIBLE_DEVICES=$gpu\n + # echo “RANK= ${PMI_RANK} LOCAL_RANK= ${PMI_LOCAL_RANK} gpu= ${gpu}”\n + # exec "$@"\n + # """) + # st = os.stat(gpu_affinity_script) + # os.chmod(gpu_affinity_script, st.st_mode | stat.S_IEXEC) + cpus_per_rank = self.get_cpus_per_rank() cpu_ids_ns = self._node_spec.cpu_ids @@ -62,6 +116,7 @@ def _build_cmdline(self) -> str: "-d", self._threads_per_rank, *launch_params, + gpu_affinity_script, self._cmdline, ] return " ".join(str(arg) for arg in args) @@ -72,17 +127,18 @@ def _set_envs(self) -> None: envs = os.environ.copy() envs.update(self._envs) # Check the assigned GPU ID list from the first compute node: - gpu_ids = self._node_spec.gpu_ids - cpu_ids = self._node_spec.cpu_ids + gpu_ids = self._node_spec.gpu_ids[0] + cpu_ids = self._node_spec.cpu_ids[0] logger.info(f"Polaris set_envs: gpu_ids={gpu_ids} cpu_ids={cpu_ids}") - 
if gpu_ids[0] and len(self._node_spec.node_ids) == 1: + if gpu_ids: envs["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" envs["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_ids)) - if not gpu_ids[0] and len(self._node_spec.node_ids) > 1 and self._gpus_per_rank > 0: - polaris_node = PolarisNode() - gpu_ids = polaris_node.gpu_ids + else: + polaris_node = PolarisNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) + if self._gpus_per_rank > 0: + gpu_ids = polaris_node.gpu_ids[0 : self.get_gpus_per_node_for_job()] + envs["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" + envs["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_ids)) - envs["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" - envs["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_ids)) envs["OMP_NUM_THREADS"] = str(self._threads_per_rank) self._envs = envs From 08e09766a6fa1ee912e209b76f56e36479ac9424 Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Thu, 25 May 2023 10:20:40 -0500 Subject: [PATCH 06/20] fixes to the affinity script --- balsam/platform/app_run/polaris.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index 1a0bfae7..c30f4536 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -22,7 +22,7 @@ def _build_cmdline(self) -> str: cpu_bind = self._launch_params.get("cpu_bind", "none") gpu_affinity_script = "" if cpu_bind == "none" and self._gpus_per_rank > 0: - if len(self._node_spec.node_ids) == 1 or self._ranks_per_node == 1: + if len(self._node_spec.node_ids) == 1: cpu_ids = self._node_spec.cpu_ids[0] gpu_ids = self._node_spec.gpu_ids[0] else: @@ -31,12 +31,15 @@ def _build_cmdline(self) -> str: ) # These should be distributed across local ranks polaris_node = PolarisNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) cpu_ids = polaris_node.cpu_ids + + if len(self._node_spec.node_ids) > 1 or self._ranks_per_node > 1: + polaris_node = PolarisNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) node_gpu_ids = polaris_node.gpu_ids gpu_affinity_script = self._cwd.joinpath("set_affinity_gpu_polaris.sh") with open(gpu_affinity_script, "w") as f: f.write( f"""#!/bin/bash -l - gpu_ids=( "{" ".join(gpu_ids)}" ) + gpu_ids=( {" ".join(gpu_ids)} ) num_gpus={len(node_gpu_ids)} gpus_per_rank={self._gpus_per_rank} ngpu=0 @@ -65,21 +68,6 @@ def _build_cmdline(self) -> str: st = os.stat(gpu_affinity_script) os.chmod(gpu_affinity_script, st.st_mode | stat.S_IEXEC) - # gpu_ids = polaris_node.gpu_ids - # num_gpus = len(gpu_ids) - # gpu_affinity_script = self._cwd.joinpath("set_affinity_gpu_polaris.sh") - # with open(gpu_affinity_script,"w") as f: - # f.write(f"""#!/bin/bash -l - # num_gpus={num_gpus} - # gpus_per_rank={self._gpus_per_rank}\n"""+ - # """gpu=$((${num_gpus} - 1 - ${PMI_LOCAL_RANK} % ${num_gpus}))\n - # export CUDA_VISIBLE_DEVICES=$gpu\n - # echo “RANK= ${PMI_RANK} LOCAL_RANK= ${PMI_LOCAL_RANK} gpu= ${gpu}”\n - # exec "$@"\n - # """) - # st = os.stat(gpu_affinity_script) - # os.chmod(gpu_affinity_script, st.st_mode | stat.S_IEXEC) - cpus_per_rank = self.get_cpus_per_rank() cpu_ids_ns = self._node_spec.cpu_ids From e61c12cbe79afad2e8ebbcb7aa492ab558aeb329 Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Thu, 25 May 2023 10:49:10 -0500 Subject: [PATCH 07/20] some style changes --- balsam/platform/app_run/polaris.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index 
c30f4536..0f258033 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -2,7 +2,7 @@ import os import stat -from balsam.platform.compute_node.alcf_polaris_node import PolarisNode +from balsam.platform.compute_node import PolarisNode from .app_run import SubprocessAppRun @@ -17,8 +17,6 @@ class PolarisRun(SubprocessAppRun): def _build_cmdline(self) -> str: node_ids = [h for h in self._node_spec.hostnames] - # cms21: currently this is broken for multinode jobs - cpu_bind = self._launch_params.get("cpu_bind", "none") gpu_affinity_script = "" if cpu_bind == "none" and self._gpus_per_rank > 0: From 7bcbc52c353a06977ee880c6cd02a69d21396519 Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Thu, 25 May 2023 15:39:26 -0500 Subject: [PATCH 08/20] reverting affinity script addition, put in different branch --- balsam/platform/app_run/polaris.py | 62 ++++-------------------------- 1 file changed, 8 insertions(+), 54 deletions(-) diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index 0f258033..05b506c1 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -1,6 +1,5 @@ import logging import os -import stat from balsam.platform.compute_node import PolarisNode @@ -18,56 +17,15 @@ def _build_cmdline(self) -> str: node_ids = [h for h in self._node_spec.hostnames] cpu_bind = self._launch_params.get("cpu_bind", "none") - gpu_affinity_script = "" - if cpu_bind == "none" and self._gpus_per_rank > 0: + if cpu_bind == "none" and self._gpus_per_rank > 0 and self._ranks_per_node == 1: + gpu_ids = self._envs["CUDA_VISIBLE_DEVICES"].split(",") if len(self._node_spec.node_ids) == 1: cpu_ids = self._node_spec.cpu_ids[0] - gpu_ids = self._node_spec.gpu_ids[0] else: - gpu_ids = self._envs["CUDA_VISIBLE_DEVICES"].split( - "," - ) # These should be distributed across local ranks polaris_node = PolarisNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) cpu_ids = polaris_node.cpu_ids - if len(self._node_spec.node_ids) > 1 or self._ranks_per_node > 1: - polaris_node = PolarisNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) - node_gpu_ids = polaris_node.gpu_ids - gpu_affinity_script = self._cwd.joinpath("set_affinity_gpu_polaris.sh") - with open(gpu_affinity_script, "w") as f: - f.write( - f"""#!/bin/bash -l - gpu_ids=( {" ".join(gpu_ids)} ) - num_gpus={len(node_gpu_ids)} - gpus_per_rank={self._gpus_per_rank} - ngpu=0 - gpu_string=""\n - """ - ) - f.write( - """while [ $ngpu -lt $gpus_per_rank ] - do - igpu=$(((${PMI_LOCAL_RANK} * ${gpus_per_rank}) + ${ngpu} % ${num_gpus})) - gpu=${gpu_ids[$igpu]} - ##gpu=$((${num_gpus} - 1 - ${ngpu} - (${PMI_LOCAL_RANK} * ${gpus_per_rank}) % ${num_gpus})) - sep="" - if [ $ngpu -gt 0 ] - then - sep="," - fi - gpu_string=$gpu_string$sep$gpu - ngpu=$((${igpu} + 1)) - done - export CUDA_VISIBLE_DEVICES=$gpu_string - echo “RANK= ${PMI_RANK} LOCAL_RANK= ${PMI_LOCAL_RANK} gpu= $gpu_string” - exec "$@" - """ - ) - st = os.stat(gpu_affinity_script) - os.chmod(gpu_affinity_script, st.st_mode | stat.S_IEXEC) - cpus_per_rank = self.get_cpus_per_rank() - cpu_ids_ns = self._node_spec.cpu_ids cpu_bind_list = ["verbose,list"] for irank in range(self._ranks_per_node): @@ -78,9 +36,7 @@ def _build_cmdline(self) -> str: cid = str(cpu_ids[i + cpus_per_rank * irank]) cpu_bind_list.append(cid) cpu_bind = "".join(cpu_bind_list) - logger.info( - f"Polaris app_run: cpu_bind={cpu_bind} cpu_ids={cpu_ids} cpu_ids_ns={cpu_ids_ns} gpu_ids={gpu_ids}" - ) + logger.info(f"Polaris 
app_run: cpu_bind={cpu_bind} cpu_ids={cpu_ids} gpu_ids={gpu_ids}") launch_params = [] for k in self._launch_params.keys(): @@ -102,7 +58,6 @@ def _build_cmdline(self) -> str: "-d", self._threads_per_rank, *launch_params, - gpu_affinity_script, self._cmdline, ] return " ".join(str(arg) for arg in args) @@ -116,15 +71,14 @@ def _set_envs(self) -> None: gpu_ids = self._node_spec.gpu_ids[0] cpu_ids = self._node_spec.cpu_ids[0] logger.info(f"Polaris set_envs: gpu_ids={gpu_ids} cpu_ids={cpu_ids}") - if gpu_ids: + if gpu_ids and self._ranks_per_node == 1: envs["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" envs["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_ids)) - else: + if not gpu_ids and self._ranks_per_node == 1 and self._gpus_per_rank > 0: polaris_node = PolarisNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) - if self._gpus_per_rank > 0: - gpu_ids = polaris_node.gpu_ids[0 : self.get_gpus_per_node_for_job()] - envs["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" - envs["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_ids)) + gpu_ids = polaris_node.gpu_ids[0 : self._gpus_per_rank] + envs["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" + envs["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_ids)) envs["OMP_NUM_THREADS"] = str(self._threads_per_rank) self._envs = envs From b0973cf47b60852b84ef77ac5556087152554b0f Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Fri, 26 May 2023 10:39:22 -0500 Subject: [PATCH 09/20] removed helper function --- balsam/platform/app_run/app_run.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/balsam/platform/app_run/app_run.py b/balsam/platform/app_run/app_run.py index 5d5af973..ff9f2cf7 100644 --- a/balsam/platform/app_run/app_run.py +++ b/balsam/platform/app_run/app_run.py @@ -8,7 +8,6 @@ import psutil # type: ignore -from balsam.platform.compute_node import ComputeNode from balsam.site.launcher import NodeSpec logger = logging.getLogger(__name__) @@ -73,16 +72,6 @@ def get_cpus_per_rank(self) -> int: cpu_per_rank = max(1, int(self._threads_per_rank // self._threads_per_core)) return cpu_per_rank - def get_gpus_per_node_for_job(self) -> int: - gpus_per_node = self._gpus_per_rank * self._ranks_per_node - compute_node = ComputeNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) - total_gpus_per_node = len(compute_node.gpu_ids) - if gpus_per_node > total_gpus_per_node: - logger.warning( - f"You have too many gpus per node! 
Physical gpus={total_gpus_per_node} gpus_per_rank={self._gpus_per_rank} ranks_per_node={self._ranks_per_node}" - ) - return min(gpus_per_node, total_gpus_per_node) - @abstractmethod def start(self) -> None: pass From 77f8941307b102c088ccda0f1bc2a65d5f24ce0b Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Fri, 26 May 2023 13:54:22 -0500 Subject: [PATCH 10/20] Updates to polaris cmdline implementation after dev discussion; includes notes --- balsam/platform/app_run/polaris.py | 47 +++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index 05b506c1..6ca5b397 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -17,9 +17,20 @@ def _build_cmdline(self) -> str: node_ids = [h for h in self._node_spec.hostnames] cpu_bind = self._launch_params.get("cpu_bind", "none") - if cpu_bind == "none" and self._gpus_per_rank > 0 and self._ranks_per_node == 1: - gpu_ids = self._envs["CUDA_VISIBLE_DEVICES"].split(",") - if len(self._node_spec.node_ids) == 1: + + # If the user does not set a cpu_bind option and gpus are being used, + # this code sets cpu-bind to be optimal for the gpus being used. + # This does not handle the case where the application is using less than + # 8 cpus per gpu. This code will not skip the appropriate number of cpus + # in the rank binding assignments. + if cpu_bind == "none" and self._gpus_per_rank > 0: + + # Here we grab the cpu_ids assigned to the job in the NodeSpec object + # If this is not set in NodeSpec (it is only set for single node jobs), + # then we take the cpu_id list from the Polaris ComputeNode subclass, + # assuming the job will have use of all the cpus in nodes assigned to it. + cpu_ids_ns = self._node_spec.cpu_ids[0] + if cpu_ids_ns: cpu_ids = self._node_spec.cpu_ids[0] else: polaris_node = PolarisNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) @@ -27,6 +38,8 @@ def _build_cmdline(self) -> str: cpus_per_rank = self.get_cpus_per_rank() + # PolarisNode reverses the order of the gpu_ids, so assigning the cpu-bind + # in ascending cpu order is what we want here. cpu_bind_list = ["verbose,list"] for irank in range(self._ranks_per_node): cpu_bind_list.append(":") @@ -36,6 +49,8 @@ def _build_cmdline(self) -> str: cid = str(cpu_ids[i + cpus_per_rank * irank]) cpu_bind_list.append(cid) cpu_bind = "".join(cpu_bind_list) + gpu_device = self._envs["CUDA_VISIBLE_DEVICES"] + gpu_ids = gpu_device.split(",") logger.info(f"Polaris app_run: cpu_bind={cpu_bind} cpu_ids={cpu_ids} gpu_ids={gpu_ids}") launch_params = [] @@ -67,18 +82,28 @@ def _set_envs(self) -> None: envs = os.environ.copy() envs.update(self._envs) - # Check the assigned GPU ID list from the first compute node: + + # Here we grab the gpus assigned to the job from NodeSpec. NodeSpec only + # sets this for single node jobs. For multinode jobs, gpu_ids below will + # be an empty list of lists (e.g. [[], []]). 
The ordering of the gpu_ids + # is reversed in PolarisNode and therefore the reverse ordering of + # cpus to gpus should be reflected here gpu_ids = self._node_spec.gpu_ids[0] cpu_ids = self._node_spec.cpu_ids[0] logger.info(f"Polaris set_envs: gpu_ids={gpu_ids} cpu_ids={cpu_ids}") - if gpu_ids and self._ranks_per_node == 1: - envs["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" - envs["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_ids)) - if not gpu_ids and self._ranks_per_node == 1 and self._gpus_per_rank > 0: - polaris_node = PolarisNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) - gpu_ids = polaris_node.gpu_ids[0 : self._gpus_per_rank] + + # Here we set CUDA_VISIBLE_DEVICES for single node jobs only. We assume + # for multinode jobs that the job has access to all gpus, and + # CUDA_VISIBLE_DEVICES is set by the user, for example by local rank with an + # gpu_affinity.sh script that wraps around the user application in the + # ApplicationDefinition. + # One special case: if your job has one node, 2 ranks, and 1 gpu per rank, the + # code here will set CUDA_VISIBLE_DEVICES to "3,2" or "1,0". A user provided + # gpu_affinity.sh script should take this assigment and use it to reset + # CUDA_VISIBLE_DEVICES for each local rank. The user script should NOT + # round-robin the setting CUDA_VISIBLE_DEVICES starting from 3. + if gpu_ids: envs["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" envs["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_ids)) - envs["OMP_NUM_THREADS"] = str(self._threads_per_rank) self._envs = envs From 2efaa8ed82c87487188a1dac01ee0d08aafb5451 Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Fri, 26 May 2023 17:23:17 -0500 Subject: [PATCH 11/20] remove turam path from polaris job-template.sh --- balsam/config/defaults/alcf_polaris/job-template.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/balsam/config/defaults/alcf_polaris/job-template.sh b/balsam/config/defaults/alcf_polaris/job-template.sh index 8dae69c2..dd090dee 100644 --- a/balsam/config/defaults/alcf_polaris/job-template.sh +++ b/balsam/config/defaults/alcf_polaris/job-template.sh @@ -8,8 +8,6 @@ export http_proxy="http://proxy:3128" export https_proxy="http://proxy:3128" -export PYTHONPATH=/home/turam/dev/polaris/balsam:$PYTHONPATH - #remove export PMI_NO_FORK=1 export BALSAM_SITE_PATH={{balsam_site_path}} cd $BALSAM_SITE_PATH From 1281a794650a3311bfb671e7656153ca60bfda10 Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Fri, 26 May 2023 17:24:04 -0500 Subject: [PATCH 12/20] more updates to polaris cmdline --- balsam/platform/app_run/polaris.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index 6ca5b397..46062f2d 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -49,8 +49,11 @@ def _build_cmdline(self) -> str: cid = str(cpu_ids[i + cpus_per_rank * irank]) cpu_bind_list.append(cid) cpu_bind = "".join(cpu_bind_list) - gpu_device = self._envs["CUDA_VISIBLE_DEVICES"] - gpu_ids = gpu_device.split(",") + if "CUDA_VISIBLE_DEVICES" in self._envs.keys(): + gpu_device = self._envs["CUDA_VISIBLE_DEVICES"] + gpu_ids = gpu_device.split(",") + else: + gpu_ids = [] logger.info(f"Polaris app_run: cpu_bind={cpu_bind} cpu_ids={cpu_ids} gpu_ids={gpu_ids}") launch_params = [] @@ -71,7 +74,7 @@ def _build_cmdline(self) -> str: "--cpu-bind", cpu_bind, "-d", - self._threads_per_rank, + self.get_cpus_per_rank(), *launch_params, self._cmdline, ] From 
1b64cdb798f7804b652782149d0b4d07b5449089 Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Wed, 31 May 2023 21:56:52 -0500 Subject: [PATCH 13/20] changes to make depth paramter for Polaris app_run consistent with docs --- balsam/platform/app_run/app_run.py | 23 +++++++++++++++++++---- balsam/platform/app_run/polaris.py | 18 +++++++++++++++++- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/balsam/platform/app_run/app_run.py b/balsam/platform/app_run/app_run.py index ff9f2cf7..7713b974 100644 --- a/balsam/platform/app_run/app_run.py +++ b/balsam/platform/app_run/app_run.py @@ -8,6 +8,7 @@ import psutil # type: ignore +from balsam.platform.compute_node import ComputeNode from balsam.site.launcher import NodeSpec logger = logging.getLogger(__name__) @@ -67,10 +68,24 @@ def get_num_ranks(self) -> int: return self._ranks_per_node * len(self._node_spec.node_ids) def get_cpus_per_rank(self) -> int: - cpu_per_rank = len(self._node_spec.cpu_ids[0]) // self._ranks_per_node - if not cpu_per_rank: - cpu_per_rank = max(1, int(self._threads_per_rank // self._threads_per_core)) - return cpu_per_rank + + # Get the list of cpus assigned to the job. If it is a single node job, that is stored in + # the NodeSpec object. If it is a multinode job, the cpu_ids assigned to NodeSpec is empty, + # so we will assume all cpus on a compute node are available to the job. The list of cpus is + # just the list of cpus on the node in that case. + cpu_ids = self._node_spec.cpu_ids[0] + if not cpu_ids: + compute_node = ComputeNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) + cpu_ids = compute_node.cpu_ids + + cpus_per_node = len(cpu_ids) + cpus_per_rank = cpus_per_node // self._ranks_per_node + + # If ranks are oversubscribed to cpus (ranks_per_node > cpus_per_node), set it to a minimum of + # 1 cpu per rank or the number of cores per rank from the threading settings + if not cpus_per_rank: + cpus_per_rank = max(1, int(self._threads_per_rank // self._threads_per_core)) + return cpus_per_rank @abstractmethod def start(self) -> None: diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index 46062f2d..5f59cd78 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -32,6 +32,8 @@ def _build_cmdline(self) -> str: cpu_ids_ns = self._node_spec.cpu_ids[0] if cpu_ids_ns: cpu_ids = self._node_spec.cpu_ids[0] + if self._threads_per_core == 2: + polaris_node = PolarisNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) else: polaris_node = PolarisNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) cpu_ids = polaris_node.cpu_ids @@ -48,6 +50,13 @@ def _build_cmdline(self) -> str: cpu_bind_list.append(",") cid = str(cpu_ids[i + cpus_per_rank * irank]) cpu_bind_list.append(cid) + # If the job is using 2 hardware threads per core, we need to add those threads to the list + # The additional threads should go in the same ascending order (threads 0 and 32 are on the + # same physical core, threads 31 and 63 are on the same physical core) + if self._threads_per_core == 2: + cpu_bind_list.append(",") + cid = str(cpu_ids[i + cpus_per_rank * irank] + len(polaris_node.cpu_ids)) + cpu_bind_list.append(cid) cpu_bind = "".join(cpu_bind_list) if "CUDA_VISIBLE_DEVICES" in self._envs.keys(): gpu_device = self._envs["CUDA_VISIBLE_DEVICES"] @@ -62,6 +71,13 @@ def _build_cmdline(self) -> str: launch_params.append("--" + k) launch_params.append(str(self._launch_params[k])) + # The value of -d depends on the setting of cpu_bind. 
If cpu-bind=core, -d is the number of + # physical cores per rank, otherwise it is the number of hardware threads per rank + # https://docs.alcf.anl.gov/running-jobs/example-job-scripts/ + depth = self._threads_per_rank + if "core" in cpu_bind: + depth = self.get_cpus_per_rank() + nid_str = ",".join(map(str, node_ids)) args = [ "mpiexec", @@ -74,7 +90,7 @@ def _build_cmdline(self) -> str: "--cpu-bind", cpu_bind, "-d", - self.get_cpus_per_rank(), + depth, *launch_params, self._cmdline, ] From 937947ecfa7f483bb8ee789a6ba4c003f6bd7e28 Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Wed, 31 May 2023 22:41:17 -0500 Subject: [PATCH 14/20] Removed blank lines --- balsam/platform/app_run/app_run.py | 1 - balsam/platform/app_run/polaris.py | 2 -- balsam/platform/compute_node/alcf_polaris_node.py | 1 - 3 files changed, 4 deletions(-) diff --git a/balsam/platform/app_run/app_run.py b/balsam/platform/app_run/app_run.py index 7713b974..2aa06e39 100644 --- a/balsam/platform/app_run/app_run.py +++ b/balsam/platform/app_run/app_run.py @@ -68,7 +68,6 @@ def get_num_ranks(self) -> int: return self._ranks_per_node * len(self._node_spec.node_ids) def get_cpus_per_rank(self) -> int: - # Get the list of cpus assigned to the job. If it is a single node job, that is stored in # the NodeSpec object. If it is a multinode job, the cpu_ids assigned to NodeSpec is empty, # so we will assume all cpus on a compute node are available to the job. The list of cpus is diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index 5f59cd78..761878a5 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -24,7 +24,6 @@ def _build_cmdline(self) -> str: # 8 cpus per gpu. This code will not skip the appropriate number of cpus # in the rank binding assignments. 
if cpu_bind == "none" and self._gpus_per_rank > 0: - # Here we grab the cpu_ids assigned to the job in the NodeSpec object # If this is not set in NodeSpec (it is only set for single node jobs), # then we take the cpu_id list from the Polaris ComputeNode subclass, @@ -98,7 +97,6 @@ def _build_cmdline(self) -> str: # Overide default because sunspot does not use CUDA def _set_envs(self) -> None: - envs = os.environ.copy() envs.update(self._envs) diff --git a/balsam/platform/compute_node/alcf_polaris_node.py b/balsam/platform/compute_node/alcf_polaris_node.py index c2788e6d..208490a1 100644 --- a/balsam/platform/compute_node/alcf_polaris_node.py +++ b/balsam/platform/compute_node/alcf_polaris_node.py @@ -10,7 +10,6 @@ class PolarisNode(ComputeNode): - cpu_ids = list(range(32)) gpu_ids: List[IntStr] = list(range(4)) From 8d6f5f00f1fd2f14c703998e8955c7074640f478 Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Wed, 31 May 2023 22:51:30 -0500 Subject: [PATCH 15/20] lint fixes --- balsam/_api/model.py | 2 +- balsam/config/config.py | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/balsam/_api/model.py b/balsam/_api/model.py index 05d4831a..38ae2c48 100644 --- a/balsam/_api/model.py +++ b/balsam/_api/model.py @@ -186,7 +186,7 @@ def __repr__(self) -> str: def __str__(self) -> str: d = self.display_dict() - return yaml.dump(d, sort_keys=False, indent=4) # type: ignore + return yaml.dump(d, sort_keys=False, indent=4) def __eq__(self, other: Any) -> bool: if not isinstance(other, BalsamModel): diff --git a/balsam/config/config.py b/balsam/config/config.py index 5766afc5..00d95c69 100644 --- a/balsam/config/config.py +++ b/balsam/config/config.py @@ -235,13 +235,10 @@ def save(self, path: Union[str, Path]) -> None: fp.write(self.dump_yaml()) def dump_yaml(self) -> str: - return cast( - str, - yaml.dump( - json.loads(self.json()), - sort_keys=False, - indent=4, - ), + return yaml.dump( + json.loads(self.json()), + sort_keys=False, + indent=4, ) @classmethod From c57beb787ba78471dbf73950cb78434aaf0fc0a4 Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Thu, 1 Jun 2023 11:56:46 -0500 Subject: [PATCH 16/20] fix type error --- balsam/platform/app_run/app_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/balsam/platform/app_run/app_run.py b/balsam/platform/app_run/app_run.py index 2aa06e39..c4efb45b 100644 --- a/balsam/platform/app_run/app_run.py +++ b/balsam/platform/app_run/app_run.py @@ -75,7 +75,7 @@ def get_cpus_per_rank(self) -> int: cpu_ids = self._node_spec.cpu_ids[0] if not cpu_ids: compute_node = ComputeNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) - cpu_ids = compute_node.cpu_ids + cpu_ids = list(compute_node.cpu_ids) cpus_per_node = len(cpu_ids) cpus_per_rank = cpus_per_node // self._ranks_per_node From 0691ed3d88c2170c4f355808e9c311d95145aecf Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Thu, 1 Jun 2023 12:00:49 -0500 Subject: [PATCH 17/20] fix type error --- balsam/platform/app_run/app_run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/balsam/platform/app_run/app_run.py b/balsam/platform/app_run/app_run.py index c4efb45b..3c25c4f6 100644 --- a/balsam/platform/app_run/app_run.py +++ b/balsam/platform/app_run/app_run.py @@ -73,11 +73,11 @@ def get_cpus_per_rank(self) -> int: # so we will assume all cpus on a compute node are available to the job. The list of cpus is # just the list of cpus on the node in that case. 
cpu_ids = self._node_spec.cpu_ids[0] + cpus_per_node = len(cpu_ids) if not cpu_ids: compute_node = ComputeNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) - cpu_ids = list(compute_node.cpu_ids) + cpus_per_node = len(compute_node.cpu_ids) - cpus_per_node = len(cpu_ids) cpus_per_rank = cpus_per_node // self._ranks_per_node # If ranks are oversubscribed to cpus (ranks_per_node > cpus_per_node), set it to a minimum of From ad0e661b68a9bfd9f5c37ee537c75dac48043bc5 Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Tue, 13 Jun 2023 16:28:22 -0500 Subject: [PATCH 18/20] made change to accept a user setting cpu_bind to none --- balsam/platform/app_run/polaris.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index 761878a5..d18efb12 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -16,14 +16,14 @@ class PolarisRun(SubprocessAppRun): def _build_cmdline(self) -> str: node_ids = [h for h in self._node_spec.hostnames] - cpu_bind = self._launch_params.get("cpu_bind", "none") - - # If the user does not set a cpu_bind option and gpus are being used, + # If the user does not set a cpu_bind option, # this code sets cpu-bind to be optimal for the gpus being used. # This does not handle the case where the application is using less than # 8 cpus per gpu. This code will not skip the appropriate number of cpus # in the rank binding assignments. - if cpu_bind == "none" and self._gpus_per_rank > 0: + if "cpu_bind" in self._launch_params.keys(): + cpu_bind = self._launch_params.get("cpu_bind", "none") + else: # Here we grab the cpu_ids assigned to the job in the NodeSpec object # If this is not set in NodeSpec (it is only set for single node jobs), # then we take the cpu_id list from the Polaris ComputeNode subclass, @@ -57,12 +57,12 @@ def _build_cmdline(self) -> str: cid = str(cpu_ids[i + cpus_per_rank * irank] + len(polaris_node.cpu_ids)) cpu_bind_list.append(cid) cpu_bind = "".join(cpu_bind_list) - if "CUDA_VISIBLE_DEVICES" in self._envs.keys(): - gpu_device = self._envs["CUDA_VISIBLE_DEVICES"] - gpu_ids = gpu_device.split(",") - else: - gpu_ids = [] - logger.info(f"Polaris app_run: cpu_bind={cpu_bind} cpu_ids={cpu_ids} gpu_ids={gpu_ids}") + # if "CUDA_VISIBLE_DEVICES" in self._envs.keys(): + # gpu_device = self._envs["CUDA_VISIBLE_DEVICES"] + # gpu_ids = gpu_device.split(",") + # else: + # gpu_ids = [] + # logger.info(f"Polaris app_run: cpu_bind={cpu_bind} cpu_ids={cpu_ids} gpu_ids={gpu_ids}") launch_params = [] for k in self._launch_params.keys(): From 6a10eb72b8d421cce25a6166b90abf029d49596e Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Fri, 4 Aug 2023 16:52:42 -0500 Subject: [PATCH 19/20] polaris app_run cleanup --- balsam/platform/app_run/polaris.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index d18efb12..3bc86a65 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -22,26 +22,24 @@ def _build_cmdline(self) -> str: # 8 cpus per gpu. This code will not skip the appropriate number of cpus # in the rank binding assignments. 
if "cpu_bind" in self._launch_params.keys(): - cpu_bind = self._launch_params.get("cpu_bind", "none") + cpu_bind = self._launch_params.get("cpu_bind") + elif "--cpu-bind" in self._launch_params.keys(): + cpu_bind = self._launch_params.get("--cpu-bind") else: # Here we grab the cpu_ids assigned to the job in the NodeSpec object # If this is not set in NodeSpec (it is only set for single node jobs), # then we take the cpu_id list from the Polaris ComputeNode subclass, # assuming the job will have use of all the cpus in nodes assigned to it. - cpu_ids_ns = self._node_spec.cpu_ids[0] - if cpu_ids_ns: - cpu_ids = self._node_spec.cpu_ids[0] - if self._threads_per_core == 2: - polaris_node = PolarisNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) - else: - polaris_node = PolarisNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) + cpu_ids = self._node_spec.cpu_ids[0] + polaris_node = PolarisNode(self._node_spec.node_ids[0], self._node_spec.hostnames[0]) + if not cpu_ids: cpu_ids = polaris_node.cpu_ids cpus_per_rank = self.get_cpus_per_rank() # PolarisNode reverses the order of the gpu_ids, so assigning the cpu-bind # in ascending cpu order is what we want here. - cpu_bind_list = ["verbose,list"] + cpu_bind_list = ["list"] for irank in range(self._ranks_per_node): cpu_bind_list.append(":") for i in range(cpus_per_rank): @@ -57,17 +55,10 @@ def _build_cmdline(self) -> str: cid = str(cpu_ids[i + cpus_per_rank * irank] + len(polaris_node.cpu_ids)) cpu_bind_list.append(cid) cpu_bind = "".join(cpu_bind_list) - # if "CUDA_VISIBLE_DEVICES" in self._envs.keys(): - # gpu_device = self._envs["CUDA_VISIBLE_DEVICES"] - # gpu_ids = gpu_device.split(",") - # else: - # gpu_ids = [] - # logger.info(f"Polaris app_run: cpu_bind={cpu_bind} cpu_ids={cpu_ids} gpu_ids={gpu_ids}") launch_params = [] for k in self._launch_params.keys(): - if k != "cpu_bind": - launch_params.append("--" + k) + if k != "cpu_bind" and k != "--cpu-bind": launch_params.append(str(self._launch_params[k])) # The value of -d depends on the setting of cpu_bind. If cpu-bind=core, -d is the number of @@ -95,7 +86,6 @@ def _build_cmdline(self) -> str: ] return " ".join(str(arg) for arg in args) - # Overide default because sunspot does not use CUDA def _set_envs(self) -> None: envs = os.environ.copy() envs.update(self._envs) From 020ae447d1ade936cc71f4e4e243c80fab08ae09 Mon Sep 17 00:00:00 2001 From: Christine Simpson Date: Fri, 4 Aug 2023 16:58:11 -0500 Subject: [PATCH 20/20] lint fix --- balsam/platform/app_run/polaris.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/balsam/platform/app_run/polaris.py b/balsam/platform/app_run/polaris.py index 3bc86a65..20f6ea22 100644 --- a/balsam/platform/app_run/polaris.py +++ b/balsam/platform/app_run/polaris.py @@ -65,7 +65,7 @@ def _build_cmdline(self) -> str: # physical cores per rank, otherwise it is the number of hardware threads per rank # https://docs.alcf.anl.gov/running-jobs/example-job-scripts/ depth = self._threads_per_rank - if "core" in cpu_bind: + if "core" == cpu_bind: depth = self.get_cpus_per_rank() nid_str = ",".join(map(str, node_ids))