diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 14e8de1f91..f89de19bdc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -147,6 +147,7 @@ jobs:
       token: ${{ secrets.PYANSYS_CI_BOT_TOKEN }}
       python-package-name: ${{ env.PACKAGE_NAME }}
       dev-mode: ${{ github.ref != 'refs/heads/main' }}
+      upload-reports: True

   docs-build:
     name: "Build documentation"
@@ -774,6 +775,7 @@ jobs:
     env:
       ON_LOCAL: true
       ON_UBUNTU: true
+      TESTING_MINIMAL: true

     steps:
       - name: "Install Git and checkout project"
diff --git a/doc/changelog.d/3497.documentation.md b/doc/changelog.d/3497.documentation.md
new file mode 100644
index 0000000000..9034ad6606
--- /dev/null
+++ b/doc/changelog.d/3497.documentation.md
@@ -0,0 +1 @@
+feat: support for launching an MAPDL instance in an SLURM HPC cluster
\ No newline at end of file
diff --git a/doc/changelog.d/3513.documentation.md b/doc/changelog.d/3513.documentation.md
new file mode 100644
index 0000000000..b2f60a9a07
--- /dev/null
+++ b/doc/changelog.d/3513.documentation.md
@@ -0,0 +1 @@
+docs: adding-sbatch-support
\ No newline at end of file
diff --git a/doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst b/doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst
new file mode 100644
index 0000000000..d3044c920d
--- /dev/null
+++ b/doc/source/user_guide/hpc/launch_mapdl_entrypoint.rst
@@ -0,0 +1,229 @@
+
Interactive MAPDL instance launched from the login node
=======================================================

Starting the instance
---------------------

If you are already logged in to a login node, you can launch an MAPDL instance as a SLURM job and
connect to it.
To accomplish this, run these commands on your login node.

.. code:: pycon

    >>> from ansys.mapdl.core import launch_mapdl
    >>> mapdl = launch_mapdl(launch_on_hpc=True)

PyMAPDL submits a job to the scheduler using the appropriate commands.
In the case of SLURM, it uses the ``sbatch`` command with the ``--wrap`` argument
to pass the MAPDL command line that starts the instance.
Other scheduler arguments can be specified using the ``scheduler_options``
argument as a Python :class:`dict`:

.. code:: pycon

    >>> from ansys.mapdl.core import launch_mapdl
    >>> scheduler_options = {"nodes": 10, "ntasks-per-node": 2}
    >>> mapdl = launch_mapdl(launch_on_hpc=True, nproc=20, scheduler_options=scheduler_options)


.. note::
    PyMAPDL cannot infer the number of CPUs that you are requesting from the scheduler.
    Hence, you must specify this value using the ``nproc`` argument.

The double minus (``--``) prefix common in the long version of some scheduler commands
is added automatically if PyMAPDL detects it is missing and the specified
argument is more than one character long.
For instance, the ``ntasks-per-node`` argument is submitted as ``--ntasks-per-node``.

Alternatively, the scheduler options can be submitted as a single Python string (:class:`str`):

.. code:: pycon

    >>> from ansys.mapdl.core import launch_mapdl
    >>> scheduler_options = "-N 10"
    >>> mapdl = launch_mapdl(launch_on_hpc=True, nproc=10, scheduler_options=scheduler_options)

.. warning::
    Because PyMAPDL is already using the ``--wrap`` argument, this argument
    cannot be used again.

The values of each scheduler argument are wrapped in single quotes (`'`).
This might cause parsing issues that make the job fail after a successful
submission.

PyMAPDL passes all the environment variables of the
user to the new job and to the MAPDL instance.
This is usually convenient because many environment variables are
needed to run the job or the MAPDL command.
For instance, the license server is normally stored in the :envvar:`ANSYSLMD_LICENSE_FILE` environment variable.
If you prefer not to pass these environment variables to the job, use the SLURM argument
``--export`` to specify the desired environment variables.
For more information, see `SLURM documentation `_.
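
For example, a minimal sketch that exports only the license-server variable
(the variable list is illustrative; remember that ``nproc`` must still be
specified whenever ``scheduler_options`` is used):

.. code:: pycon

    >>> from ansys.mapdl.core import launch_mapdl
    >>> scheduler_options = {"export": "ANSYSLMD_LICENSE_FILE"}
    >>> mapdl = launch_mapdl(launch_on_hpc=True, nproc=4, scheduler_options=scheduler_options)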

Working with the instance
-------------------------

Once the :class:`Mapdl ` object has been created,
it does not differ from a normal :class:`Mapdl `
instance.
You can retrieve the IP address of the MAPDL instance as well as its hostname:

.. code:: pycon

    >>> mapdl.ip
    '123.45.67.89'
    >>> mapdl.hostname
    'node0'

You can also retrieve the SLURM job ID:

.. code:: pycon

    >>> mapdl.jobid
    10001

If you want to check whether the instance has been launched using a scheduler,
you can use the :attr:`mapdl_on_hpc `
attribute:

.. code:: pycon

    >>> mapdl.mapdl_on_hpc
    True


Sharing files
^^^^^^^^^^^^^

Most HPC clusters share the login node filesystem with the compute nodes,
which means that you do not need to do extra work to upload or download files to the MAPDL
instance. You only need to copy them to the location where MAPDL is running.
You can obtain this location with the
:attr:`directory ` attribute.

If no location is specified in the :func:`launch_mapdl() `
function, then a temporary location is selected.
It is a good idea to set the ``run_location`` argument to a directory that is accessible
from all the compute nodes.
Normally anything under ``/home/user`` is available to all compute nodes.
If you are unsure where you should launch MAPDL, contact your cluster administrator.

Additionally, you can use the :meth:`upload `
and :meth:`download ` methods to
upload and download files to and from the MAPDL instance, respectively.
You do not need ``ssh`` or another similar connection.
However, for large files, you might want to consider alternatives.
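
For illustration, a minimal sketch of moving files through the gRPC connection
(the file names here are hypothetical):

.. code:: pycon

    >>> mapdl.upload("input_model.cdb")  # copied to ``mapdl.directory``
    >>> mapdl.download("file.rst")  # copied to the current working directory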

Exiting MAPDL
-------------

Exiting MAPDL, either intentionally or unintentionally, stops the job.
This behavior occurs because MAPDL is the main process of the job. Thus, when it
finishes, the scheduler considers the job done.

To exit MAPDL, you can use the :meth:`exit() ` method.
This method exits MAPDL and sends a signal to the scheduler to cancel the job.

.. code-block:: python

    mapdl.exit()

When the Python process you are running PyMAPDL on finishes without errors, and you have not
called the :meth:`exit() ` method, the garbage collector
kills the MAPDL instance and its job. This is intended to save resources.

If you prefer that the job not be killed, set the following attribute in the
:class:`Mapdl ` class:

.. code-block:: python

    mapdl.finish_job_on_exit = False


In this case, you should set a timeout in your job to avoid having the job
run longer than needed.


Handling crashes on an HPC
^^^^^^^^^^^^^^^^^^^^^^^^^^

If MAPDL crashes while running on an HPC, the job finishes right away.
In this case, PyMAPDL loses the connection to the MAPDL instance.
PyMAPDL retries to reconnect to the MAPDL instance up to five times, waiting
for up to five seconds.
If unsuccessful, you might get an error like this:

.. code-block:: text

    MAPDL server connection terminated unexpectedly while running:
      /INQUIRE,,DIRECTORY,,
    called by:
      _send_command

    Suggestions:
      MAPDL *might* have died because it executed a not-allowed command or ran out of memory.
      Check the MAPDL command output for more details.
      Open an issue on GitHub if you need assistance: https://github.com/ansys/pymapdl/issues
    Error:
      failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:50052: Failed to connect to remote host: connect: Connection refused (111)
    Full error:
      <_InactiveRpcError of RPC that terminated with:
        status = StatusCode.UNAVAILABLE
        details = "failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:50052: Failed to connect to remote host: connect: Connection refused (111)"
        debug_error_string = "UNKNOWN:Error received from peer {created_time:"2024-10-24T08:25:04.054559811+00:00", grpc_status:14, grpc_message:"failed to connect to all addresses; last error: UNKNOWN: ipv4:127.0.0.1:50052: Failed to connect to remote host: connect: Connection refused (111)"}"
      >

The data of that job is still available at :attr:`directory `.
For this reason, it is a good idea to set the run location using the ``run_location`` argument.

While handling this exception, PyMAPDL also cancels the job to avoid leaking resources.
Therefore, the only option is to start a new instance by launching a new job using
the :func:`launch_mapdl() ` function.

Use case on a SLURM cluster
---------------------------

Assume a user wants to start a remote MAPDL instance in an HPC cluster
to interact with it.
The user would like to request 10 nodes and 1 task per node (to avoid clashes
between MAPDL instances).
The user would also like to request 64 GB of RAM.
Because of administration logistics, the user must use the machines in
the ``supercluster01`` partition.
To make PyMAPDL launch an instance like that on SLURM, run the following code:

.. code-block:: python

    from ansys.mapdl.core import launch_mapdl
    from ansys.mapdl.core.examples import vmfiles

    scheduler_options = {
        "nodes": 10,
        "ntasks-per-node": 1,
        "partition": "supercluster01",
        "mem": "64G",  # sbatch takes memory through '--mem'; '64G' requests 64 GB
    }
    mapdl = launch_mapdl(launch_on_hpc=True, nproc=10, scheduler_options=scheduler_options)

    num_cpu = mapdl.get_value("ACTIVE", 0, "NUMCPU")  # It should be equal to 10

    mapdl.clear()  # Not strictly needed.
    mapdl.prep7()

    # Run an MAPDL script
    mapdl.input(vmfiles["vm1"])

    # Let's solve again to get the solve printout
    mapdl.solution()
    output = mapdl.solve()
    print(output)

    mapdl.exit()  # Kill the MAPDL instance


PyMAPDL automatically sets MAPDL to read the job configuration (including machines,
number of CPUs, and memory), which allows MAPDL to use all the resources allocated
to that job.
diff --git a/doc/source/user_guide/hpc/pymapdl.rst b/doc/source/user_guide/hpc/pymapdl.rst
index 14f366b63d..76d27f75ee 100644
--- a/doc/source/user_guide/hpc/pymapdl.rst
+++ b/doc/source/user_guide/hpc/pymapdl.rst
@@ -19,35 +19,34 @@ on whether or not you run them both on the HPC compute nodes.
 Additionally, you might be able interact with them (``interactive`` mode)
 or not (``batch`` mode).
 
-For information on supported configurations, see :ref:`ref_pymapdl_batch_in_cluster_hpc`.
+PyMAPDL takes advantage of HPC clusters to launch MAPDL instances
+with increased resources.
+PyMAPDL automatically sets these MAPDL instances to read the
+scheduler job configuration (which includes machines, number
+of CPUs, and memory), which allows MAPDL to use all the resources
+allocated to that job.
+For more information, see :ref:`ref_tight_integration_hpc`.
+The following configurations are supported:
 
-Since v0.68.5, PyMAPDL can take advantage of the tight integration
-between the scheduler and MAPDL to read the job configuration and
-launch an MAPDL instance that can use all the resources allocated
-to that job.
-For instance, if a SLURM job has allocated 8 nodes with 4 cores each,
-then PyMAPDL launches an MAPDL instance which uses 32 cores
-spawning across those 8 nodes.
-This behavior can turn off if passing the :envvar:`PYMAPDL_ON_SLURM`
-environment variable or passing the ``detect_HPC=False`` argument
-to the :func:`launch_mapdl() ` function.
+* :ref:`ref_pymapdl_batch_in_cluster_hpc`.
+* :ref:`ref_pymapdl_interactive_in_cluster_hpc_from_login`
 
 .. _ref_pymapdl_batch_in_cluster_hpc:
 
-Submit a PyMAPDL batch job to the cluster from the entrypoint node
-==================================================================
+Batch job submission from the login node
+========================================
 
 Many HPC clusters allow their users to log into a machine using
 ``ssh``, ``vnc``, ``rdp``, or similar technologies and then submit a
 job to the cluster from there.
-This entrypoint machine, sometimes known as the *head node* or *entrypoint node*,
+This login machine, sometimes known as the *head node* or *entrypoint node*,
 might be a virtual machine (VDI/VM).
 
 In such cases, once the Python virtual environment with PyMAPDL is already
 set and is accessible to all the compute nodes, launching a
-PyMAPDL job from the entrypoint node is very easy to do using the ``sbatch`` command.
+PyMAPDL job from the login node is very easy to do using the ``sbatch`` command.
 When the ``sbatch`` command is used, PyMAPDL runs and launches an MAPDL
 instance in the compute nodes.
 No changes are needed on a PyMAPDL script to run it on an SLURM cluster.
@@ -98,6 +97,8 @@ job by setting the :envvar:`PYMAPDL_NPROC` environment variable to the desired value.
 
     (venv) user@entrypoint-machine:~$ PYMAPDL_NPROC=4 sbatch main.py
 
+For more applicable environment variables, see :ref:`ref_environment_variables`.
+
 You can also add ``sbatch`` options to the command:
 
 .. code-block:: console
@@ -181,3 +182,34 @@
 This bash script performs tasks such as creating environment variables,
 moving files to different directories, and printing to ensure your
 configuration is correct.
+
+.. _ref_pymapdl_interactive_in_cluster_hpc:
+
+
+.. _ref_pymapdl_interactive_in_cluster_hpc_from_login:
+
+.. include:: launch_mapdl_entrypoint.rst
+
+
+.. _ref_tight_integration_hpc:
+
+Tight integration between MAPDL and the HPC scheduler
+=====================================================
+
+Since v0.68.5, PyMAPDL can take advantage of the tight integration
+between the scheduler and MAPDL to read the job configuration and
+launch an MAPDL instance that can use all the resources allocated
+to that job.
+For instance, if a SLURM job has allocated 8 nodes with 4 cores each,
+then PyMAPDL launches an MAPDL instance that uses 32 cores
+spanning across those 8 nodes.
+
+This behavior can be turned off by passing the
+:envvar:`PYMAPDL_RUNNING_ON_HPC` environment variable
+with a ``'false'`` value or passing the ``running_on_hpc=False`` argument
+to the :func:`launch_mapdl() ` function.
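+
+For example, a minimal sketch of bypassing the detection from Python
+(``running_on_hpc`` is the argument described above):
+
+.. code:: pycon
+
+    >>> from ansys.mapdl.core import launch_mapdl
+    >>> mapdl = launch_mapdl(running_on_hpc=False)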
+
+Alternatively, you can override these settings by either specifying
+custom settings in the :func:`launch_mapdl() `
+function's arguments or using specific environment variables.
+For more information, see :ref:`ref_environment_variables`.
diff --git a/doc/source/user_guide/mapdl.rst b/doc/source/user_guide/mapdl.rst
index c7ba053666..fceee2a4e3 100644
--- a/doc/source/user_guide/mapdl.rst
+++ b/doc/source/user_guide/mapdl.rst
@@ -1092,6 +1092,7 @@ are unsupported.
 | * ``LSWRITE``  | |:white_check_mark:| Available (Internally running in :attr:`Mapdl.non_interactive `)  | |:white_check_mark:| Available  | |:exclamation:| Only in :attr:`Mapdl.non_interactive `  |
 |                |                                                                                                                                 |                                 |                                                                                                                        |
 +---------------+---------------------------------------------------------------------------------------------------------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------+
+.. _ref_environment_variables:
 
 Environment variables
 =====================
@@ -1189,7 +1190,7 @@ environment variable. The following table describes all arguments.
 |                                       |     user@machine:~$ export PYMAPDL_MAPDL_VERSION=22.2                              |
 |                                       |                                                                                    |
 +---------------------------------------+------------------------------------------------------------------------------------+
-| :envvar:`PYMAPDL_ON_SLURM`            | With this environment variable set to ``FALSE``, you can avoid                    |
+| :envvar:`PYMAPDL_RUNNING_ON_HPC`      | With this environment variable set to ``FALSE``, you can stop                     |
 |                                       | PyMAPDL from detecting that it is running on a SLURM HPC cluster.                  |
 +---------------------------------------+------------------------------------------------------------------------------------+
 | :envvar:`PYMAPDL_MAX_MESSAGE_LENGTH`  | Maximum gRPC message length. If your                                               |
diff --git a/doc/styles/config/vocabularies/ANSYS/accept.txt b/doc/styles/config/vocabularies/ANSYS/accept.txt
index 0d27d17331..583fb27fac 100644
--- a/doc/styles/config/vocabularies/ANSYS/accept.txt
+++ b/doc/styles/config/vocabularies/ANSYS/accept.txt
@@ -53,6 +53,7 @@ CentOS7
 Chao
 ci
 container_layout
+CPUs
 datas
 delet
 Dependabot
diff --git a/pyproject.toml b/pyproject.toml
index ea085cd601..7fd43b37d9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -148,8 +148,6 @@ src_paths = ["doc", "src", "tests"]
 [tool.coverage.run]
 source = ["ansys/pymapdl"]
 omit = [
-    # omit commands
-    "ansys/mapdl/core/_commands/*",
     # ignore legacy interfaces
     "ansys/mapdl/core/mapdl_console.py",
     "ansys/mapdl/core/jupyter.py",
diff --git a/src/ansys/mapdl/core/errors.py b/src/ansys/mapdl/core/errors.py
index 19a09e22ba..ebbffcfa53 100644
--- a/src/ansys/mapdl/core/errors.py
+++ b/src/ansys/mapdl/core/errors.py
@@ -307,9 +307,9 @@ def wrapper(*args, **kwargs):
         old_handler = signal.signal(signal.SIGINT, handler)
 
     # Capture gRPC exceptions
-    n_attempts = 3
-    initial_backoff = 0.05
-    multiplier_backoff = 3
+    n_attempts = 5
+    initial_backoff = 0.1
+    multiplier_backoff = 2
 
     i_attemps = 0
diff --git a/src/ansys/mapdl/core/helpers.py b/src/ansys/mapdl/core/helpers.py
index edb29704ab..308520e5d0 100644
--- a/src/ansys/mapdl/core/helpers.py
+++ b/src/ansys/mapdl/core/helpers.py
@@ -32,9 +32,7 @@
 def is_installed(package_name: str) -> bool:
     """Check if a package is installed"""
-
-    if os.name == "nt":
-        package_name = package_name.replace("-", ".")
+    package_name = package_name.replace("-", ".")
 
     try:
         importlib.import_module(package_name)
diff --git a/src/ansys/mapdl/core/launcher.py b/src/ansys/mapdl/core/launcher.py
index 8f0904b8fc..ae36861c3a 100644
--- a/src/ansys/mapdl/core/launcher.py
+++ b/src/ansys/mapdl/core/launcher.py
@@ -23,6 +23,7 @@
 """Module for launching MAPDL locally or connecting to a remote instance with gRPC."""
 
 import atexit
+from functools import wraps
 import os
 import platform
 from queue import Empty, Queue
@@ -64,7 +65,21 @@
     import ansys.platform.instancemanagement as pypim
 
 if _HAS_ATP:
-    from ansys.tools.path import find_ansys, get_ansys_path, version_from_path
+    from ansys.tools.path import find_ansys, get_ansys_path
+    from ansys.tools.path import version_from_path as _version_from_path
+
+    @wraps(_version_from_path)
+    def version_from_path(*args, **kwargs):
+        """Wrap ansys.tools.path.version_from_path to raise a warning if the
+        executable couldn't be found"""
+        if kwargs.pop("launch_on_hpc", False):
+            try:
+                return _version_from_path(*args, **kwargs)
+            except RuntimeError:
+                warnings.warn("PyMAPDL could not find the ANSYS executable. ")
+        else:
+            return _version_from_path(*args, **kwargs)
+
 
 if TYPE_CHECKING:  # pragma: no cover
     from ansys.mapdl.core.mapdl_console import MapdlConsole
@@ -86,37 +101,39 @@
 ALLOWABLE_VERSION_INT = tuple(SUPPORTED_ANSYS_VERSIONS.keys())
 
 ALLOWABLE_LAUNCH_MAPDL_ARGS = [
-    "exec_file",
-    "run_location",
-    "jobname",
-    "nproc",
-    "ram",
-    "mode",
-    "override",
-    "loglevel",
+    "add_env_vars",
     "additional_switches",
-    "start_timeout",
-    "port",
     "cleanup_on_exit",
-    "start_instance",
-    "ip",
     "clear_on_connect",
-    "log_apdl",
-    "remove_temp_dir_on_exit",
+    "running_on_hpc",
+    "exec_file",
+    "force_intel",
+    "ip",
+    "jobname",
+    "launch_on_hpc",
     "license_server_check",
     "license_type",
+    "log_apdl",
+    "loglevel",
+    "mode",
+    "nproc",
+    "override",
+    "port",
     "print_com",
-    "add_env_vars",
+    "ram",
+    "remove_temp_dir_on_exit",
     "replace_env_vars",
-    "version",
-    "detect_HPC",
+    "run_location",
+    "scheduler_options",
     "set_no_abort",
-    "force_intel"
+    "start_instance",
+    "start_timeout",
+    "version",
     # Non documented args
-    "use_vtk",
+    "_debug_no_launch",
     "just_launch",
     "on_pool",
-    "_debug_no_launch",
+    "use_vtk",
 ]
 
 ON_WSL = os.name == "posix" and (
@@ -140,6 +157,15 @@
 Be aware of possible errors or unexpected behavior with this configuration.
 """
 
+LAUNCH_ON_HPC_ERROR_MESSAGE_IP = (
+    "PyMAPDL cannot ensure a specific IP will be used when launching "
+    "MAPDL on a cluster. Hence the 'ip' argument is not compatible. "
+    "If you want to connect to an already started MAPDL instance, "
+    "just connect normally as you would with a remote instance. "
+    "For example:\n\n"
+    ">>> mapdl = launch_mapdl(start_instance=False, ip='123.45.67.89')\n\n"
+    "where '123.45.67.89' is the IP of the machine where MAPDL is running."
+)
 
 GALLERY_INSTANCE = [None]
@@ -172,10 +198,8 @@ def _is_ubuntu() -> bool:
     # args value is controlled by the library.
     # awk is not a partial path - Bandit false positive.
     # Excluding bandit check.
-    proc = subprocess.Popen(
-        ["awk", "-F=", "/^NAME/{print $2}", "/etc/os-release"],
-        stdout=subprocess.PIPE,
-    )  # nosec B603 B607
+    proc = submitter(["awk", "-F=", "/^NAME/{print $2}", "/etc/os-release"])
+
     if "ubuntu" in proc.stdout.read().decode().lower():
         return True
@@ -280,9 +304,8 @@ def get_process_at_port(port) -> Optional[psutil.Process]:
     """Get the process (psutil.Process) running at the given port"""
     for proc in psutil.process_iter():
         try:
-            connections = proc.connections(
-                kind="inet"
-            )  # just to check if we can access the
+            # just to check if we can access the port
+            connections = proc.connections()
         except psutil.AccessDenied:
             continue
         except psutil.NoSuchProcess:
@@ -322,7 +345,7 @@ def generate_mapdl_launch_command(
     ----------
     exec_file : str, optional
         The location of the MAPDL executable. Will use the cached
-        location when left at the default ``None``.
+        location when left at the default :class:`None`.
 
     jobname : str, optional
         MAPDL jobname. Defaults to ``'file'``.
@@ -332,7 +355,7 @@
     ram : float, optional
         Total size in megabytes of the workspace (memory) used for the initial allocation.
-        The default is ``None``, in which case 2 GB (2048 MB) is used. To force a fixed size
+        The default is :class:`None`, in which case 2 GB (2048 MB) is used. To force a fixed size
         throughout the run, specify a negative number.
     port : int
@@ -356,11 +379,6 @@
         Command
     """
-    # verify version
-    if _HAS_ATP:
-        if version_from_path("mapdl", exec_file) < 202:
-            raise VersionError("The MAPDL gRPC interface requires MAPDL 20.2 or later")
-
     cpu_sw = "-np %d" % nproc
 
     if ram:
@@ -418,21 +436,26 @@
 def launch_grpc(
     cmd: list[str],
     run_location: str = None,
     env_vars: Optional[Dict[str, str]] = None,
+    launch_on_hpc: bool = False,
 ) -> subprocess.Popen:
     """Start MAPDL locally in gRPC mode.
 
     Parameters
     ----------
-    cmd: str
+    cmd : list[str]
         Command to use to launch the MAPDL instance.
 
     run_location : str, optional
         MAPDL working directory.  The default is the temporary working
         directory.
 
-    env_vars: dict, optional
+    env_vars : dict, optional
         Dictionary with the environment variables to inject in the process.
 
+    launch_on_hpc : bool, optional
+        If running on an HPC, this needs to be :class:`True` to avoid the
+        temporary input file creation on Windows.
+
     Returns
     -------
     subprocess.Popen
@@ -444,31 +467,48 @@
     # disable all MAPDL pop-up errors:
     env_vars.setdefault("ANS_CMD_NODIAG", "TRUE")
 
+    cmd_string = " ".join(cmd)
+
+    if "sbatch" in cmd:
+        header = "Running an MAPDL instance on the Cluster:"
+        shell = os.name != "nt"
+        cmd_ = cmd_string
+    else:
+        header = "Running an MAPDL instance"
+        shell = False  # To prevent shell injection
+        cmd_ = cmd
+
     LOG.info(
-        f"Running a local instance in {run_location} with the following command: '{cmd}'"
+        "\n============"
+        "\n============\n"
+        f"{header}\nLocation:\n{run_location}\n"
+        f"Command:\n{cmd_string}\n"
+        f"Env vars:\n{env_vars}"
+        "\n============"
+        "\n============"
    )
 
     if os.name == "nt":
         # getting tmp file name
-        tmp_inp = cmd[cmd.index("-i") + 1]
-        with open(os.path.join(run_location, tmp_inp), "w") as f:
-            f.write("FINISH\r\n")
-        LOG.debug(f"Writing temporary input file: {tmp_inp} with 'FINISH' command.")
+        if not launch_on_hpc:
+            # if we were launching on an HPC cluster (a case not considered here),
+            # we would have to upload/create this file because it is needed for starting.
+            tmp_inp = cmd[cmd.index("-i") + 1]
+            with open(os.path.join(run_location, tmp_inp), "w") as f:
+                f.write("FINISH\r\n")
+            LOG.debug(
+                f"Writing temporary input file: {tmp_inp} with 'FINISH' command."
+            )
 
     LOG.debug("MAPDL starting in background.")
-
-    # cmd is controlled by the library with generate_mapdl_launch_command.
-    # Excluding bandit check.
-    process = subprocess.Popen(
-        cmd,
+    return submitter(
+        cmd_,
+        shell=shell,  # sbatch does not work without shell.
         cwd=run_location,
         stdin=subprocess.DEVNULL,
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
-        env=env_vars,
-    )  # nosec B603
-
-    return process
 
 
 def check_mapdl_launch(
@@ -520,7 +560,7 @@
         msg = (
             str(e)
             + f"\nRun location: {run_location}"
-            + f"\nCommand line used: {cmd}\n\n"
+            + f"\nCommand line used: {' '.join(cmd)}\n\n"
         )
 
         terminal_output = "\n".join(_get_std_output(std_queue=stdout_queue)).strip()
@@ -681,8 +721,8 @@ def get_start_instance(start_instance: Optional[Union[bool, str]] = None) -> boo
     Returns
     -------
     bool
-        ``True`` when the ``PYMAPDL_START_INSTANCE`` environment variable is
-        true, ``False`` when PYMAPDL_START_INSTANCE is false. If unset,
+        :class:`True` when the ``PYMAPDL_START_INSTANCE`` environment variable is
+        true, :class:`False` when PYMAPDL_START_INSTANCE is false. If unset,
         returns ``start_instance``.
 
     Raises
@@ -862,7 +902,7 @@ def set_MPI_additional_switches(
     exec_path : str
         Path to the MAPDL executable.
     force_intel : bool, optional
-        Force the usage of intelmpi. The default is ``False``.
+        Force the usage of intelmpi. The default is :class:`False`.
 
     Returns
     -------
@@ -968,7 +1008,7 @@
     override: bool = False,
     loglevel: str = "ERROR",
     additional_switches: str = "",
-    start_timeout: int = 45,
+    start_timeout: Optional[int] = None,
     port: Optional[int] = None,
     cleanup_on_exit: bool = True,
     start_instance: Optional[bool] = None,
@@ -982,7 +1022,8 @@
     add_env_vars: Optional[Dict[str, str]] = None,
     replace_env_vars: Optional[Dict[str, str]] = None,
     version: Optional[Union[int, str]] = None,
-    detect_HPC: bool = True,
+    running_on_hpc: bool = True,
+    launch_on_hpc: bool = False,
     **kwargs: Dict[str, Any],
 ) -> Union[MapdlGrpc, "MapdlConsole"]:
     """Start MAPDL locally.
@@ -991,17 +1032,15 @@
     ----------
     exec_file : str, optional
         The location of the MAPDL executable. Will use the cached
-        location when left at the default ``None`` and no environment
+        location when left at the default :class:`None` and no environment
         variable is set.
 
-        .. note::
+        The executable path can also be set through the environment variable
+        :envvar:`PYMAPDL_MAPDL_EXEC`. For example:
 
-           The executable path can be also set through the environment variable
-           ``PYMAPDL_MAPDL_EXEC``. For example:
+        .. code:: console
 
-            .. code:: console
-
-                export PYMAPDL_MAPDL_EXEC=/ansys_inc/v211/ansys/bin/mapdl
+            export PYMAPDL_MAPDL_EXEC=/ansys_inc/v211/ansys/bin/mapdl
 
     run_location : str, optional
         MAPDL working directory.  Defaults to a temporary working
@@ -1011,13 +1050,13 @@
         MAPDL jobname. Defaults to ``'file'``.
 
     nproc : int, optional
-        Number of processors. Defaults to 2. If running on an HPC cluster,
+        Number of processors. Defaults to ``2``. If running on an HPC cluster,
         this value is adjusted to the number of CPUs allocated to the job,
-        unless ``detect_HPC`` is set to "false".
+        unless the ``running_on_hpc`` argument is set to :class:`False`.
 
     ram : float, optional
         Total size in megabytes of the workspace (memory) used for the initial
-        allocation. The default is ``None``, in which case 2 GB (2048 MB) is
+        allocation. The default is :class:`None`, in which case 2 GB (2048 MB) is
         used. To force a fixed size throughout the run, specify a negative
         number.
@@ -1055,41 +1094,47 @@
         section for additional details.
 
     start_timeout : float, optional
-        Maximum allowable time to connect to the MAPDL server.
+        Maximum allowable time to connect to the MAPDL server. By default it is
+        45 seconds; however, it is increased to 90 seconds when launching on an HPC.
 
     port : int
         Port to launch MAPDL gRPC on.  Final port will be the first
         port available after (or including) this port.  Defaults to
-        50052. You can also override the port default with the
-        environment variable ``PYMAPDL_PORT=``
-        This argument has priority over the environment variable.
+        ``50052``. You can also provide this value through the environment variable
+        :envvar:`PYMAPDL_PORT`. For instance ``PYMAPDL_PORT=50053``.
+        However, the argument (if specified) has precedence over the environment
+        variable. If this environment variable is empty, it is treated as if it
+        is not set.
 
     cleanup_on_exit : bool, optional
         Exit MAPDL when python exits or the mapdl Python instance is
         garbage collected.
 
     start_instance : bool, optional
-        When False, connect to an existing MAPDL instance at ``ip``
-        and ``port``, which default to ip ``'127.0.0.1'`` at port 50052.
-        Otherwise, launch a local instance of MAPDL. You can also
-        override the default behavior of this keyword argument with
-        the environment variable ``PYMAPDL_START_INSTANCE=FALSE``.
+        When :class:`False`, connect to an existing MAPDL instance at ``ip``
+        and ``port``, which default to ip ``'127.0.0.1'`` at port ``50052``.
+        Otherwise, launch a local instance of MAPDL. You can also
+        provide this value through the environment variable
+        :envvar:`PYMAPDL_START_INSTANCE`.
+        However, the argument (if specified) has precedence over the environment
+        variable. If this environment variable is empty, it is treated as if it
+        is not set.
 
     ip : str, optional
-        Used only when ``start_instance`` is ``False``. If provided,
-        and ``start_instance`` (or its correspondent environment variable
-        ``PYMAPDL_START_INSTANCE``) is ``True`` then, an exception is raised.
         Specify the IP address of the MAPDL instance to connect to.
         You can also provide a hostname as an alternative to an IP address.
-        Defaults to ``'127.0.0.1'``. You can also override the
-        default behavior of this keyword argument with the
-        environment variable ``PYMAPDL_IP=``. If this environment variable
-        is empty, it is as it is not set.
+        Defaults to ``'127.0.0.1'``.
+        Used only when ``start_instance`` is :class:`False`. If this argument
+        is provided, and ``start_instance`` (or its corresponding environment
+        variable :envvar:`PYMAPDL_START_INSTANCE`) is :class:`True`, then an
+        exception is raised.
+        You can also provide this value through the environment variable
+        :envvar:`PYMAPDL_IP`. For instance ``PYMAPDL_IP=123.45.67.89``.
+        However, the argument (if specified) has precedence over the environment
+        variable. If this environment variable is empty, it is treated as if it
+        is not set.
 
     clear_on_connect : bool, optional
-        Defaults to ``True``, giving you a fresh environment when
+        Defaults to :class:`True`, giving you a fresh environment when
         connecting to MAPDL. When if ``start_instance`` is specified
-        it defaults to ``False``.
+        it defaults to :class:`False`.
 
     log_apdl : str, optional
         Enables logging every APDL command to the local disk.  This
         ``log_apdl='pymapdl_log.txt'``). By default this is disabled.
 
     remove_temp_dir_on_exit : bool, optional
-        When ``run_location`` is ``None``, this launcher creates a new MAPDL
+        When ``run_location`` is :class:`None`, this launcher creates a new MAPDL
         working directory within the user temporary directory, obtainable with
         ``tempfile.gettempdir()``. When this parameter is
-        ``True``, this directory will be deleted when MAPDL is exited. Default
-        ``False``.
+        :class:`True`, this directory will be deleted when MAPDL is exited.
+        Defaults to :class:`False`.
         If you change the working directory, PyMAPDL does not delete the original
         working directory nor the new one.
 
     license_server_check : bool, optional
         Check if the license server is available if MAPDL fails to
-        start. Only available on ``mode='grpc'``. Defaults ``False``.
+        start. Only available on ``mode='grpc'``. Defaults to :class:`False`.
 
     license_type : str, optional
         Enable license type selection. You can input a string for its
         license name (for example ``'meba'`` or ``'ansys'``) or its description
         ("enterprise solver" or "enterprise" respectively).
         You can also use legacy licenses (for example ``'aa_t_a'``) but it will
-        also raise a warning. If it is not used (``None``), no specific license
-        will be requested, being up to the license server to provide a specific
-        license type. Default is ``None``.
+        also raise a warning. If it is not used (:class:`None`), no specific
+        license will be requested, being up to the license server to provide a
+        specific license type. Default is :class:`None`.
 
     print_com : bool, optional
         Print the command ``/COM`` arguments to the standard output.
-        Default ``False``.
+        Default :class:`False`.
 
     add_env_vars : dict, optional
         The provided dictionary will be used to extend the MAPDL process
         environment variables. If you want to control all of the environment
-        variables, use the argument ``replace_env_vars``. Defaults to ``None``.
+        variables, use the argument ``replace_env_vars``.
+        Defaults to :class:`None`.
 
     replace_env_vars : dict, optional
         The provided dictionary will be used to replace all the MAPDL process
         environment variables. It replace the system environment
         variables which otherwise would be used in the process.
         To just add some environment variables to the MAPDL
-        process, use ``add_env_vars``. Defaults to ``None``.
+        process, use ``add_env_vars``. Defaults to :class:`None`.
 
     version : float, optional
-        Version of MAPDL to launch. If ``None``, the latest version is used.
+        Version of MAPDL to launch. If :class:`None`, the latest version is used.
         Versions can be provided as integers (i.e. ``version=222``) or
         floats (i.e. ``version=22.2``).
         To retrieve the available installed versions, use the function
         :meth:`ansys.tools.path.path.get_available_ansys_installations`.
+        You can also provide this value through the environment variable
+        :envvar:`PYMAPDL_MAPDL_VERSION`.
+        For instance ``PYMAPDL_MAPDL_VERSION=22.2``.
+        However, the argument (if specified) has precedence over the environment
+        variable. If this environment variable is empty, it is treated as if it
+        is not set.
 
-        .. note::
-
-           The default version can be also set through the environment variable
-           ``PYMAPDL_MAPDL_VERSION``. For example:
-
-           .. code:: console
-
-              export PYMAPDL_MAPDL_VERSION=22.2
-
-    detect_HPC: bool, optional
+    running_on_hpc: bool, optional
         Whether detect if PyMAPDL is running on an HPC cluster. Currently
         only SLURM clusters are supported. By default, it is set to true.
-        This option can be bypassed if the ``PYMAPDL_ON_SLURM``
-        environment variable is set to "true". For more information, see
-        :ref:`ref_hpc_slurm`.
-
-    kwargs : dict, optional
+        This option can be bypassed if the :envvar:`PYMAPDL_RUNNING_ON_HPC`
+        environment variable is set to ``'false'``.
+        For more information, see :ref:`ref_hpc_slurm`.
+
+    launch_on_hpc : bool, optional
+        If :class:`True`, it uses the implemented scheduler (SLURM only) to launch
+        an MAPDL instance on the HPC. In this case, you can pass the
+        ``scheduler_options`` argument to
+        :func:`launch_mapdl() `
+        to specify the scheduler arguments as a string or as a dictionary.
+        For more information, see :ref:`ref_hpc_slurm`.
+
+    kwargs : dict, optional
         These keyword arguments are interface-specific or for
         development purposes. For more information, see Notes.
 
+        scheduler_options : :class:`str`, :class:`dict`
+          Use it to specify options for the scheduler run command. It can be a
+          string or a dictionary with arguments and their values (both as strings).
+          For more information, see :ref:`ref_hpc_slurm`.
+
         set_no_abort : :class:`bool`
           *(Development use only)*
           Sets MAPDL to not abort at the first error within /BATCH mode.
-          Defaults to ``True``.
+          Defaults to :class:`True`.
         force_intel : :class:`bool`
           *(Development use only)*
           Forces the use of Intel message pass interface (MPI) in versions between
-          Ansys 2021R0 and 2022R2, where because of VPNs issues this MPI is deactivated
-          by default. See :ref:`vpn_issues_troubleshooting` for more information.
-          Defaults to ``False``.
+          Ansys 2021R0 and 2022R2, where, because of VPN issues, this MPI is
+          deactivated by default.
+          See :ref:`vpn_issues_troubleshooting` for more information.
+          Defaults to :class:`False`.
 
     Returns
     -------
@@ -1191,8 +1247,8 @@
     **Additional switches**
 
     These are the MAPDL switch options as of 2020R2 applicable for
-    running MAPDL as a service via gRPC. Excluded switches such as
-    ``"-j"`` either not applicable or are set via keyword arguments.
+    running MAPDL as a service via gRPC. Excluded switches are either not
+    applicable or are set via keyword arguments, such as ``"-j"``.
 
     \\-acc
         Enables the use of GPU hardware.  See GPU
@@ -1287,8 +1343,10 @@
         Enables shared-memory parallelism.
         See the Parallel Processing Guide for more information.
 
+    **PyPIM**
+
     If the environment is configured to use `PyPIM `_
-    and ``start_instance`` is ``True``, then starting the instance will be delegated to PyPIM.
+    and ``start_instance`` is :class:`True`, then starting the instance will be delegated to PyPIM.
     In this event, most of the options will be ignored and the server side configuration will
     be used.
@@ -1342,24 +1400,20 @@
     pre_check_args(args)
 
     # SLURM settings
-    if is_on_slurm(args):
+    if is_running_on_slurm(args):
         LOG.info("On Slurm mode.")
 
         # extracting parameters
         get_slurm_options(args, kwargs)
-        get_cpus(args)
-
     get_start_instance_arg(args)
 
+    get_cpus(args)
+
     get_ip(args)
 
     args["port"] = get_port(args["port"], args["start_instance"])
 
-    get_exec_file(args)
-
-    args["version"] = get_version(args["version"], exec_file)
-
     if args["start_instance"]:
         ########################################
         # Local adjustments
@@ -1367,6 +1421,20 @@
         #
         # Only when starting MAPDL (aka Local)
 
+        get_exec_file(args)
+
+        args["version"] = get_version(
+            args["version"], args.get("exec_file"), launch_on_hpc=args["launch_on_hpc"]
+        )
+
+        args["additional_switches"] = set_license_switch(
+            args["license_type"], args["additional_switches"]
+        )
+
+        env_vars: Dict[str, str] = update_env_vars(
+            args["add_env_vars"], args["replace_env_vars"]
+        )
+
         get_run_location(args)
 
         # verify lock file does not exist
@@ -1376,26 +1444,17 @@
         # (as way to check if MAPDL started or not)
         remove_err_files(args["run_location"], args["jobname"])
 
-    if _HAS_ATP and not args["_debug_no_launch"]:
-        version = version_from_path("mapdl", args["exec_file"])
-        args["mode"] = check_mode(args["mode"], version)
-
-    if not args["mode"]:
-        args["mode"] = "grpc"
-
-    LOG.debug(f"Using mode {args['mode']}")
-
-    args["additional_switches"] = set_license_switch(
-        args["license_type"], args["additional_switches"]
-    )
-
-    env_vars = update_env_vars(args["add_env_vars"], args["replace_env_vars"])
+    # Check for a valid connection mode
+    args["mode"] = check_mode(args["mode"], args["version"])
 
     ########################################
     # Context specific launching adjustments
     # --------------------------------------
     #
     if args["start_instance"]:
+        # ON HPC:
+        # Assuming that if the login node is Ubuntu, the compute
+        # nodes are also Ubuntu.
         env_vars = configure_ubuntu(env_vars)
 
         # Set SMP by default if student version is used.
@@ -1412,8 +1471,15 @@ def launch_mapdl( LOG.debug(f"Using additional switches {args['additional_switches']}.") - start_parm = generate_start_parameters(args) + if args["running_on_hpc"] or args["launch_on_hpc"]: + env_vars.setdefault("ANS_MULTIPLE_NODES", "1") + env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") + ######################################## + # PyPIM connection + # ---------------- + # Delegating to PyPIM if applicable + # if _HAS_PIM and exec_file is None and pypim.is_configured(): # Start MAPDL with PyPIM if the environment is configured for it # and the user did not pass a directive on how to launch it. @@ -1423,9 +1489,7 @@ def launch_mapdl( cleanup_on_exit=args["cleanup_on_exit"], version=args["version"] ) - if args["ON_SLURM"]: - env_vars.setdefault("ANS_MULTIPLE_NODES", "1") - env_vars.setdefault("HYDRA_BOOTSTRAP", "slurm") + start_parm = generate_start_parameters(args) # Early exit for debugging. if args["_debug_no_launch"]: @@ -1440,6 +1504,7 @@ def launch_mapdl( LOG.debug( f"Connecting to an existing instance of MAPDL at {args['ip']}:{args['port']}" ) + start_parm["launched"] = False mapdl = MapdlGrpc( cleanup_on_exit=False, @@ -1472,43 +1537,90 @@ def launch_mapdl( lic_check = LicenseChecker(timeout=args["start_timeout"]) lic_check.start() - try: - LOG.debug("Starting MAPDL") - if args["mode"] == "console": - from ansys.mapdl.core.mapdl_console import MapdlConsole + LOG.debug("Starting MAPDL") + if args["mode"] == "console": # pragma: no cover + ######################################## + # Launch MAPDL on console mode + # ---------------------------- + # + from ansys.mapdl.core.mapdl_console import MapdlConsole - mapdl = MapdlConsole( - loglevel=args["loglevel"], - log_apdl=args["log_apdl"], - use_vtk=args["use_vtk"], - **start_parm, - ) + mapdl = MapdlConsole( + loglevel=args["loglevel"], + log_apdl=args["log_apdl"], + use_vtk=args["use_vtk"], + **start_parm, + ) - elif args["mode"] == "grpc": + elif args["mode"] == "grpc": + ######################################## + # Launch MAPDL with gRPC + # ---------------------- + # + cmd = generate_mapdl_launch_command( + exec_file=args["exec_file"], + jobname=args["jobname"], + nproc=args["nproc"], + ram=args["ram"], + port=args["port"], + additional_switches=args["additional_switches"], + ) - cmd = generate_mapdl_launch_command( - exec_file=args["exec_file"], - jobname=args["jobname"], - nproc=args["nproc"], - ram=args["ram"], - port=args["port"], - additional_switches=args["additional_switches"], + if args["launch_on_hpc"]: + # wrapping command if on HPC + cmd = generate_sbatch_command( + cmd, scheduler_options=args.get("scheduler_options") ) + try: + # process = launch_grpc( - cmd=cmd, run_location=args["run_location"], env_vars=env_vars + cmd=cmd, + run_location=args["run_location"], + env_vars=env_vars, + launch_on_hpc=args.get("launch_on_hpc"), ) - check_mapdl_launch( - process, args["run_location"], args["start_timeout"], cmd - ) + if args["launch_on_hpc"]: + start_parm["jobid"] = check_mapdl_launch_on_hpc(process, start_parm) + get_job_info(start_parm=start_parm, timeout=args["start_timeout"]) + else: + # Local mapdl launch check + check_mapdl_launch( + process, args["run_location"], args["start_timeout"], cmd + ) + + except Exception as exception: + LOG.error("An error occurred when launching MAPDL.") + + jobid: int = start_parm.get("jobid", "Not found") + + if ( + args["launch_on_hpc"] + and start_parm.get("finish_job_on_exit", True) + and jobid not in ["Not found", None] + ): + + LOG.debug(f"Killing HPC job with 
id: {jobid}") + kill_job(jobid) + + if args["license_server_check"]: + LOG.debug("Checking license server.") + lic_check.check() - if args["just_launch"]: - out = [args["ip"], args["port"]] - if hasattr(process, "pid"): - out += [process.pid] - return out + raise exception + if args["just_launch"]: + out = [args["ip"], args["port"]] + if hasattr(process, "pid"): + out += [process.pid] + return out + + ######################################## + # Connect to MAPDL using gRPC + # --------------------------- + # + try: mapdl = MapdlGrpc( cleanup_on_exit=args["cleanup_on_exit"], loglevel=args["loglevel"], @@ -1520,49 +1632,40 @@ def launch_mapdl( **start_parm, ) - # Setting launched property - mapdl._launched = True - mapdl._env_vars = env_vars - - except Exception as exception: - # Failed to launch for some reason. Check if failure was due - # to the license check - if args["license_server_check"]: - LOG.debug("Checking license server.") - lic_check.check() - - raise exception - - # Stopping license checker - if args["license_server_check"]: - LOG.debug("Stopping license server check.") - lic_check.is_connected = True + except Exception as exception: + LOG.error("An error occurred when connecting to MAPDL.") + raise exception return mapdl -def check_mode(mode: ALLOWABLE_MODES, version: ALLOWABLE_VERSION_INT): +def check_mode(mode: ALLOWABLE_MODES, version: Optional[int] = None): """Check if the MAPDL server mode matches the allowable version - If ``None``, the newest mode will be selected. + If :class:`None`, the newest mode will be selected. Returns a value from ``ALLOWABLE_MODES``. """ + if not mode and not version: + return "grpc" + if isinstance(mode, str): mode = mode.lower() if mode == "grpc": - if version < 211: + if version and version < 211: if version < 202 and os.name == "nt": raise VersionError( "gRPC mode requires MAPDL 2020R2 or newer " "on Windows." ) elif os.name == "posix": - raise VersionError("gRPC mode requires MAPDL 2021R1 or newer.") + raise VersionError( + "gRPC mode requires MAPDL 2021R1 or newer on Linux." + ) elif mode == "console": if os.name == "nt": raise ValueError("Console mode requires Linux.") - if version >= 211: + if version and version >= 211: warnings.warn( "Console mode not recommended in MAPDL 2021R1 or newer.\n" "Recommend using gRPC mode instead." @@ -1570,13 +1673,13 @@ def check_mode(mode: ALLOWABLE_MODES, version: ALLOWABLE_VERSION_INT): else: raise ValueError( f'Invalid MAPDL server mode "{mode}".\n\n' - f"Use one of the following modes:\n{ALLOWABLE_MODES}" + f"Use one of the following modes: {','.join(ALLOWABLE_MODES)}" ) else: # auto-select based on best version - if version >= 211: + if version and version >= 211: mode = "grpc" - elif version == 202 and os.name == "nt": + elif version and version == 202 and os.name == "nt": # Windows supports it as of 2020R2 mode = "grpc" else: @@ -1587,10 +1690,11 @@ def check_mode(mode: ALLOWABLE_MODES, version: ALLOWABLE_VERSION_INT): ) mode = "console" - if version < 130: + if version and version < 130: warnings.warn("MAPDL as a service has not been tested on MAPDL < v13") mode = "console" + LOG.debug(f"Using mode {mode}") return mode @@ -1781,7 +1885,8 @@ def get_value( SLURM_CPUS_PER_TASK = get_value("SLURM_CPUS_PER_TASK", kwargs) LOG.info(f"SLURM_CPUS_PER_TASK: {SLURM_CPUS_PER_TASK}") - # Set to value of the --ntasks option, if specified. See SLURM_NTASKS. Included for backwards compatibility. + # Set to value of the --ntasks option, if specified. See SLURM_NTASKS. + # Included for backwards compatibility. 
SLURM_NPROCS = get_value("SLURM_NPROCS", kwargs) LOG.info(f"SLURM_NPROCS: {SLURM_NPROCS}") @@ -1902,27 +2007,29 @@ def pack_arguments(locals_): "just_launch", locals_["kwargs"].get("just_launch", None) ) args["on_pool"] = locals_.get("on_pool", locals_["kwargs"].get("on_pool", None)) + args["_debug_no_launch"] = locals_.get( "_debug_no_launch", locals_["kwargs"].get("_debug_no_launch", None) ) + args.setdefault("launch_on_hpc", False) + args.setdefault("ip", None) return args -def is_on_slurm(args: Dict[str, Any]) -> bool: +def is_running_on_slurm(args: Dict[str, Any]) -> bool: + running_on_hpc_env_var = os.environ.get("PYMAPDL_RUNNING_ON_HPC", "True") - args["ON_SLURM"] = os.environ.get("PYMAPDL_ON_SLURM", "True") - - is_flag_false = args["ON_SLURM"].lower() == "false" + is_flag_false = running_on_hpc_env_var.lower() == "false" # Let's require the following env vars to exist to go into slurm mode. - args["ON_SLURM"] = bool( - args["detect_HPC"] + args["running_on_hpc"] = bool( + args["running_on_hpc"] and not is_flag_false # default is true and os.environ.get("SLURM_JOB_NAME") and os.environ.get("SLURM_JOB_ID") ) - return args["ON_SLURM"] + return args["running_on_hpc"] def generate_start_parameters(args: Dict[str, Any]) -> Dict[str, Any]: @@ -1961,6 +2068,8 @@ def generate_start_parameters(args: Dict[str, Any]) -> Dict[str, Any]: start_parm["override"] = args["override"] start_parm["timeout"] = args["start_timeout"] + start_parm["launched"] = True + LOG.debug(f"Using start parameters {start_parm}") return start_parm @@ -2088,6 +2197,7 @@ def get_port(port: Optional[int] = None, start_instance: Optional[bool] = None) def get_version( version: Optional[Union[str, int]] = None, exec_file: Optional[str] = None, + launch_on_hpc: bool = False, ) -> Optional[int]: """Get MAPDL version @@ -2110,6 +2220,14 @@ def get_version( version = os.getenv("PYMAPDL_MAPDL_VERSION") if not version: + # verify version + if exec_file and _HAS_ATP: + version = version_from_path("mapdl", exec_file, launch_on_hpc=launch_on_hpc) + if version and version < 202: + raise VersionError( + "The MAPDL gRPC interface requires MAPDL 20.2 or later" + ) + # Early exit return @@ -2217,8 +2335,7 @@ def get_exec_file(args: Dict[str, Any]) -> None: FileNotFoundError Invalid MAPDL executable """ - - args["exec_file"] = os.getenv("PYMAPDL_MAPDL_EXEC", args.get("exec_file")) + args["exec_file"] = args.get("exec_file") or os.getenv("PYMAPDL_MAPDL_EXEC") if not args["start_instance"] and args["exec_file"] is None: # 'exec_file' is not needed if the instance is not going to be launch @@ -2248,7 +2365,9 @@ def get_exec_file(args: Dict[str, Any]) -> None: "'exec_file' argument." 
         )
     else:  # verify ansys exists at this location
-        if not os.path.isfile(args["exec_file"]):
+        if not args.get("launch_on_hpc", False) and not os.path.isfile(
+            args["exec_file"]
+        ):
             raise FileNotFoundError(
                 f'Invalid MAPDL executable at "{args["exec_file"]}"\n'
                 "Enter one manually using exec_file="
@@ -2317,7 +2436,7 @@
     raise ValueError(f"The following arguments are not recognized: {ms_}")
 
 
-def pre_check_args(args):
+def pre_check_args(args: dict[str, Any]):
     if args["start_instance"] and args["ip"] and not args["on_pool"]:
         raise ValueError(
             "When providing a value for the argument 'ip', the argument "
@@ -2330,6 +2449,24 @@
     if args["exec_file"] and args["version"]:
         raise ValueError("Cannot specify both ``exec_file`` and ``version``.")
 
+    if args["launch_on_hpc"] and args["ip"]:
+        raise ValueError(LAUNCH_ON_HPC_ERROR_MESSAGE_IP)
+
+    # Setting timeout
+    if args["start_timeout"] is None:
+        if args["launch_on_hpc"]:
+            args["start_timeout"] = 90
+        else:
+            args["start_timeout"] = 45
+
+    # Raise an error if the CPU count cannot be inferred
+    if args.get("scheduler_options") and args.get("nproc", None) is None:
+        raise ValueError(
+            "PyMAPDL does not read the number of cores from the 'scheduler_options'. "
+            "Hence you need to specify the number of cores you want to use using "
+            "the argument 'nproc' in 'launch_mapdl'."
+        )
+
 
 def get_cpus(args: Dict[str, Any]):
     """Get number of CPUs
@@ -2348,7 +2485,7 @@
     # Bypassing number of processors checks because VDI/VNC might have
     # different number of processors than the cluster compute nodes.
     # Also the CPUs are set in `get_slurm_options`
-    if args["ON_SLURM"]:
+    if args["running_on_hpc"]:
         return
 
     # Setting number of processors
@@ -2361,7 +2498,7 @@
     # Check the env var `PYMAPDL_NPROC`
     args["nproc"] = int(os.environ.get("PYMAPDL_NPROC", min_cpus))
 
-    if machine_cores < int(args["nproc"]):
+    if not args.get("launch_on_hpc", False) and machine_cores < int(args["nproc"]):
         raise NotEnoughResources(
             f"The machine has {machine_cores} cores. PyMAPDL is asking for {args['nproc']} cores."
         )
@@ -2386,3 +2523,319 @@
             f'"{run_location}"'
         )
         raise error
+
+
+def launch_mapdl_on_cluster(
+    nproc: int,
+    *,
+    scheduler_options: Union[str, Dict[str, str]] = None,
+    **launch_mapdl_args: Dict[str, Any],
+) -> MapdlGrpc:
+    """Launch MAPDL on an HPC cluster
+
+    Launches an interactive MAPDL instance on an HPC cluster.
+
+    Parameters
+    ----------
+    nproc : int
+        Number of CPUs to be used in the simulation.
+
+    scheduler_options : Union[str, Dict[str, str]], optional
+        A string or dictionary specifying the job configuration for the
+        scheduler. For example ``scheduler_options = "-N 10"``.
+
+    Returns
+    -------
+    MapdlGrpc
+        Mapdl instance running on the HPC cluster.
+
+    Examples
+    --------
+    Run a job with 10 nodes and 2 tasks per node:
+
+    >>> from ansys.mapdl.core import launch_mapdl
+    >>> scheduler_options = {"nodes": 10, "ntasks-per-node": 2}
+    >>> mapdl = launch_mapdl(
+            launch_on_hpc=True,
+            nproc=20,
+            scheduler_options=scheduler_options
+        )
+
+    Raises
+    ------
+    ValueError
+        If the ``mode`` argument is not ``'grpc'``.
+    ValueError
+        If the ``ip`` argument is specified.
+    ValueError
+        If the ``start_instance`` argument is :class:`False`.
+    """
+
+    # Processing the arguments
+    launch_mapdl_args["launch_on_hpc"] = True
+
+    if launch_mapdl_args.get("mode", "grpc") != "grpc":
+        raise ValueError(
+            "The only mode allowed for launching MAPDL on an HPC cluster is gRPC."
+        )
+
+    if launch_mapdl_args.get("ip"):
+        raise ValueError(LAUNCH_ON_HPC_ERROR_MESSAGE_IP)
+
+    if not launch_mapdl_args.get("start_instance", True):
+        raise ValueError(
+            "The 'start_instance' argument must be 'True' when launching on HPC."
+        )
+
+    return launch_mapdl(
+        nproc=nproc,
+        scheduler_options=scheduler_options,
+        **launch_mapdl_args,
+    )
+
+
+def get_hostname_host_cluster(job_id: int, timeout: int = 30) -> tuple[str, str]:
+    options = f"show jobid -dd {job_id}"
+    LOG.debug(f"Executing the command 'scontrol {options}'")
+
+    ready = False
+    time_start = time.time()
+    counter = 0
+    while not ready:
+        proc = send_scontrol(options)
+
+        stdout = proc.stdout.read().decode()
+
+        if "JobState=RUNNING" not in stdout:
+            counter += 1
+            time.sleep(1)
+            if counter % 3 == 0:  # print every three seconds, skipping the first ones.
+                LOG.debug("The job is not ready yet. Waiting...")
+                print("The job is not ready yet. Waiting...")
+        else:
+            ready = True
+            break
+
+        # Exit by raising exception
+        if time.time() > time_start + timeout:
+            state = get_state_from_scontrol(stdout)
+
+            # Trying to get the hostname from the last valid message
+            try:
+                host = get_hostname_from_scontrol(stdout)
+                if not host:
+                    # If the string is empty, go to the exception clause.
+                    raise IndexError()
+
+                hostname_msg = f"The BatchHost for this job is '{host}'"
+            except (IndexError, AttributeError):
+                hostname_msg = "PyMAPDL couldn't get the BatchHost hostname"
+
+            # Raising exception
+            raise MapdlDidNotStart(
+                f"The HPC job (id: {job_id}) didn't start on time (timeout={timeout}). "
+                f"The job state is '{state}'. "
+                f"{hostname_msg}. "
+                "You can check more information by issuing in your console:\n"
+                f" scontrol show jobid -dd {job_id}"
+            )
+
+    LOG.debug(f"The 'scontrol' command returned:\n{stdout}")
+    batchhost = get_hostname_from_scontrol(stdout)
+    LOG.debug(f"Batchhost: {batchhost}")
+
+    # we should validate
+    batchhost_ip = socket.gethostbyname(batchhost)
+    LOG.debug(f"Batchhost IP: {batchhost_ip}")
+
+    LOG.info(
+        f"Job {job_id} successfully allocated and running in '{batchhost}'({batchhost_ip})"
+    )
+    return batchhost, batchhost_ip
+
+
+def get_jobid(stdout: str) -> int:
+    """Extract the jobid from a command output"""
+    job_id = stdout.strip().split(" ")[-1]
+
+    try:
+        job_id = int(job_id)
+    except ValueError:
+        LOG.error(f"The console output does not seem to have a valid jobid:\n{stdout}")
+        raise ValueError("PyMAPDL could not retrieve the job id.")
+
+    LOG.debug(f"The job id is: {job_id}")
+    return job_id
+
+
+def generate_sbatch_command(
+    cmd: Union[str, List[str]], scheduler_options: Optional[Union[str, Dict[str, str]]]
+) -> List[str]:
+    """Generate the sbatch command for a given MAPDL launch command."""
+
+    def add_minus(arg: str):
+        if not arg:
+            return ""
+
+        arg = str(arg)
+
+        if not arg.startswith("-"):
+            if len(arg) == 1:
+                arg = f"-{arg}"
+            else:
+                arg = f"--{arg}"
+        elif not arg.startswith("--") and len(arg) > 2:
+            # missing one "-" for a long argument
+            arg = f"-{arg}"
+
+        return arg
+
+    if scheduler_options:
+        if isinstance(scheduler_options, dict):
+            scheduler_options = " ".join(
+                [
+                    f"{add_minus(key)}='{value}'"
+                    for key, value in scheduler_options.items()
+                ]
+            )
+    else:
+        scheduler_options = ""
+
+    if "wrap" in scheduler_options:
+        raise ValueError(
+            "The sbatch argument 'wrap' is used by PyMAPDL to submit the job. "
+            "Hence you cannot use it as an sbatch argument."
+        )
+    LOG.debug(f"The additional sbatch arguments are: {scheduler_options}")
+
+    if isinstance(cmd, list):
+        cmd = " ".join(cmd)
+
+    cmd = ["sbatch", scheduler_options, "--wrap", f"'{cmd}'"]
+    cmd = [each for each in cmd if bool(each)]
+    return cmd
+
+
+def get_hostname_from_scontrol(stdout: str) -> str:
+    return stdout.split("BatchHost=")[1].splitlines()[0].strip()
+
+
+def get_state_from_scontrol(stdout: str) -> str:
+    return stdout.split("JobState=")[1].splitlines()[0].strip()
+
+
+def check_mapdl_launch_on_hpc(
+    process: subprocess.Popen, start_parm: Dict[str, str]
+) -> int:
+    """Check if the job is ready on the HPC
+
+    Check if the job has been successfully submitted, and additionally,
+    retrieve the BatchHost hostname, which is the host to connect to
+    using the gRPC interface.
+
+    Parameters
+    ----------
+    process : subprocess.Popen
+        Process used to submit the job. The stdout is read from there.
+    start_parm : Dict[str, str]
+        To store the job ID, the BatchHost hostname, and the IP into.
+
+    Returns
+    -------
+    int
+        The job ID.
+
+    Raises
+    ------
+    MapdlDidNotStart
+        The job submission failed.
+    """
+    stdout = process.stdout.read().decode()
+    if "Submitted batch job" not in stdout:
+        stderr = process.stderr.read().decode()
+        raise MapdlDidNotStart(
+            "PyMAPDL failed to submit the sbatch job:\n"
+            f"stdout:\n{stdout}\nstderr:\n{stderr}"
+        )
+
+    jobid = get_jobid(stdout)
+    LOG.info(f"HPC job successfully submitted. JobID: {jobid}")
+    return jobid
+
+
+def get_job_info(
+    start_parm: Dict[str, str], jobid: Optional[int] = None, timeout: int = 30
+):
+    """Get job info like BatchHost IP and hostname
+
+    Get the BatchHost hostname and IP and store them in the ``start_parm`` argument.
+
+    Parameters
+    ----------
+    start_parm : Dict[str, str]
+        Starting parameters for MAPDL.
+    jobid : int
+        Job ID.
+    timeout : int
+        Timeout for checking if the job is ready. By default, it checks for
+        the 'start_instance' key in the 'start_parm' argument; if none
+        is found, it passes :class:`None` to
+        :func:`ansys.mapdl.core.launcher.get_hostname_host_cluster`.
+    """
+    timeout = timeout or start_parm.get("start_instance")
+
+    jobid = jobid or start_parm["jobid"]
+
+    batch_host, batch_ip = get_hostname_host_cluster(jobid, timeout=timeout)
+
+    start_parm["ip"] = batch_ip
+    start_parm["hostname"] = batch_host
+    start_parm["jobid"] = jobid
+
+
+def kill_job(jobid: int):
+    """Kill the SLURM job"""
+    submitter(["scancel", str(jobid)])
+
+
+def send_scontrol(args: str):
+    cmd = f"scontrol {args}".split(" ")
+    return submitter(cmd)
+
+
+def submitter(
+    cmd: Union[str, List[str]],
+    *,
+    executable: str = None,
+    shell: bool = False,
+    cwd: str = None,
+    stdin: subprocess.PIPE = None,
+    stdout: subprocess.PIPE = None,
+    stderr: subprocess.PIPE = None,
+    env_vars: dict[str, str] = None,
+):
+
+    if executable:
+        if isinstance(cmd, list):
+            cmd = [executable] + cmd
+        else:
+            cmd = [executable, cmd]
+
+    if not stdin:
+        stdin = subprocess.DEVNULL
+    if not stdout:
+        stdout = subprocess.PIPE
+    if not stderr:
+        stderr = subprocess.PIPE
+
+    # cmd is controlled by the library with generate_mapdl_launch_command.
+    # Excluding bandit check.
+    return subprocess.Popen(
+        args=cmd,
+        shell=shell,  # sbatch does not work without shell.
+ cwd=cwd, + stdin=stdin, + stdout=stdout, + stderr=stderr, + env=env_vars, + ) # nosec B603 B607 diff --git a/src/ansys/mapdl/core/mapdl_core.py b/src/ansys/mapdl/core/mapdl_core.py index f9664ad6a8..2a59332b9a 100644 --- a/src/ansys/mapdl/core/mapdl_core.py +++ b/src/ansys/mapdl/core/mapdl_core.py @@ -169,9 +169,15 @@ _ALLOWED_START_PARM = [ "additional_switches", "check_parameter_names", + "env_vars", + "launched", "exec_file", + "finish_job_on_exit", + "hostname", "ip", + "jobid", "jobname", + "launch_on_hpc", "nproc", "override", "port", @@ -179,6 +185,7 @@ "process", "ram", "run_location", + "start_instance", "start_timeout", "timeout", ] @@ -247,7 +254,7 @@ def __init__( self._response = None self._mode = None self._mapdl_process = None - self._launched: bool = False + self._launched: bool = start_parm.get("launched", False) self._stderr = None self._stdout = None self._file_type_for_plots = file_type_for_plots @@ -524,6 +531,7 @@ def directory(self) -> str: def directory(self, path: Union[str, pathlib.Path]) -> None: """Change the directory using ``Mapdl.cwd``""" self.cwd(path) + self._path = path @property def exited(self): diff --git a/src/ansys/mapdl/core/mapdl_extended.py b/src/ansys/mapdl/core/mapdl_extended.py index fa8ce48de9..94f2808fd4 100644 --- a/src/ansys/mapdl/core/mapdl_extended.py +++ b/src/ansys/mapdl/core/mapdl_extended.py @@ -1422,6 +1422,7 @@ def inquire(self, strarray="", func="", arg1="", arg2="", **kwargs): raise ValueError( f"The arguments (strarray='{strarray}', func='{func}') are not valid." ) + response = "" n_try = 3 i_try = 0 diff --git a/src/ansys/mapdl/core/mapdl_grpc.py b/src/ansys/mapdl/core/mapdl_grpc.py index 9e4158defa..3e80ea3e86 100644 --- a/src/ansys/mapdl/core/mapdl_grpc.py +++ b/src/ansys/mapdl/core/mapdl_grpc.py @@ -31,14 +31,15 @@ import pathlib import re import shutil +import socket # Subprocess is needed to start the backend. But # the input is controlled by the library. Excluding bandit check. -from subprocess import Popen # nosec B404 +import subprocess # nosec B404 import tempfile import threading import time -from typing import TYPE_CHECKING, List, Literal, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union from uuid import uuid4 from warnings import warn import weakref @@ -87,6 +88,7 @@ from ansys.mapdl.core.misc import ( check_valid_ip, last_created, + only_numbers_and_dots, random_string, run_as_prep7, supress_logging, @@ -137,6 +139,13 @@ } +def get_start_instance(*args, **kwargs) -> bool: + """Wraps get_start_instance to avoid circular imports""" + from ansys.mapdl.core.launcher import get_start_instance + + return get_start_instance(*args, **kwargs) + + def chunk_raw(raw, save_as): with io.BytesIO(raw) as f: while True: @@ -359,19 +368,32 @@ def __init__( "If `channel` is specified, neither `port` nor `ip` can be specified." 
) if ip is None: - ip = "127.0.0.1" + ip = start_parm.pop("ip", None) or "127.0.0.1" - # port and ip are needed to setup the log + # setting hostname + if not only_numbers_and_dots(ip): + # it is a hostname + self._hostname = ip + ip = socket.gethostbyname(ip) + else: + # it is an IP + self._hostname = ( + "localhost" + if ip in ["127.0.0.1", "127.0.1.1", "localhost"] + else socket.gethostbyaddr(ip)[0] + ) + check_valid_ip(ip) + self._ip: str = ip + + # port and ip are needed to setup the log if port is None: from ansys.mapdl.core.launcher import MAPDL_DEFAULT_PORT port = MAPDL_DEFAULT_PORT self._port: int = int(port) - - check_valid_ip(ip) - self._ip: str = ip + start_parm["port"] = self._port # store for `open_gui` super().__init__( loglevel=loglevel, @@ -392,11 +414,13 @@ def __init__( self._cleanup: bool = cleanup_on_exit self.remove_temp_dir_on_exit: bool = remove_temp_dir_on_exit self._jobname: str = start_parm.get("jobname", "file") - self._path: str = start_parm.get("run_location", None) + self._path: Optional[str] = start_parm.get("run_location", None) + self._start_instance: Optional[str] = ( + start_parm.get("start_instance") or get_start_instance() + ) self._busy: bool = False # used to check if running a command on the server - self._local: bool = ip in ["127.0.0.1", "127.0.1.1", "localhost"] - if "local" in start_parm: # pragma: no cover # allow this to be overridden - self._local: bool = start_parm["local"] + self._local: bool = start_parm.get("local", True) + self._launched: bool = start_parm.get("launched", True) self._health_response_queue: Optional["Queue"] = None self._exiting: bool = False self._exited: Optional[bool] = None @@ -404,6 +428,7 @@ def __init__( self.__server_version: Optional[str] = None self._state: Optional[grpc.Future] = None self._timeout: int = timeout + self._env_vars: Dict[str, str] = start_parm.get("env_vars", {}) self._pids: List[Union[int, None]] = [] self._channel_state: grpc.ChannelConnectivity = ( grpc.ChannelConnectivity.CONNECTING @@ -420,15 +445,18 @@ def __init__( self._subscribe_to_channel() # connect and validate to the channel - self._mapdl_process: Popen = start_parm.pop("process", None) + self._mapdl_process: subprocess.Popen = start_parm.pop("process", None) # saving for later use (for example open_gui) - start_parm["ip"] = ip - start_parm["port"] = port - self._start_parm = start_parm + self._start_parm: Dict[str, Any] = start_parm + + # Storing HPC related stuff + self._jobid: int = start_parm.get("jobid") + self._mapdl_on_hpc: bool = bool(self._jobid) + self.finish_job_on_exit: bool = start_parm.get("finish_job_on_exit", True) # Queueing the stds - if self._mapdl_process: + if not self._mapdl_on_hpc and self._mapdl_process: self._create_process_stds_queue() try: @@ -980,25 +1008,42 @@ def _run(self, cmd: str, verbose: bool = False, mute: Optional[bool] = None) -> return response.strip() @property - def busy(self): + def busy(self) -> bool: """True when MAPDL gRPC server is executing a command.""" return self._busy @property - def exiting(self): + def exiting(self) -> bool: """Returns true if the MAPDL instance is exiting.""" return self._exiting @property - def port(self): + def port(self) -> int: """Returns the MAPDL gRPC instance port.""" return self._port @property - def ip(self): + def ip(self) -> str: """Return the MAPDL gRPC instance IP.""" return self._ip + @property + def hostname(self) -> str: + """Return the hostname of the machine MAPDL is running in.""" + return self._hostname + + @property + def jobid(self) -> int: + 
"""Returns the job id where the MAPDL is running in. + This is only applicable if MAPDL is running on an HPC cluster.""" + return self._jobid + + @property + def mapdl_on_hpc(self) -> bool: + """Returns :class:`True` if the MAPDL instance has been launched using + an scheduler.""" + return self._mapdl_on_hpc + @protect_grpc def _send_command(self, cmd: str, mute: bool = False) -> Optional[str]: """Send a MAPDL command and return the response as a string""" @@ -1046,7 +1091,10 @@ def _threaded_heartbeat(self): except Exception: self._log.debug("Checking if MAPDL instance is still alive.") + # Placing logging in the exit method raises exceptions when + # this is triggered by "__del__" @protect_from(ValueError, "I/O operation on closed file.") + @protect_from(AttributeError, "'MapdlGrpc' object has no attribute '_log'") def exit(self, save=False, force=False, **kwargs): """Exit MAPDL. @@ -1069,16 +1117,20 @@ def exit(self, save=False, force=False, **kwargs): >>> mapdl.exit() """ # check if permitted to start (and hence exit) instances - self._log.debug( - f"Exiting MAPLD gRPC instance {self.ip}:{self.port} on '{self._path}'." - ) + from ansys.mapdl import core as pymapdl - mapdl_path = self.directory # caching + if hasattr(self, "_log"): + self._log.debug( + f"Exiting MAPLD gRPC instance {self.ip}:{self.port} on '{self._path}'." + ) + + mapdl_path = self._path # using cached version if self._exited is None: self._log.debug("'self._exited' is none.") - return # Some edge cases the class object is not completely initialized but the __del__ method - # is called when exiting python. So, early exit here instead an error in the following - # self.directory command. + return # Some edge cases the class object is not completely + # initialized but the __del__ method + # is called when exiting python. So, early exit here instead an + # error in the following self.directory command. # See issue #1796 elif self._exited: # Already exited. 
@@ -1090,46 +1142,54 @@ def exit(self, save=False, force=False, **kwargs):
             self.save()
 
         if not force:
-            # lazy import here to avoid circular import
-            from ansys.mapdl.core.launcher import get_start_instance
-
             # ignore this method if PYMAPDL_START_INSTANCE=False
-            if not get_start_instance():
+            if not self._start_instance:
                 self._log.info("Ignoring exit due to PYMAPDL_START_INSTANCE=False")
                 return
 
-            # or building the gallery
-            from ansys.mapdl import core as pymapdl
+            # or building the gallery
             if pymapdl.BUILDING_GALLERY:
                 self._log.info("Ignoring exit due as BUILDING_GALLERY=True")
                 return
 
-        self._exiting = True
-
-        if not kwargs.pop("fake_exit", False):
-            # This cannot/should not be faked
-            if self._local:
-                self._cache_pids()  # Recache processes
-
-                if os.name == "nt":
-                    self._kill_server()
-                self._close_process()
-                self._remove_lock_file(mapdl_path)
-            else:
-                self._kill_server()
+        # Actually exit the MAPDL instance
+        if self.finish_job_on_exit:
+            self._exiting = True
+            self._exit_mapdl(path=mapdl_path)
+            self._exited = True
 
-        self._exited = True
-        self._exiting = False
+            # Exit the HPC job
+            if self._mapdl_on_hpc:
+                self.kill_job(self.jobid)
+                if hasattr(self, "_log"):
+                    self._log.debug(f"Job (id: {self.jobid}) has been cancelled.")
 
+        # Exit remote instances
         if self._remote_instance:  # pragma: no cover
             # No cover: The CI is working with a single MAPDL instance
             self._remote_instance.delete()
 
+        self._exiting = False
+
+        # Post-kill tasks
         self._remove_temp_dir_on_exit(mapdl_path)
 
         if self._local and self._port in pymapdl._LOCAL_PORTS:
             pymapdl._LOCAL_PORTS.remove(self._port)
 
+    def _exit_mapdl(self, path: Optional[str] = None) -> None:
+        """Exit MAPDL and remove the lock file in ``path``."""
+        # This cannot/should not be faked
+        if self._local:
+            self._cache_pids()  # Recache processes
+
+            if os.name == "nt":
+                self._kill_server()
+            self._close_process()
+            self._remove_lock_file(path)
+        else:
+            self._kill_server()
+
     def _remove_temp_dir_on_exit(self, path=None):
         """Removes the temporary directory created by the launcher.
 
@@ -1165,16 +1225,8 @@ def _kill_server(self):
         """
 
         if self._exited:
-            self._log.debug("MAPDL server already exited")
             return
 
-        try:
-            self._log.debug("Killing MAPDL server")
-        except ValueError:
-            # It might throw ValueError: I/O operation on closed file.
-            # if the logger already exited.
-            pass
-
         if (
             self._version and self._version >= 24.2
         ):  # We can't use the non-cached version because of recursion error.
@@ -2897,9 +2949,14 @@ def __str__(self):
         en = stats.find("*** PrePro")
         product = "\n".join(stats[st:en].splitlines()[1:]).strip()
 
-        info = f"Product: {product}\n"
+        info = "Mapdl\n"
+        info += "-----\n"
+        info += f"PyMAPDL Version: {__version__}\n"
+        info += "Interface: grpc\n"
+        info += f"Product: {product}\n"
         info += f"MAPDL Version: {self.version}\n"
-        info += f"ansys.mapdl Version: {__version__}\n"
+        info += f"Running on: {self.hostname}\n"
+        info += f" ({self.ip})"
 
         return info
 
     @supress_logging
@@ -3700,3 +3757,30 @@ def get_file_name(path):
             shutil.copy(file_name, target_dir)
 
         return os.path.basename(target_dir)
+
+    def kill_job(self, jobid: int) -> None:
+        """Kill the given SLURM job using 'scancel'."""
+        cmd = ["scancel", f"{jobid}"]
+        # Issue 'scancel' to ensure the job is stopped.
+        subprocess.Popen(cmd)
+
+    def __del__(self):
+        """Clean up when the object is deleted."""
+        # Escape early if needed and kill the HPC job.
+        # The garbage collector may remove attributes before we can
+        # evaluate this.
+        try:
+            # Exit the HPC job
+            if (
+                hasattr(self, "_mapdl_on_hpc")
+                and self._mapdl_on_hpc
+                and hasattr(self, "finish_job_on_exit")
+                and self.finish_job_on_exit
+            ):
+                self.kill_job(self.jobid)
+
+            if not self._start_instance:
+                return
+
+        except Exception:
+            # Best-effort cleanup: never raise from __del__.
+            pass
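A short sketch of the teardown semantics implemented above; the session is hypothetical:

.. code-block:: python

    # Hypothetical interactive session on a login node.
    from ansys.mapdl.core import launch_mapdl

    mapdl = launch_mapdl(launch_on_hpc=True, nproc=10)

    # Default behavior: exit() stops MAPDL and cancels the SLURM job
    # through 'scancel <jobid>'.
    mapdl.exit()

    # Opt out: setting this before exiting keeps the job (and MAPDL) alive.
    # mapdl.finish_job_on_exit = False
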
diff --git a/src/ansys/mapdl/core/misc.py b/src/ansys/mapdl/core/misc.py
index e909b0d6a5..138eb23522 100644
--- a/src/ansys/mapdl/core/misc.py
+++ b/src/ansys/mapdl/core/misc.py
@@ -28,6 +28,7 @@
 import os
 from pathlib import Path
 import platform
+import re
 import socket
 import string
 import tempfile
@@ -599,3 +600,18 @@ def get_active_branch_name():
         kind = f"release/{'.'.join(pymapdl.__version__.split('.')[:2])}"
 
     return kind
+
+
+def only_numbers_and_dots(s: str) -> bool:
+    """Return ``True`` if the string contains only digits and dots."""
+    return bool(re.fullmatch(r"[0-9.]+", s))
+
+
+def stack(*decorators):
+    """Stack multiple decorators on top of each other."""
+
+    def deco(f):
+        for dec in reversed(decorators):
+            f = dec(f)
+        return f
+
+    return deco
diff --git a/tests/conftest.py b/tests/conftest.py
index 703bd3a36d..384bd31495 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -28,6 +28,7 @@
 import subprocess
 from sys import platform
 import time
+from unittest.mock import patch
 
 from _pytest.terminal import TerminalReporter  # for terminal customization
 import psutil
@@ -437,9 +438,12 @@ def run_before_and_after_tests(
 
     yield  # this is where the testing happens
 
+    # Check that the state has been reset
     assert prev == mapdl.is_local
-    assert not mapdl.exited
-    assert not mapdl.ignore_errors
+    assert not mapdl.exited, "MAPDL exited after the test. It should not have!"
+    assert not mapdl._mapdl_on_hpc, "Mapdl class is in HPC mode. It should not be!"
+    assert mapdl.finish_job_on_exit, "Mapdl class should finish the job!"
+    assert not mapdl.ignore_errors, "Mapdl class is ignoring errors!"
 
     make_sure_not_instances_are_left_open()
 
@@ -623,6 +627,7 @@ def mapdl(request, tmpdir_factory):
         if START_INSTANCE:
             mapdl._local = True
             mapdl._exited = False
+            assert mapdl.finish_job_on_exit
             mapdl.exit(save=True, force=True)
             assert mapdl._exited
             assert "MAPDL exited" in str(mapdl)
@@ -638,6 +643,9 @@ def mapdl(request, tmpdir_factory):
             with pytest.raises(MapdlExitedError):
                 mapdl._send_command_stream("/PREP7")
 
+    # Delete the Mapdl object
+    del mapdl
+
 
 SpacedPaths = namedtuple(
     "SpacedPaths",
@@ -645,6 +653,40 @@
 )
 
 
+# Patches needed to fake the MAPDL launch
+def _returns(return_=None):
+    return lambda *args, **kwargs: return_
+
+
+# Methods to patch in MAPDL when launching
+def _patch_method(method):
+    return "ansys.mapdl.core.mapdl_grpc.MapdlGrpc."
+ method + + +_meth_patch_MAPDL_launch = ( + # method, and its return + (_patch_method("_connect"), _returns(True)), + (_patch_method("_run"), _returns("")), + (_patch_method("_create_channel"), _returns("")), + (_patch_method("inquire"), _returns("/home/simulation")), + (_patch_method("_subscribe_to_channel"), _returns("")), + (_patch_method("_run_at_connect"), _returns("")), + (_patch_method("_exit_mapdl"), _returns(None)), + # non-mapdl methods + ("socket.gethostbyname", _returns("123.45.67.99")), + ( + "socket.gethostbyaddr", + _returns( + [ + "mapdlhostname", + ] + ), + ), +) + +PATCH_MAPDL_START = [patch(method, ret) for method, ret in _meth_patch_MAPDL_launch] + + @pytest.fixture(scope="function") def set_env_var(request, monkeypatch): """Set an environment variable from given requests, this fixture must be used with `parametrize`""" diff --git a/tests/test_launcher.py b/tests/test_launcher.py index b0823533be..ea86036cf8 100644 --- a/tests/test_launcher.py +++ b/tests/test_launcher.py @@ -25,6 +25,7 @@ import os import subprocess import tempfile +from time import sleep from unittest.mock import patch import warnings @@ -33,33 +34,54 @@ from ansys.mapdl import core as pymapdl from ansys.mapdl.core.errors import ( + MapdlDidNotStart, NotEnoughResources, PortAlreadyInUseByAnMAPDLInstance, + VersionError, ) from ansys.mapdl.core.launcher import ( _HAS_ATP, LOCALHOST, _is_ubuntu, _parse_ip_route, + check_mapdl_launch_on_hpc, + check_mode, force_smp_in_student, generate_mapdl_launch_command, + generate_sbatch_command, generate_start_parameters, get_cpus, get_exec_file, + get_hostname_host_cluster, + get_ip, + get_jobid, + get_port, get_run_location, get_slurm_options, get_start_instance, get_version, - is_on_slurm, + is_running_on_slurm, + kill_job, launch_grpc, launch_mapdl, + launch_mapdl_on_cluster, remove_err_files, + send_scontrol, set_license_switch, set_MPI_additional_switches, + submitter, update_env_vars, ) from ansys.mapdl.core.licensing import LICENSES -from conftest import ON_LOCAL, QUICK_LAUNCH_SWITCHES, NullContext, requires +from ansys.mapdl.core.misc import stack +from conftest import ( + ON_LOCAL, + PATCH_MAPDL_START, + QUICK_LAUNCH_SWITCHES, + TESTING_MINIMAL, + NullContext, + requires, +) try: from ansys.tools.path import ( @@ -92,6 +114,27 @@ start_timeout = 30 # Seconds +def get_fake_process(message_stdout, message_stderr="", time_sleep=0): + class stdout: + def read(self): + return message_stdout.encode() + + class stderr: + def read(self): + return message_stderr.encode() + + class myprocess: + pass + + process = myprocess() + process.stdout = stdout() + process.stderr = stderr() + + sleep(time_sleep) + + return process + + @pytest.fixture def fake_local_mapdl(mapdl): """Fixture to execute asserts before and after a test is run""" @@ -210,6 +253,7 @@ def test_license_type_additional_switch(mapdl, license_name): assert f"-p {license_name}" in args["additional_switches"] +@stack(*PATCH_MAPDL_START) @requires("ansys-tools-path") def test_license_type_dummy(mapdl): dummy_license_type = "dummy" @@ -218,7 +262,7 @@ def test_license_type_dummy(mapdl): match="Still PyMAPDL will try to use it but in older MAPDL versions you might experience", ): launch_mapdl( - start_instance=False, + start_instance=True, port=mapdl.port + 1, additional_switches=f" -p {dummy_license_type} " + QUICK_LAUNCH_SWITCHES, start_timeout=start_timeout, @@ -696,17 +740,17 @@ def test_slurm_ram(monkeypatch, ram, expected, context): @pytest.mark.parametrize("slurm_env_var", ["True", "false", ""]) 
@pytest.mark.parametrize("slurm_job_name", ["True", "false", ""]) @pytest.mark.parametrize("slurm_job_id", ["True", "false", ""]) -@pytest.mark.parametrize("detect_HPC", [True, False, None]) -def test_is_on_slurm( - monkeypatch, slurm_env_var, slurm_job_name, slurm_job_id, detect_HPC +@pytest.mark.parametrize("running_on_hpc", [True, False, None]) +def test_is_running_on_slurm( + monkeypatch, slurm_env_var, slurm_job_name, slurm_job_id, running_on_hpc ): - monkeypatch.setenv("PYMAPDL_ON_SLURM", slurm_env_var) + monkeypatch.setenv("PYMAPDL_RUNNING_ON_HPC", slurm_env_var) monkeypatch.setenv("SLURM_JOB_NAME", slurm_job_name) monkeypatch.setenv("SLURM_JOB_ID", slurm_job_id) - flag = is_on_slurm(args={"detect_HPC": detect_HPC}) + flag = is_running_on_slurm(args={"running_on_hpc": running_on_hpc}) - if detect_HPC is not True: + if running_on_hpc is not True: assert not flag else: @@ -722,9 +766,9 @@ def test_is_on_slurm( if ON_LOCAL: assert ( launch_mapdl( - detect_HPC=detect_HPC, + running_on_hpc=running_on_hpc, _debug_no_launch=True, - )["ON_SLURM"] + )["running_on_hpc"] == flag ) @@ -882,32 +926,8 @@ def test_ip_and_start_instance( assert options["ip"] in (LOCALHOST, "0.0.0.0", "127.0.0.1") -def mycpucount(**kwargs): - return 10 # faking 10 cores - - -@patch("psutil.cpu_count", mycpucount) -def test_nproc_envvar(monkeypatch): - monkeypatch.setenv("PYMAPDL_NPROC", 10) - args = launch_mapdl(_debug_no_launch=True) - assert args["nproc"] == 10 - - -@pytest.mark.parametrize("nproc", [None, 5, 9, 15]) -@patch("psutil.cpu_count", mycpucount) -def test_nproc(monkeypatch, nproc): - monkeypatch.delenv("PYMAPDL_START_INSTANCE", False) - - if nproc and nproc > mycpucount(): - with pytest.raises(NotEnoughResources): - launch_mapdl(nproc=nproc, _debug_no_launch=True) - else: - args = launch_mapdl(nproc=nproc, _debug_no_launch=True) - assert args["nproc"] == (nproc or 2) - - @patch("os.name", "nt") -@patch("psutil.cpu_count", mycpucount) +@patch("psutil.cpu_count", lambda *args, **kwargs: 10) def test_generate_mapdl_launch_command_windows(): assert os.name == "nt" # Checking mocking is properly done @@ -928,10 +948,26 @@ def test_generate_mapdl_launch_command_windows(): ) assert isinstance(cmd, list) - assert all([isinstance(each, str) for each in cmd]) + + assert f'"{exec_file}"' in cmd + assert "-j" in cmd + assert f"{jobname}" in cmd + assert "-port" in cmd + assert f"{port}" in cmd + assert "-m" in cmd + assert f"{ram*1024}" in cmd + assert "-np" in cmd + assert f"{nproc}" in cmd + assert "-grpc" in cmd + assert f"{additional_switches}" in cmd + assert "-b" in cmd + assert "-i" in cmd + assert ".__tmp__.inp" in cmd + assert "-o" in cmd + assert ".__tmp__.out" in cmd cmd = " ".join(cmd) - assert f'"{exec_file}" ' in cmd + assert f'"{exec_file}"' in cmd assert f" -j {jobname} " in cmd assert f" -port {port} " in cmd assert f" -m {ram*1024} " in cmd @@ -963,6 +999,26 @@ def test_generate_mapdl_launch_command_linux(): assert isinstance(cmd, list) assert all([isinstance(each, str) for each in cmd]) + assert isinstance(cmd, list) + + assert f"{exec_file}" in cmd + assert "-j" in cmd + assert f"{jobname}" in cmd + assert "-port" in cmd + assert f"{port}" in cmd + assert "-m" in cmd + assert f"{ram*1024}" in cmd + assert "-np" in cmd + assert f"{nproc}" in cmd + assert "-grpc" in cmd + assert f"{additional_switches}" in cmd + + assert "-b" not in cmd + assert "-i" not in cmd + assert ".__tmp__.inp" not in cmd + assert "-o" not in cmd + assert ".__tmp__.out" not in cmd + cmd = " ".join(cmd) assert f"{exec_file} 
" in cmd assert f" -j {jobname} " in cmd @@ -1069,6 +1125,14 @@ def test_get_run_location_no_access(tmpdir): {"exec_file": True, "version": True}, "Cannot specify both ``exec_file`` and ``version``.", ], + [ + {"scheduler_options": True}, + "PyMAPDL does not read the number of cores from the 'scheduler_options'.", + ], + [ + {"launch_on_hpc": True, "ip": "111.22.33.44"}, + "PyMAPDL cannot ensure a specific IP will be used when launching", + ], ], ) def test_pre_check_args(args, match): @@ -1107,30 +1171,35 @@ def test_remove_err_files_fail(tmpdir): # testing on windows to account for temp file -def fake_subprocess_open(*args, **kwargs): - kwargs["cmd"] = args[0] - return kwargs - - @patch("os.name", "nt") -@patch("subprocess.Popen", fake_subprocess_open) -def test_launch_grpc(tmpdir): - cmd = "ansys.exe -b -i my_input.inp -o my_output.inp".split() +@pytest.mark.parametrize("launch_on_hpc", [None, False, True]) +@patch("subprocess.Popen", lambda *args, **kwargs: kwargs) +def test_launch_grpc(tmpdir, launch_on_hpc): + if launch_on_hpc: + cmd = ["sbatch", "--wrap", "'ansys.exe -b -i my_input.inp -o my_output.inp'"] + else: + cmd = "ansys.exe -b -i my_input.inp -o my_output.inp".split(" ") run_location = str(tmpdir) - kwags = launch_grpc(cmd, run_location) + kwargs = launch_grpc(cmd, run_location, launch_on_hpc=launch_on_hpc) inp_file = os.path.join(run_location, "my_input.inp") - assert os.path.exists(inp_file) - with open(inp_file, "r") as fid: - assert "FINISH" in fid.read() + if launch_on_hpc: + assert "sbatch" in kwargs["args"] + assert "--wrap" in kwargs["args"] + assert " ".join(cmd) == kwargs["args"] + else: + assert cmd == kwargs["args"] + assert os.path.exists(inp_file) + with open(inp_file, "r") as fid: + assert "FINISH" in fid.read() - assert cmd == kwags["cmd"] - assert "TRUE" == kwags["env"].pop("ANS_CMD_NODIAG") - assert not kwags["env"] - assert isinstance(kwags["stdin"], type(subprocess.DEVNULL)) - assert isinstance(kwags["stdout"], type(subprocess.PIPE)) - assert isinstance(kwags["stderr"], type(subprocess.PIPE)) + assert not kwargs["shell"] + assert "TRUE" == kwargs["env"].pop("ANS_CMD_NODIAG") + assert not kwargs["env"] + assert isinstance(kwargs["stdin"], type(subprocess.DEVNULL)) + assert isinstance(kwargs["stdout"], type(subprocess.PIPE)) + assert isinstance(kwargs["stderr"], type(subprocess.PIPE)) @patch("psutil.cpu_count", lambda *args, **kwags: 5) @@ -1146,7 +1215,7 @@ def test_get_cpus(monkeypatch, arg, env): if (arg and arg > cores_machine) or (arg is None and env and env > cores_machine): context = pytest.raises(NotEnoughResources) - args = {"nproc": arg, "ON_SLURM": False} + args = {"nproc": arg, "running_on_hpc": False} with context: get_cpus(args) @@ -1160,6 +1229,667 @@ def test_get_cpus(monkeypatch, arg, env): @patch("psutil.cpu_count", lambda *args, **kwags: 1) def test_get_cpus_min(): - args = {"nproc": None, "ON_SLURM": False} + args = {"nproc": None, "running_on_hpc": False} get_cpus(args) assert args["nproc"] == 1 + + +@pytest.mark.parametrize( + "scheduler_options", + [None, "-N 10", {"N": 10, "nodes": 10, "-tasks": 3, "--ntask-per-node": 2}], +) +def test_generate_sbatch_command(scheduler_options): + cmd = [ + "/ansys_inc/v242/ansys/bin/ansys242", + "-j", + "myjob", + "-np", + "10", + "-m", + "1024", + "-port", + "50052", + "-my_add=switch", + ] + + cmd_post = generate_sbatch_command(cmd, scheduler_options) + + assert cmd_post[0] == "sbatch" + if scheduler_options: + if isinstance(scheduler_options, dict): + assert ( + cmd_post[1] == "-N='10' 
--nodes='10' --tasks='3' --ntask-per-node='2'" + ) + else: + assert cmd_post[1] == scheduler_options + + assert cmd_post[-2] == "--wrap" + assert cmd_post[-1] == f"""'{" ".join(cmd)}'""" + + +@pytest.mark.parametrize( + "scheduler_options", + [None, "--wrap '/bin/bash", {"--wrap": "/bin/bash", "nodes": 10}], +) +def test_generate_sbatch_wrap_in_arg(scheduler_options): + cmd = ["/ansys_inc/v242/ansys/bin/ansys242", "-grpc"] + if scheduler_options: + context = pytest.raises( + ValueError, + match="The sbatch argument 'wrap' is used by PyMAPDL to submit the job.", + ) + else: + context = NullContext() + + with context: + cmd_post = generate_sbatch_command(cmd, scheduler_options) + assert cmd[0] in cmd_post[-1] + + +def myfakegethostbyname(*args, **kwargs): + return "mycoolhostname" + + +def myfakegethostbynameIP(*args, **kwargs): + return "123.45.67.89" + + +@pytest.mark.parametrize( + "message_stdout, message_stderr", + [ + ["Submitted batch job 1001", ""], + ["Submission failed", "Something very bad happened"], + ], +) +@patch("socket.gethostbyname", myfakegethostbynameIP) +@patch("ansys.mapdl.core.launcher.get_hostname_host_cluster", myfakegethostbyname) +def test_check_mapdl_launch_on_hpc(message_stdout, message_stderr): + + process = get_fake_process(message_stdout, message_stderr) + + start_parm = {} + if "Submitted batch job" in message_stdout: + context = NullContext() + + else: + context = pytest.raises( + MapdlDidNotStart, + match=f"stdout:\n{message_stdout}\nstderr:\n{message_stderr}", + ) + + with context: + assert check_mapdl_launch_on_hpc(process, start_parm) == 1001 + + +@patch("ansys.mapdl.core.Mapdl._exit_mapdl", lambda *args, **kwargs: None) +@patch("ansys.mapdl.core.mapdl_grpc.MapdlGrpc.kill_job") +def test_exit_job(mock_popen, mapdl): + # Setting to exit + mapdl._mapdl_on_hpc = True + mapdl.finish_job_on_exit = True + prev_rem = mapdl.remove_temp_dir_on_exit + mapdl.remove_temp_dir_on_exit = False + + mock_popen.return_value = lambda *args, **kwargs: True + + mapdl._jobid = 1001 + assert mapdl.jobid == 1001 + + mapdl.exit(force=True) + + # Returning to state + mapdl._jobid = None + mapdl._exited = False + mapdl._mapdl_on_hpc = False + mapdl.finish_job_on_exit = True + mapdl.remove_temp_dir_on_exit = prev_rem + + # Checking + mock_popen.assert_called_once_with(1001) + + +@requires("ansys-tools-path") +@patch( + "ansys.tools.path.path._get_application_path", + lambda *args, **kwargs: "path/to/mapdl/executable", +) +@patch("ansys.tools.path.path._mapdl_version_from_path", lambda *args, **kwargs: 242) +@stack(*PATCH_MAPDL_START) +@patch("ansys.mapdl.core.launcher.launch_grpc") +@patch("ansys.mapdl.core.mapdl_grpc.MapdlGrpc.kill_job") +@patch("ansys.mapdl.core.launcher.send_scontrol") +def test_launch_on_hpc_found_ansys(mck_ssctrl, mck_del, mck_launch_grpc, monkeypatch): + monkeypatch.delenv("PYMAPDL_START_INSTANCE", False) + + mck_launch_grpc.return_value = get_fake_process("Submitted batch job 1001") + mck_ssctrl.return_value = get_fake_process( + "a long scontrol...\nJobState=RUNNING\n...\nBatchHost=myhostname\n...\nin message" + ) + + mapdl_a = launch_mapdl( + launch_on_hpc=True, + ) + mapdl_a.exit() + + mck_launch_grpc.assert_called_once() + cmd = mck_launch_grpc.call_args_list[0][1]["cmd"] + env_vars = mck_launch_grpc.call_args_list[0][1]["env_vars"] + + assert "sbatch" in cmd + assert "--wrap" in cmd + assert "path/to/mapdl/executable" in cmd[-1] + assert "-grpc" in cmd[-1] + + assert env_vars.get("ANS_MULTIPLE_NODES") == "1" + assert env_vars.get("HYDRA_BOOTSTRAP") == 
"slurm" + + mck_ssctrl.assert_called_once() + assert "show" in mck_ssctrl.call_args[0][0] + assert "1001" in mck_ssctrl.call_args[0][0] + + mck_del.assert_called_once() + + +@stack(*PATCH_MAPDL_START) +@patch("ansys.mapdl.core.mapdl_grpc.MapdlGrpc.kill_job") +@patch("ansys.mapdl.core.launcher.launch_grpc") +@patch("ansys.mapdl.core.launcher.send_scontrol") +def test_launch_on_hpc_not_found_ansys(mck_sc, mck_lgrpc, mck_kj, monkeypatch): + monkeypatch.delenv("PYMAPDL_START_INSTANCE", False) + exec_file = "path/to/mapdl/v242/executable/ansys242" + + mck_lgrpc.return_value = get_fake_process("Submitted batch job 1001") + mck_kj.return_value = None + mck_sc.return_value = get_fake_process( + "a long scontrol...\nJobState=RUNNING\n...\nBatchHost=myhostname\n...\nin message" + ) + + if TESTING_MINIMAL: + context = NullContext() + else: + context = pytest.warns( + UserWarning, match="PyMAPDL could not find the ANSYS executable." + ) + + with context: + mapdl = launch_mapdl( + launch_on_hpc=True, + exec_file=exec_file, + ) + mapdl.exit() + + mck_lgrpc.assert_called_once() + cmd = mck_lgrpc.call_args_list[0][1]["cmd"] + env_vars = mck_lgrpc.call_args_list[0][1]["env_vars"] + + assert "sbatch" in cmd + assert "--wrap" in cmd + assert exec_file in cmd[-1] + assert "-grpc" in cmd[-1] + + assert env_vars.get("ANS_MULTIPLE_NODES") == "1" + assert env_vars.get("HYDRA_BOOTSTRAP") == "slurm" + + mck_sc.assert_called_once() + assert "show" in mck_sc.call_args[0][0] + assert "1001" in mck_sc.call_args[0][0] + + mck_kj.assert_called_once() + + +def test_launch_on_hpc_exception_launch_mapdl(monkeypatch): + monkeypatch.delenv("PYMAPDL_START_INSTANCE", False) + exec_file = "path/to/mapdl/v242/executable/ansys242" + + process = get_fake_process("ERROR") + + with patch("ansys.mapdl.core.launcher.launch_grpc") as mock_launch_grpc: + with patch("ansys.mapdl.core.launcher.kill_job") as mock_popen: + + mock_launch_grpc.return_value = process + + with pytest.raises( + Exception, match="PyMAPDL failed to submit the sbatch job:" + ): + launch_mapdl( + launch_on_hpc=True, + exec_file=exec_file, + ) + + mock_launch_grpc.assert_called_once() + cmd = mock_launch_grpc.call_args_list[0][1]["cmd"] + env_vars = mock_launch_grpc.call_args_list[0][1]["env_vars"] + + assert "sbatch" in cmd + assert "--wrap" in cmd + assert exec_file in cmd[-1] + assert "-grpc" in cmd[-1] + + assert env_vars.get("ANS_MULTIPLE_NODES") == "1" + assert env_vars.get("HYDRA_BOOTSTRAP") == "slurm" + + # Popen wi + mock_popen.assert_not_called() + + +def test_launch_on_hpc_exception_successfull_sbatch(monkeypatch): + monkeypatch.delenv("PYMAPDL_START_INSTANCE", False) + exec_file = "path/to/mapdl/v242/executable/ansys242" + + def raise_exception(*args, **kwargs): + raise Exception("Fake exception when launching MAPDL") + + process_launch_grpc = get_fake_process("Submitted batch job 1001") + + process_scontrol = get_fake_process("Submitted batch job 1001") + process_scontrol.stdout.read = raise_exception + + with patch("ansys.mapdl.core.launcher.launch_grpc") as mock_launch_grpc: + with patch("ansys.mapdl.core.launcher.send_scontrol") as mock_scontrol: + with patch("ansys.mapdl.core.launcher.kill_job") as mock_kill_job: + + mock_launch_grpc.return_value = process_launch_grpc + mock_scontrol.return_value = process_scontrol + + with pytest.raises( + Exception, match="Fake exception when launching MAPDL" + ): + launch_mapdl( + launch_on_hpc=True, + exec_file=exec_file, + replace_env_vars={"myenvvar": "myenvvarvalue"}, + ) + + 
mock_launch_grpc.assert_called_once() + cmd = mock_launch_grpc.call_args_list[0][1]["cmd"] + + assert "sbatch" in cmd + assert "--wrap" in cmd + assert exec_file in cmd[-1] + assert "-grpc" in cmd[-1] + + envvars = mock_launch_grpc.call_args_list[0][1]["env_vars"] + + assert envvars["ANS_MULTIPLE_NODES"] == "1" + assert envvars["HYDRA_BOOTSTRAP"] == "slurm" + assert envvars["myenvvar"] == "myenvvarvalue" + + mock_scontrol.assert_called_once() + args = mock_scontrol.call_args_list[0][0][0] + + assert "show" in args + assert "jobid" in args + assert "1001" in args + + mock_kill_job.assert_called_once() + + +@pytest.mark.parametrize( + "args,context", + [ + [ + {"nproc": 10, "mode": "console"}, + pytest.raises( + ValueError, + match="The only mode allowed for launch MAPDL on an HPC cluster is gRPC.", + ), + ], + [ + {"nproc": 10, "ip": "123.11.22.33"}, + pytest.raises( + ValueError, + match="PyMAPDL cannot ensure a specific IP will be used when launching MAPDL on a cluster", + ), + ], + [ + {"nproc": 10, "start_instance": False}, + pytest.raises( + ValueError, + match="The 'start_instance' argument must be 'True' when launching on HPC.", + ), + ], + [{"nproc": 10}, NullContext()], + ], +) +@patch("ansys.mapdl.core.launcher.launch_mapdl", lambda *args, **kwargs: kwargs) +def test_launch_mapdl_on_cluster_exceptions(args, context): + with context: + ret = launch_mapdl_on_cluster(**args) + assert ret["launch_on_hpc"] + assert ret["nproc"] == 10 + + +@patch( + "socket.gethostbyname", + lambda *args, **kwargs: "123.45.67.89" if args[0] != LOCALHOST else LOCALHOST, +) +@pytest.mark.parametrize( + "ip,ip_env", + [[None, None], [None, "123.45.67.89"], ["123.45.67.89", "111.22.33.44"]], +) +def test_get_ip(monkeypatch, ip, ip_env): + monkeypatch.delenv("PYMAPDL_IP", False) + if ip_env: + monkeypatch.setenv("PYMAPDL_IP", ip_env) + args = {"ip": ip} + + get_ip(args) + + if ip: + assert args["ip"] == ip + else: + if ip_env: + assert args["ip"] == ip_env + else: + assert args["ip"] == LOCALHOST + + +@pytest.mark.parametrize( + "port,port_envvar,start_instance,port_busy,result", + ( + [None, None, True, False, 50052], # Standard case + [None, None, True, True, 50055], # Busy port case, not sure why it is not 50054 + [None, 50053, True, True, 50053], + [None, 50053, False, False, 50053], + [50054, 50053, True, False, 50054], + [50054, 50053, True, False, 50054], + [50054, None, False, False, 50054], + ), +) +@patch("ansys.mapdl.core._LOCAL_PORTS", []) +def test_get_port(monkeypatch, port, port_envvar, start_instance, port_busy, result): + # Settings + monkeypatch.delenv("PYMAPDL_PORT", False) + if port_envvar: + monkeypatch.setenv("PYMAPDL_PORT", port_envvar) + + # Testing + if port_busy: + # Success after the second retry, it should go up to 2. + # But for some reason, it goes up 3. 
+ side_effect = [True, True, False] + else: + side_effect = [False] + + context = patch("ansys.mapdl.core.launcher.port_in_use", side_effect=side_effect) + + with context: + assert get_port(port, start_instance) == result + + +@pytest.mark.parametrize("stdout", ["Submitted batch job 1001", "Something bad"]) +def test_get_jobid(stdout): + if "1001" in stdout: + context = NullContext() + else: + context = pytest.raises( + ValueError, match="PyMAPDL could not retrieve the job id" + ) + + with context: + jobid = get_jobid(stdout) + assert jobid == 1001 + + +@patch("socket.gethostbyname", lambda *args, **kwargs: "111.22.33.44") +@pytest.mark.parametrize( + "jobid,timeout,time_to_stop,state,hostname, hostname_msg, raises", + [ + [1001, 30, 2, "RUNNING", "myhostname", "BatchHost=myhostname", None], + [ + 1002, + 2, + 3, + "CONFIGURING", + "otherhostname", + "BatchHost=otherhostname", + MapdlDidNotStart, + ], + [1002, 2, 3, "CONFIGURING", "", "BatchHost=", MapdlDidNotStart], + [1002, 2, 3, "CONFIGURING", None, "Batch", MapdlDidNotStart], + ], +) +def test_get_hostname_host_cluster( + jobid, timeout, time_to_stop, state, hostname, hostname_msg, raises +): + def fake_proc(*args, **kwargs): + assert f"show jobid -dd {jobid}" == args[0] + return get_fake_process( + f"a long scontrol...\nJobState={state}\n...\n{hostname_msg}\n...\nin message", + "", + time_to_stop, + ) + + with patch("ansys.mapdl.core.launcher.send_scontrol", fake_proc) as mck_sc: + + if raises: + context = pytest.raises(raises) + else: + context = NullContext() + + with context as excinfo: + batchhost, batchhost_ip = get_hostname_host_cluster( + job_id=jobid, timeout=timeout + ) + + if raises: + assert f"The HPC job (id: {jobid})" in excinfo.value.args[0] + assert f"(timeout={timeout})." in excinfo.value.args[0] + assert f"The job state is '{state}'. " in excinfo.value.args[0] + + if hostname: + assert f"The BatchHost for this job is '{hostname}'" + else: + assert ( + "PyMAPDL couldn't get the BatchHost hostname" + in excinfo.value.args[0] + ) + + else: + assert batchhost == "myhostname" + assert batchhost_ip == "111.22.33.44" + + +@requires("ansys-tools-path") +@patch("ansys.tools.path.path._mapdl_version_from_path", lambda *args, **kwargs: 201) +@patch("ansys.mapdl.core._HAS_ATP", True) +def test_get_version_version_error(monkeypatch): + monkeypatch.delenv("PYMAPDL_MAPDL_VERSION", False) + + with pytest.raises( + VersionError, match="The MAPDL gRPC interface requires MAPDL 20.2 or later" + ): + get_version(None, "/path/to/executable") + + +@pytest.mark.parametrize("version", [211, 221, 232]) +def test_get_version_env_var(monkeypatch, version): + monkeypatch.setenv("PYMAPDL_MAPDL_VERSION", version) + + assert version == get_version(None) + assert version != get_version(241) + + +@pytest.mark.parametrize( + "mode, version, osname, context, res", + [ + [None, None, None, NullContext(), "grpc"], # default + [ + "grpc", + 201, + "nt", + pytest.raises( + VersionError, match="gRPC mode requires MAPDL 2020R2 or newer on Window" + ), + None, + ], + [ + "grpc", + 202, + "posix", + pytest.raises( + VersionError, match="gRPC mode requires MAPDL 2021R1 or newer on Linux." 
+ ), + None, + ], + ["grpc", 212, "nt", NullContext(), "grpc"], + ["grpc", 221, "posix", NullContext(), "grpc"], + ["grpc", 221, "nt", NullContext(), "grpc"], + [ + "console", + 221, + "nt", + pytest.raises(ValueError, match="Console mode requires Linux."), + None, + ], + [ + "console", + 221, + "posix", + pytest.warns( + UserWarning, + match="Console mode not recommended in MAPDL 2021R1 or newer.", + ), + "console", + ], + [ + "nomode", + 221, + "posix", + pytest.raises(ValueError, match=f'Invalid MAPDL server mode "nomode"'), + None, + ], + [None, 211, "posix", NullContext(), "grpc"], + [None, 211, "nt", NullContext(), "grpc"], + [None, 202, "nt", NullContext(), "grpc"], + [ + None, + 201, + "nt", + pytest.raises(VersionError, match="Running MAPDL as a service requires"), + None, + ], + [None, 202, "posix", NullContext(), "console"], + [None, 201, "posix", NullContext(), "console"], + [ + None, + 110, + "posix", + pytest.warns( + UserWarning, + match="MAPDL as a service has not been tested on MAPDL < v13", + ), + "console", + ], + [ + None, + 110, + "nt", + pytest.raises(VersionError, match="Running MAPDL as a service requires"), + None, + ], + ], +) +def test_check_mode(mode, version, osname, context, res): + with patch("os.name", osname): + with context: + assert res == check_mode(mode, version) + + +@pytest.mark.parametrize("jobid", [1001, 2002]) +@patch("subprocess.Popen", lambda *args, **kwargs: None) +def test_kill_job(jobid): + with patch("ansys.mapdl.core.launcher.submitter") as mck_sub: + assert kill_job(jobid) is None + mck_sub.assert_called_once() + arg = mck_sub.call_args_list[0][0][0] + assert arg[0] == "scancel" + assert arg[1] == str(jobid) + + +@pytest.mark.parametrize("jobid", [1001, 2002]) +@patch( + "ansys.mapdl.core.launcher.submitter", lambda *args, **kwargs: kwargs +) # return command +def test_send_scontrol(jobid): + with patch("ansys.mapdl.core.launcher.submitter") as mck_sub: + args = f"my args {jobid}" + assert send_scontrol(args) + + mck_sub.assert_called_once() + arg = mck_sub.call_args_list[0][0][0] + assert " ".join(arg) == f"scontrol my args {jobid}" + assert "scontrol" in arg + assert f"{jobid}" in arg + + +@pytest.mark.parametrize( + "cmd,executable,shell,cwd,stdin,stdout,stderr,envvars", + [ + ["mycmd", None, True, "my_cwd", None, None, None, None], + [["my", "cmd"], None, True, "my_cwd", None, None, None, None], + [ + "mycmd", + "exec", + False, + "my_other_cwd", + "other_obj", + "other_obj", + "other_obj", + {"aaa": 1}, + ], + [ + ["my", "cmd"], + "exec", + False, + "my_single_cwd", + "other_obj", + "other_obj", + "other_obj", + {"a": "b", "b": "c"}, + ], + ], +) +def test_submitter(cmd, executable, shell, cwd, stdin, stdout, stderr, envvars): + def return_everything(*arg, **kwags): + return arg, kwags + + with patch("subprocess.Popen", return_everything) as mck_popen: + args, kwargs = submitter( + cmd=cmd, + executable=executable, + shell=shell, + cwd=cwd, + stdin=stdin, + stdout=stdout, + stderr=stderr, + env_vars=envvars, + ) + + if executable: + if isinstance(cmd, str): + assert kwargs["args"] == [executable, cmd] + else: # list + assert kwargs["args"] == [executable] + cmd + else: + assert kwargs["args"] == cmd + + assert kwargs["shell"] == shell + assert kwargs["cwd"] == cwd + + if stdin: + assert kwargs["stdin"] == stdin + else: + assert isinstance(kwargs["stdin"], type(subprocess.DEVNULL)) + + if stdout: + assert kwargs["stdout"] == stdout + else: + assert isinstance(kwargs["stdout"], type(subprocess.PIPE)) + + if stderr: + assert 
kwargs["stderr"] == stderr + else: + assert isinstance(kwargs["stderr"], type(subprocess.PIPE)) + + assert kwargs["env"] == envvars diff --git a/tests/test_mapdl.py b/tests/test_mapdl.py index c3812a17f6..8749850056 100644 --- a/tests/test_mapdl.py +++ b/tests/test_mapdl.py @@ -35,7 +35,7 @@ import psutil import pytest -from conftest import VALID_PORTS, has_dependency +from conftest import PATCH_MAPDL_START, VALID_PORTS, has_dependency if has_dependency("pyvista"): from pyvista import MultiBlock @@ -55,7 +55,7 @@ ) from ansys.mapdl.core.launcher import launch_mapdl from ansys.mapdl.core.mapdl_grpc import SESSION_ID_NAME -from ansys.mapdl.core.misc import random_string +from ansys.mapdl.core.misc import random_string, stack from conftest import IS_SMP, ON_CI, ON_LOCAL, QUICK_LAUNCH_SWITCHES, requires # Path to files needed for examples @@ -1932,32 +1932,22 @@ def test_igesin_whitespace(mapdl, cleared, tmpdir): assert int(n_ent[0]) > 0 -def test_save_on_exit(mapdl, cleared): - with mapdl.non_interactive: - mapdl.exit(save=True, fake_exit=True) - mapdl._exited = False # avoiding set exited on the class. - - lines = "\n".join(mapdl._stored_commands.copy()) - assert "SAVE" in lines.upper() - - mapdl._stored_commands = [] # resetting - mapdl.prep7() - - mapdl.prep7() - +@pytest.mark.parametrize("save", [None, True, False]) +@patch("ansys.mapdl.core.Mapdl.save") +@patch("ansys.mapdl.core.mapdl_grpc.MapdlGrpc._exit_mapdl") +def test_save_on_exit(mck_exit, mck_save, mapdl, cleared, save): -def test_save_on_exit_not(mapdl, cleared): - with mapdl.non_interactive: - mapdl.exit(save=False, fake_exit=True) - mapdl._exited = False # avoiding set exited on the class. + mck_exit.return_value = None - lines = "\n".join(mapdl._stored_commands.copy()) - assert "SAVE" not in lines.upper() + mapdl.exit(save=save) + mapdl._exited = False # avoiding set exited on the class. - mapdl._stored_commands = [] # resetting - mapdl.prep7() + if save: + mck_save.assert_called_once() + else: + mck_save.assert_not_called() - mapdl.prep7() + assert mapdl.prep7() def test_input_strings_inside_non_interactive(mapdl, cleared): @@ -2465,6 +2455,29 @@ def test_no_flush_stored(mapdl): assert mapdl._stored_commands == [] +@pytest.mark.parametrize("ip", ["123.45.67.89", "myhostname"]) +@stack(*PATCH_MAPDL_START) +def test_ip_hostname_in_start_parm(ip): + start_parm = { + "ip": ip, + "local": False, + "set_no_abort": False, + "jobid": 1001, + } + + with patch("socket.gethostbyaddr") as mck_sock: + mck_sock.return_value = ("myhostname",) + mapdl = pymapdl.Mapdl(disable_run_at_connect=False, **start_parm) + + if ip == "myhostname": + assert mapdl.ip == "123.45.67.99" + else: + assert mapdl.ip == ip + + assert mapdl.hostname == "myhostname" + del mapdl + + def test_directory_setter(mapdl): # Testing edge cases prev_path = mapdl._path