feat: add apple silicon GPU acceleration (#6151)
* feat: add apple silicon GPU acceleration

* add release notes

* small fix

* Update utils.py

* Update utils.py

* ci fix mps

* Revert "ci fix mps"

This reverts commit 783ae50.

* mps fix

* Update experiment_tracking.py

* try removing upper watermark limit

* disable mps CI

* Use xl runner

* initialise env

* small fix

* black linting

---------

Co-authored-by: Massimiliano Pippi <[email protected]>
NripeshN and masci authored Oct 30, 2023
1 parent 789e524 commit 708d33a
Showing 8 changed files with 61 additions and 4 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/tests_preview.yml
@@ -219,7 +219,9 @@ jobs:
   integration-tests-macos:
     name: Integration / macos-latest
     needs: unit-tests
-    runs-on: macos-latest
+    runs-on: macos-latest-xl
+    env:
+      HAYSTACK_MPS_ENABLED: false
     steps:
       - uses: actions/checkout@v4

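Every MPS guard in this commit reads the same kill switch shown above, so only the exact string "false" disables acceleration. A minimal standalone sketch of those semantics (not part of the commit):

import os

def mps_flag_enabled() -> bool:
    # Unset, "true", "1", or anything else keeps MPS on;
    # only the literal lowercase string "false" turns it off.
    return os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"

os.environ.pop("HAYSTACK_MPS_ENABLED", None)
assert mps_flag_enabled()        # unset -> enabled by default
os.environ["HAYSTACK_MPS_ENABLED"] = "0"
assert mps_flag_enabled()        # "0" != "false" -> still enabled
os.environ["HAYSTACK_MPS_ENABLED"] = "false"
assert not mps_flag_enabled()    # disabled, as in the CI job above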
19 changes: 19 additions & 0 deletions e2e/modeling/test_dpr.py
@@ -1,5 +1,6 @@
 from typing import Dict, Any
 from pathlib import Path
+import os
 
 import numpy as np
 import pytest
@@ -707,6 +708,12 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa

     if torch.cuda.is_available():
         device = torch.device("cuda")
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        device = torch.device("mps")
     else:
         device = torch.device("cpu")
     model = BiAdaptiveModel(
@@ -753,6 +760,12 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa

     if torch.cuda.is_available():
         device = torch.device("cuda")
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        device = torch.device("mps")
     else:
         device = torch.device("cpu")
     loaded_model = BiAdaptiveModel(
@@ -879,6 +892,12 @@ def test_dpr_processor_save_load_non_bert_tokenizer(tmp_path: Path, query_and_pa

     if torch.cuda.is_available():
         device = torch.device("cuda")
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        device = torch.device("mps")
     else:
         device = torch.device("cpu")
     model = BiAdaptiveModel(
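The three hunks above repeat one cuda → mps → cpu fallback. Factored into a standalone sketch (the select_device helper is hypothetical, not part of the commit):

import os
import torch

def select_device() -> torch.device:
    # CUDA always wins over MPS when both are somehow present.
    if torch.cuda.is_available():
        return torch.device("cuda")
    # hasattr() keeps the check safe on torch < 1.12, where
    # torch.backends has no "mps" attribute at all.
    if (
        hasattr(torch.backends, "mps")
        and torch.backends.mps.is_available()
        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
    ):
        return torch.device("mps")
    return torch.device("cpu")

device = select_device()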
7 changes: 6 additions & 1 deletion haystack/environment.py
@@ -106,11 +106,16 @@ def collect_static_system_specs() -> Dict[str, Any]:

     try:
         torch_import.check()
+        has_mps = (
+            hasattr(torch.backends, "mps")
+            and torch.backends.mps.is_available()
+            and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+        )
         specs.update(
             {
                 "libraries.torch": torch.__version__,
                 "libraries.cuda": torch.version.cuda if torch.cuda.is_available() else False,
-                "hardware.gpus": torch.cuda.device_count() if torch.cuda.is_available() else 0,
+                "hardware.gpus": torch.cuda.device_count() if torch.cuda.is_available() else 1 if has_mps else 0,
             }
         )
     except ImportError:
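The new "hardware.gpus" value chains two conditional expressions, which group to the right; an illustrative, explicitly parenthesized equivalent:

def gpu_count(cuda_available: bool, cuda_devices: int, has_mps: bool) -> int:
    # Same logic as the hunk above: CUDA reports its real device count,
    # Apple Silicon counts as a single GPU, everything else as zero.
    return cuda_devices if cuda_available else (1 if has_mps else 0)

assert gpu_count(True, 4, True) == 4    # CUDA present: MPS is ignored
assert gpu_count(False, 0, True) == 1   # MPS only: one GPU
assert gpu_count(False, 0, False) == 0  # CPU-only machine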
8 changes: 8 additions & 0 deletions haystack/modeling/utils.py
@@ -112,6 +112,13 @@ def initialize_device_settings(
         else:
             devices_to_use = [torch.device("cuda:0")]
             n_gpu = 1
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        devices_to_use = [torch.device("mps")]
+        n_gpu = 1
     else:
         devices_to_use = [torch.device("cpu")]
         n_gpu = 0
@@ -180,6 +187,7 @@ def all_gather_list(data, group=None, max_size=16384):
     data (Any): data from the local worker to be gathered on other workers
     group (optional): group of the collective
     """
+    # pylint: disable=all
     SIZE_STORAGE_BYTES = 4  # int32 to encode the payload size
 
     enc = pickle.dumps(data)
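Callers of initialize_device_settings now receive an MPS device transparently when no CUDA GPU is present. A hedged usage sketch, assuming the function keeps its (devices, n_gpu) return shape:

from haystack.modeling.utils import initialize_device_settings

# Assumed call/return shape; check the module for the exact signature.
devices, n_gpu = initialize_device_settings(use_cuda=None)
print(devices, n_gpu)  # e.g. [device(type='mps')], 1 on Apple Silicon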
7 changes: 7 additions & 0 deletions haystack/preview/components/readers/extractive.py
@@ -2,6 +2,7 @@
 from typing import Any, Dict, List, Optional, Tuple, Union
 import math
 import warnings
+import os
 
 from haystack.preview import component, default_to_dict, ComponentError, Document, ExtractedAnswer
 from haystack.preview.lazy_imports import LazyImport
@@ -111,6 +112,12 @@ def warm_up(self):
         if self.model is None:
             if torch.cuda.is_available():
                 self.device = self.device or "cuda:0"
+            elif (
+                hasattr(torch.backends, "mps")
+                and torch.backends.mps.is_available()
+                and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+            ):
+                self.device = self.device or "mps:0"
             else:
                 self.device = self.device or "cpu:0"
             self.model = AutoModelForQuestionAnswering.from_pretrained(self.model_name_or_path, token=self.token).to(
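`self.device or "mps:0"` only fills in a device when the user passed none, so an explicit device= argument always takes precedence. A sketch of what warm_up() effectively does with the resolved string on Apple Silicon (the checkpoint name is a placeholder, not taken from the commit):

import torch
from transformers import AutoModelForQuestionAnswering

device = None                # nothing passed by the user
device = device or "mps:0"   # resolved exactly as in the hunk above

# "mps:0" is a valid torch device string on MPS-enabled builds.
model = AutoModelForQuestionAnswering.from_pretrained(
    "deepset/roberta-base-squad2"  # placeholder checkpoint
).to(torch.device(device))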
9 changes: 7 additions & 2 deletions haystack/utils/experiment_tracking.py
@@ -17,7 +17,7 @@
 import transformers
 
 with LazyImport("Run 'pip install farm-haystack[metrics]'") as mlflow_import:
-    import mlflow
+    import mlflow  # pylint: disable=import-error
 
 
 logger = logging.getLogger(__name__)
@@ -236,6 +236,11 @@ def get_or_create_env_meta_data() -> Dict[str, Any]:
     from haystack.telemetry import HAYSTACK_EXECUTION_CONTEXT
 
     global env_meta_data  # pylint: disable=global-statement
+    has_mps = (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    )
     if not env_meta_data:
         env_meta_data = {
             "os_version": platform.release(),
@@ -246,7 +251,7 @@ def get_or_create_env_meta_data() -> Dict[str, Any]:
"transformers_version": transformers.__version__,
"torch_version": torch.__version__,
"torch_cuda_version": torch.version.cuda if torch.cuda.is_available() else 0,
"n_gpu": torch.cuda.device_count() if torch.cuda.is_available() else 0,
"n_gpu": torch.cuda.device_count() if torch.cuda.is_available() else 1 if has_mps else 0,
"n_cpu": os.cpu_count(),
"context": os.environ.get(HAYSTACK_EXECUTION_CONTEXT),
"execution_env": _get_execution_environment(),
7 changes: 7 additions & 0 deletions haystack/utils/torch_utils.py
@@ -1,4 +1,5 @@
 from typing import Optional, List, Union
+import os
 
 import torch
 from torch.utils.data import Dataset
@@ -44,4 +45,10 @@ def get_devices(devices: Optional[List[Union[str, torch.device]]]) -> List[torch
         return [torch.device(device) for device in devices]
     elif torch.cuda.is_available():
         return [torch.device(device) for device in range(torch.cuda.device_count())]
+    elif (
+        hasattr(torch.backends, "mps")
+        and torch.backends.mps.is_available()
+        and os.getenv("HAYSTACK_MPS_ENABLED", "true") != "false"
+    ):
+        return [torch.device("mps")]
     return [torch.device("cpu")]
@@ -0,0 +1,4 @@
+---
+enhancements:
+  - |
+    Added support for Apple Silicon GPU acceleration through PyTorch's MPS backend, enabling better performance on Apple M1 hardware.
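To confirm the acceleration is actually picked up on an M1 machine, and that the kill switch works, a quick standalone smoke test might look like this (a sketch, not part of the commit):

import os
import torch

# Set before initializing Haystack to opt out of MPS:
#   os.environ["HAYSTACK_MPS_ENABLED"] = "false"

if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
    x = torch.rand(1024, 1024, device="mps")
    y = x @ x                # matrix multiply runs on the Apple GPU
    print(y.device)          # -> mps:0
else:
    print("MPS not available; Haystack falls back to the CUDA/CPU paths.")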
