From fa218e1942ac06b5e86e823cc8f5bff6880157f3 Mon Sep 17 00:00:00 2001
From: Fanli Lin
Date: Wed, 21 Aug 2024 21:10:19 +0800
Subject: [PATCH] TST test_mixed_adapter_batches_lora_opt_timing on XPU (#2021)

---
 tests/test_custom_models.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_custom_models.py b/tests/test_custom_models.py
index 2e674f79d1..c3f4d1368f 100644
--- a/tests/test_custom_models.py
+++ b/tests/test_custom_models.py
@@ -52,7 +52,7 @@
 from peft.utils import ModulesToSaveWrapper, infer_device

 from .testing_common import PeftCommonTester
-from .testing_utils import get_state_dict, require_torch_gpu
+from .testing_utils import get_state_dict, require_non_cpu


 # MLP is a vanilla FF network with only linear layers
@@ -3276,7 +3276,7 @@ def test_mixed_adapter_batches_lora_with_dora_raises(self):
         with pytest.raises(ValueError, match=msg):
             peft_model.forward(**inputs)

-    @require_torch_gpu
+    @require_non_cpu
     def test_mixed_adapter_batches_lora_opt_timing(self):
         # Use a more realistic model (opt-125m) and do a simple runtime check to ensure that mixed adapter batches
         # don't add too much overhead. These types of tests are inherently flaky, so we try to add in some robustness.
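
Swapping require_torch_gpu for require_non_cpu lets the timing test run on any available accelerator (such as an Intel XPU) instead of only on CUDA devices. Below is a minimal sketch of how such a device-agnostic skip decorator could look, assuming that peft.utils.infer_device() (already imported by the test file) returns "cpu" when no accelerator is present; the actual helper in tests/testing_utils.py may be implemented differently.

    # Sketch only: the real require_non_cpu lives in tests/testing_utils.py and may differ.
    import unittest

    from peft.utils import infer_device  # assumed to return "cpu" when no accelerator is available


    def require_non_cpu(test_case):
        """Skip a test unless some accelerator (e.g. CUDA or XPU) is available."""
        return unittest.skipUnless(infer_device() != "cpu", "test requires an accelerator")(test_case)

With a helper like this, decorating test_mixed_adapter_batches_lora_opt_timing with @require_non_cpu skips the runtime check on CPU-only machines while letting it run on CUDA and XPU alike.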