Bnb integration test tweaks (huggingface#1242)
* allow bitsandbytes integration test selection

* fix typo: mutli -> multi

* enable tests to run on >2 GPUs

* fix for >3 GPUs, due to artidoro/qlora huggingface#186

* fix formatting
Titus-von-Koeller authored Dec 8, 2023
1 parent 895513c commit fc9f4b3
Showing 4 changed files with 23 additions and 16 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
@@ -41,4 +41,5 @@ markers = [
     "single_gpu_tests: tests that run on a single GPU",
     "multi_gpu_tests: tests that run on multiple GPUs",
     "regression: whether to run regression suite test",
+    "bitsandbytes: select bitsandbytes integration tests"
 ]
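Registering the marker here keeps pytest from flagging `bitsandbytes` as an unknown mark and makes the bitsandbytes integration tests selectable by marker expression. A minimal sketch of a tagged test and the selection, assuming a hypothetical test function (in practice the tag is applied by the `require_bitsandbytes` decorator changed further down in this commit):

import pytest


@pytest.mark.bitsandbytes  # the marker registered above
def test_bnb_example():
    # hypothetical test body, for illustration only
    ...


# Marker-based selection from the command line then works as usual, e.g.:
#   pytest -m bitsandbytes            # run only the bitsandbytes integration tests
#   pytest -m "not bitsandbytes"      # run everything else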
8 changes: 4 additions & 4 deletions tests/test_common_gpu.py
@@ -301,7 +301,7 @@ def test_ia3_bnb_4bit_quantization(self):

     @pytest.mark.multi_gpu_tests
     @require_torch_multi_gpu
-    def test_lora_causal_lm_mutli_gpu_inference(self):
+    def test_lora_causal_lm_multi_gpu_inference(self):
         r"""
         Test if LORA can be used for inference on multiple GPUs.
         """
@@ -317,7 +317,7 @@ def test_lora_causal_lm_mutli_gpu_inference(self):
         model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, device_map="balanced")
         tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)

-        self.assertEqual(set(model.hf_device_map.values()), {0, 1})
+        self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))

         model = get_peft_model(model, lora_config)
         self.assertTrue(isinstance(model, PeftModel))
@@ -331,7 +331,7 @@ def test_lora_causal_lm_mutli_gpu_inference(self):
     @require_torch_multi_gpu
     @pytest.mark.multi_gpu_tests
     @require_bitsandbytes
-    def test_lora_seq2seq_lm_mutli_gpu_inference(self):
+    def test_lora_seq2seq_lm_multi_gpu_inference(self):
         r"""
         Test if LORA can be used for inference on multiple GPUs - 8bit version.
         """
@@ -342,7 +342,7 @@ def test_lora_seq2seq_lm_mutli_gpu_inference(self):
         model = AutoModelForSeq2SeqLM.from_pretrained(self.seq2seq_model_id, device_map="balanced", load_in_8bit=True)
         tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)

-        self.assertEqual(set(model.hf_device_map.values()), {0, 1})
+        self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))

         model = get_peft_model(model, lora_config)
         self.assertTrue(isinstance(model, PeftModel))
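In both inference tests above, the expected device set is no longer hard-coded to `{0, 1}` (which only holds on exactly two GPUs) but derived from `torch.cuda.device_count()`, so the same check passes wherever `device_map="balanced"` spreads the model over every visible GPU. A minimal sketch of the generalized check, wrapped in a hypothetical helper:

import torch


def assert_uses_all_gpus(model):
    # hypothetical helper: `model` is any transformers model loaded with device_map="balanced"
    expected = set(range(torch.cuda.device_count()))  # e.g. {0, 1, 2, 3} on a 4-GPU host
    assert set(model.hf_device_map.values()) == expected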
22 changes: 13 additions & 9 deletions tests/test_gpu_examples.py
@@ -252,7 +252,7 @@ def test_causal_lm_training_4bit(self):
         self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])

     @pytest.mark.multi_gpu_tests
-    def test_causal_lm_training_mutli_gpu_4bit(self):
+    def test_causal_lm_training_multi_gpu_4bit(self):
         r"""
         Test the CausalLM training on a multi-GPU device with 4bit base model. The test would simply fail if the
         adapters are not set correctly.
@@ -265,7 +265,7 @@ def test_causal_lm_training_mutli_gpu_4bit(self):
             load_in_4bit=True,
         )

-        self.assertEqual(set(model.hf_device_map.values()), {0, 1})
+        self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))

         model = prepare_model_for_kbit_training(model)

@@ -320,7 +320,11 @@ def test_4bit_adalora_causalLM(self):
         """
         model_id = "facebook/opt-350m"

-        model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            load_in_4bit=True,
+            device_map={"": "cuda:0"},  # fix for >3 GPUs
+        )
         tokenizer = AutoTokenizer.from_pretrained(model_id)

         model.gradient_checkpointing_enable()
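The `device_map={"": "cuda:0"}` argument added above pins the entire 4-bit model to a single GPU, sidestepping the multi-GPU issue tracked in artidoro/qlora#186 on machines with more than three GPUs. A standalone sketch of the same workaround (model id taken from the test above; the final check is illustrative only):

from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-350m",
    load_in_4bit=True,
    device_map={"": "cuda:0"},  # "" (the root module) -> everything on GPU 0, no sharding
)
assert set(model.hf_device_map.values()) == {"cuda:0"}  # the whole model lives on one device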
@@ -440,7 +444,7 @@ def test_8bit_adalora_causalLM(self):

     @pytest.mark.multi_gpu_tests
     @require_torch_multi_gpu
-    def test_causal_lm_training_mutli_gpu(self):
+    def test_causal_lm_training_multi_gpu(self):
         r"""
         Test the CausalLM training on a multi-GPU device. This test is a converted version of
         https://github.com/huggingface/peft/blob/main/examples/int8_training/Finetune_opt_bnb_peft.ipynb where we train
@@ -454,7 +458,7 @@ def test_causal_lm_training_mutli_gpu(self):
             device_map="auto",
         )

-        self.assertEqual(set(model.hf_device_map.values()), {0, 1})
+        self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))

         tokenizer = AutoTokenizer.from_pretrained(self.causal_lm_model_id)
         model = prepare_model_for_int8_training(model)
@@ -564,7 +568,7 @@ def test_seq2seq_lm_training_single_gpu(self):

     @pytest.mark.multi_gpu_tests
     @require_torch_multi_gpu
-    def test_seq2seq_lm_training_mutli_gpu(self):
+    def test_seq2seq_lm_training_multi_gpu(self):
         r"""
         Test the Seq2SeqLM training on a multi-GPU device. This test is a converted version of
         https://github.com/huggingface/peft/blob/main/examples/int8_training/Finetune_opt_bnb_peft.ipynb where we train
@@ -578,7 +582,7 @@ def test_seq2seq_lm_training_mutli_gpu(self):
             device_map="balanced",
         )

-        self.assertEqual(set(model.hf_device_map.values()), {0, 1})
+        self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))

         tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)
         model = prepare_model_for_int8_training(model)
@@ -875,7 +879,7 @@ def test_adalora_causalLM(self):

     @pytest.mark.multi_gpu_tests
     @require_torch_multi_gpu
-    def test_causal_lm_training_mutli_gpu(self):
+    def test_causal_lm_training_multi_gpu(self):
         r"""
         Test the CausalLM training on a multi-GPU device. The test would simply fail if the adapters are not set
         correctly.
@@ -889,7 +893,7 @@ def test_causal_lm_training_mutli_gpu(self):
             quantization_config=self.quantization_config,
         )

-        self.assertEqual(set(model.hf_device_map.values()), {0, 1})
+        self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))

         model = prepare_model_for_kbit_training(model)

8 changes: 5 additions & 3 deletions tests/testing_utils.py
@@ -16,6 +16,7 @@
 from contextlib import contextmanager

 import numpy as np
+import pytest
 import torch

 from peft.import_utils import is_auto_gptq_available, is_optimum_available
@@ -47,10 +48,11 @@ def require_bitsandbytes(test_case):
     """
     try:
         import bitsandbytes  # noqa: F401
+
+        test_case = pytest.mark.bitsandbytes(test_case)
     except ImportError:
-        return unittest.skip("test requires bitsandbytes")(test_case)
-    else:
-        return test_case
+        test_case = pytest.mark.skip(reason="test requires bitsandbytes")(test_case)
+    return test_case


 def require_auto_gptq(test_case):
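With this change `require_bitsandbytes` always returns the test: tagged with the `bitsandbytes` marker when the library imports cleanly, or marked as skipped otherwise, instead of wrapping it in `unittest.skip`. A sketch of how a test module picks this up (the import path follows this repo's test layout; the test itself is hypothetical):

from .testing_utils import require_bitsandbytes


@require_bitsandbytes
def test_something_with_bnb():  # hypothetical example test
    # with bitsandbytes installed: carries @pytest.mark.bitsandbytes and runs
    # without bitsandbytes:        collected but skipped via pytest.mark.skip
    import bitsandbytes  # noqa: F401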
