Bnb integration test tweaks (huggingface#1242)
* allow bitsandbytes integration test selection

* fix typo: mutli -> multi

* enable tests to run on >2 GPUs

* fix for >3 GPUs, due to artidoro/qlora huggingface#186

* fix formatting
Titus-von-Koeller authored Dec 8, 2023
1 parent 895513c commit fc9f4b3
Showing 4 changed files with 23 additions and 16 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
@@ -41,4 +41,5 @@ markers = [
     "single_gpu_tests: tests that run on a single GPU",
     "multi_gpu_tests: tests that run on multiple GPUs",
     "regression: whether to run regression suite test",
+    "bitsandbytes: select bitsandbytes integration tests"
 ]
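Registering the marker here keeps pytest from flagging `bitsandbytes` as an unknown mark and makes the bitsandbytes integration tests selectable by marker expression. A minimal sketch of a tagged test and the selection, assuming a hypothetical test function (in practice the tag is applied by the `require_bitsandbytes` decorator changed further down in this commit):

import pytest


@pytest.mark.bitsandbytes  # the marker registered above
def test_bnb_example():
    # hypothetical test body, for illustration only
    ...


# Marker-based selection from the command line then works as usual, e.g.:
#   pytest -m bitsandbytes            # run only the bitsandbytes integration tests
#   pytest -m "not bitsandbytes"      # run everything else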
8 changes: 4 additions & 4 deletions tests/test_common_gpu.py
@@ -301,7 +301,7 @@ def test_ia3_bnb_4bit_quantization(self):

     @pytest.mark.multi_gpu_tests
     @require_torch_multi_gpu
-    def test_lora_causal_lm_mutli_gpu_inference(self):
+    def test_lora_causal_lm_multi_gpu_inference(self):
         r"""
         Test if LORA can be used for inference on multiple GPUs.
         """
@@ -317,7 +317,7 @@ def test_lora_causal_lm_mutli_gpu_inference(self):
         model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, device_map="balanced")
         tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)

-        self.assertEqual(set(model.hf_device_map.values()), {0, 1})
+        self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))

         model = get_peft_model(model, lora_config)
         self.assertTrue(isinstance(model, PeftModel))
@@ -331,7 +331,7 @@ def test_lora_causal_lm_mutli_gpu_inference(self):
     @require_torch_multi_gpu
     @pytest.mark.multi_gpu_tests
     @require_bitsandbytes
-    def test_lora_seq2seq_lm_mutli_gpu_inference(self):
+    def test_lora_seq2seq_lm_multi_gpu_inference(self):
         r"""
         Test if LORA can be used for inference on multiple GPUs - 8bit version.
         """
@@ -342,7 +342,7 @@ def test_lora_seq2seq_lm_mutli_gpu_inference(self):
         model = AutoModelForSeq2SeqLM.from_pretrained(self.seq2seq_model_id, device_map="balanced", load_in_8bit=True)
         tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)

-        self.assertEqual(set(model.hf_device_map.values()), {0, 1})
+        self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))

         model = get_peft_model(model, lora_config)
         self.assertTrue(isinstance(model, PeftModel))
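In both inference tests above, the expected device set is no longer hard-coded to `{0, 1}` (which only holds on exactly two GPUs) but derived from `torch.cuda.device_count()`, so the same check passes wherever `device_map="balanced"` spreads the model over every visible GPU. A minimal sketch of the generalized check, wrapped in a hypothetical helper:

import torch


def assert_uses_all_gpus(model):
    # hypothetical helper: `model` is any transformers model loaded with device_map="balanced"
    expected = set(range(torch.cuda.device_count()))  # e.g. {0, 1, 2, 3} on a 4-GPU host
    assert set(model.hf_device_map.values()) == expected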
22 changes: 13 additions & 9 deletions tests/test_gpu_examples.py
@@ -252,7 +252,7 @@ def test_causal_lm_training_4bit(self):
         self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])

     @pytest.mark.multi_gpu_tests
-    def test_causal_lm_training_mutli_gpu_4bit(self):
+    def test_causal_lm_training_multi_gpu_4bit(self):
         r"""
         Test the CausalLM training on a multi-GPU device with 4bit base model. The test would simply fail if the
         adapters are not set correctly.
@@ -265,7 +265,7 @@ def test_causal_lm_training_mutli_gpu_4bit(self):
             load_in_4bit=True,
         )

-        self.assertEqual(set(model.hf_device_map.values()), {0, 1})
+        self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))

         model = prepare_model_for_kbit_training(model)

@@ -320,7 +320,11 @@ def test_4bit_adalora_causalLM(self):
         """
         model_id = "facebook/opt-350m"

-        model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True)
+        model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            load_in_4bit=True,
+            device_map={"": "cuda:0"},  # fix for >3 GPUs
+        )
         tokenizer = AutoTokenizer.from_pretrained(model_id)

         model.gradient_checkpointing_enable()
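The `device_map={"": "cuda:0"}` argument added above pins the entire 4-bit model to a single GPU, sidestepping the multi-GPU issue tracked in artidoro/qlora#186 on machines with more than three GPUs. A standalone sketch of the same workaround (model id taken from the test above; the final check is illustrative only):

from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-350m",
    load_in_4bit=True,
    device_map={"": "cuda:0"},  # "" (the root module) -> everything on GPU 0, no sharding
)
assert set(model.hf_device_map.values()) == {"cuda:0"}  # the whole model lives on one device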
@@ -440,7 +444,7 @@ def test_8bit_adalora_causalLM(self):

     @pytest.mark.multi_gpu_tests
     @require_torch_multi_gpu
-    def test_causal_lm_training_mutli_gpu(self):
+    def test_causal_lm_training_multi_gpu(self):
         r"""
         Test the CausalLM training on a multi-GPU device. This test is a converted version of
         https://github.com/huggingface/peft/blob/main/examples/int8_training/Finetune_opt_bnb_peft.ipynb where we train
@@ -454,7 +458,7 @@ def test_causal_lm_training_mutli_gpu(self):
             device_map="auto",
         )

-        self.assertEqual(set(model.hf_device_map.values()), {0, 1})
+        self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))

         tokenizer = AutoTokenizer.from_pretrained(self.causal_lm_model_id)
         model = prepare_model_for_int8_training(model)
@@ -564,7 +568,7 @@ def test_seq2seq_lm_training_single_gpu(self):

     @pytest.mark.multi_gpu_tests
     @require_torch_multi_gpu
-    def test_seq2seq_lm_training_mutli_gpu(self):
+    def test_seq2seq_lm_training_multi_gpu(self):
         r"""
         Test the Seq2SeqLM training on a multi-GPU device. This test is a converted version of
         https://github.com/huggingface/peft/blob/main/examples/int8_training/Finetune_opt_bnb_peft.ipynb where we train
@@ -578,7 +582,7 @@ def test_seq2seq_lm_training_mutli_gpu(self):
             device_map="balanced",
         )

-        self.assertEqual(set(model.hf_device_map.values()), {0, 1})
+        self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))

         tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)
         model = prepare_model_for_int8_training(model)
@@ -875,7 +879,7 @@ def test_adalora_causalLM(self):

     @pytest.mark.multi_gpu_tests
     @require_torch_multi_gpu
-    def test_causal_lm_training_mutli_gpu(self):
+    def test_causal_lm_training_multi_gpu(self):
         r"""
         Test the CausalLM training on a multi-GPU device. The test would simply fail if the adapters are not set
         correctly.
@@ -889,7 +893,7 @@ def test_causal_lm_training_mutli_gpu(self):
             quantization_config=self.quantization_config,
         )

-        self.assertEqual(set(model.hf_device_map.values()), {0, 1})
+        self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))

         model = prepare_model_for_kbit_training(model)

8 changes: 5 additions & 3 deletions tests/testing_utils.py
@@ -16,6 +16,7 @@
 from contextlib import contextmanager

 import numpy as np
+import pytest
 import torch

 from peft.import_utils import is_auto_gptq_available, is_optimum_available
@@ -47,10 +48,11 @@ def require_bitsandbytes(test_case):
     """
     try:
         import bitsandbytes  # noqa: F401
+
+        test_case = pytest.mark.bitsandbytes(test_case)
     except ImportError:
-        return unittest.skip("test requires bitsandbytes")(test_case)
-    else:
-        return test_case
+        test_case = pytest.mark.skip(reason="test requires bitsandbytes")(test_case)
+    return test_case


 def require_auto_gptq(test_case):
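With this change `require_bitsandbytes` always returns the test: tagged with the `bitsandbytes` marker when the library imports cleanly, or marked as skipped otherwise, instead of wrapping it in `unittest.skip`. A sketch of how a test module picks this up (the import path follows this repo's test layout; the test itself is hypothetical):

from .testing_utils import require_bitsandbytes


@require_bitsandbytes
def test_something_with_bnb():  # hypothetical example test
    # with bitsandbytes installed: carries @pytest.mark.bitsandbytes and runs
    # without bitsandbytes:        collected but skipped via pytest.mark.skip
    import bitsandbytes  # noqa: F401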
