From a02eb243a315af6b875dd2d9325ca117ecd598ce Mon Sep 17 00:00:00 2001
From: Cemberk
Date: Thu, 31 Oct 2024 16:20:44 +0000
Subject: [PATCH] skips due to padding size issue 63 on amd

---
 tests/models/jamba/test_modeling_jamba.py         | 2 ++
 tests/models/mixtral/test_modeling_mixtral.py     | 2 ++
 tests/models/qwen2_moe/test_modeling_qwen2_moe.py | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/tests/models/jamba/test_modeling_jamba.py b/tests/models/jamba/test_modeling_jamba.py
index 6e1a2cf2cf9c..9c9c11817c76 100644
--- a/tests/models/jamba/test_modeling_jamba.py
+++ b/tests/models/jamba/test_modeling_jamba.py
@@ -29,6 +29,7 @@
     require_torch_gpu,
     slow,
     torch_device,
+    skipIfRocm
 )
 
 from ...generation.test_utils import GenerationTesterMixin
@@ -365,6 +366,7 @@ def test_decoder_model_past_with_large_inputs(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
         self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)
 
+    @skipIfRocm
     def test_load_balancing_loss(self):
         r"""
         Let's make sure we can actually compute the loss and do a backward on it.
diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py
index db9641e3dcb2..606eb0e30915 100644
--- a/tests/models/mixtral/test_modeling_mixtral.py
+++ b/tests/models/mixtral/test_modeling_mixtral.py
@@ -28,6 +28,7 @@
     require_torch_sdpa,
     slow,
     torch_device,
+    skipIfRocm
 )
 
 from ...generation.test_utils import GenerationTesterMixin
@@ -493,6 +494,7 @@ def test_flash_attn_2_inference_equivalence_right_padding(self):
         self.skipTest(reason="Mixtral flash attention does not support right padding")
 
     # Ignore copy
+    @skipIfRocm
     def test_load_balancing_loss(self):
         r"""
         Let's make sure we can actually compute the loss and do a backward on it.
diff --git a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py
index 0425172a6fba..bdc9c9a75fca 100644
--- a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py
+++ b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py
@@ -30,6 +30,7 @@
     require_torch_sdpa,
     slow,
     torch_device,
+    skipIfRocm
 )
 
 from ...generation.test_utils import GenerationTesterMixin
@@ -528,6 +529,7 @@ def test_flash_attn_2_inference_equivalence_right_padding(self):
         self.skipTest(reason="Qwen2Moe flash attention does not support right padding")
 
     # Ignore copy
+    @skipIfRocm
    def test_load_balancing_loss(self):
         r"""
         Let's make sure we can actually compute the loss and do a backward on it.
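
Note: the patch imports skipIfRocm from the same testing-utilities import block as require_torch_gpu, slow, and torch_device, but upstream transformers does not ship a helper by that name, so the fork carrying this patch is assumed to provide one. A minimal sketch of what such a decorator could look like follows; the name of the check and its exact ROCm detection are assumptions, not the fork's actual implementation.

# Hypothetical sketch of a skipIfRocm helper; the decorator these tests
# actually import may be implemented differently in the fork's testing utilities.
import unittest

import torch


def skipIfRocm(test_case):
    """Skip a test when PyTorch was built for ROCm (AMD GPUs)."""
    # On ROCm builds, torch.version.hip is a version string; on CUDA/CPU builds it is None.
    running_on_rocm = getattr(torch.version, "hip", None) is not None
    return unittest.skipIf(
        running_on_rocm, "Skipped on ROCm due to padding size issue 63 on AMD"
    )(test_case)

Applied as @skipIfRocm on test_load_balancing_loss, unittest reports the test as skipped rather than failed on AMD GPUs, while NVIDIA and CPU runs are unaffected.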