From a02eb243a315af6b875dd2d9325ca117ecd598ce Mon Sep 17 00:00:00 2001
From: Cemberk
Date: Thu, 31 Oct 2024 16:20:44 +0000
Subject: [PATCH] skips due to padding size issue 63 on amd

---
 tests/models/jamba/test_modeling_jamba.py         | 2 ++
 tests/models/mixtral/test_modeling_mixtral.py     | 2 ++
 tests/models/qwen2_moe/test_modeling_qwen2_moe.py | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/tests/models/jamba/test_modeling_jamba.py b/tests/models/jamba/test_modeling_jamba.py
index 6e1a2cf2cf9c..9c9c11817c76 100644
--- a/tests/models/jamba/test_modeling_jamba.py
+++ b/tests/models/jamba/test_modeling_jamba.py
@@ -29,6 +29,7 @@
     require_torch_gpu,
     slow,
     torch_device,
+    skipIfRocm
 )
 
 from ...generation.test_utils import GenerationTesterMixin
@@ -365,6 +366,7 @@ def test_decoder_model_past_with_large_inputs(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
         self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)
 
+    @skipIfRocm
     def test_load_balancing_loss(self):
         r"""
         Let's make sure we can actually compute the loss and do a backward on it.
diff --git a/tests/models/mixtral/test_modeling_mixtral.py b/tests/models/mixtral/test_modeling_mixtral.py
index db9641e3dcb2..606eb0e30915 100644
--- a/tests/models/mixtral/test_modeling_mixtral.py
+++ b/tests/models/mixtral/test_modeling_mixtral.py
@@ -28,6 +28,7 @@
     require_torch_sdpa,
     slow,
     torch_device,
+    skipIfRocm
 )
 
 from ...generation.test_utils import GenerationTesterMixin
@@ -493,6 +494,7 @@ def test_flash_attn_2_inference_equivalence_right_padding(self):
         self.skipTest(reason="Mixtral flash attention does not support right padding")
 
     # Ignore copy
+    @skipIfRocm
     def test_load_balancing_loss(self):
         r"""
         Let's make sure we can actually compute the loss and do a backward on it.
diff --git a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py
index 0425172a6fba..bdc9c9a75fca 100644
--- a/tests/models/qwen2_moe/test_modeling_qwen2_moe.py
+++ b/tests/models/qwen2_moe/test_modeling_qwen2_moe.py
@@ -30,6 +30,7 @@
     require_torch_sdpa,
     slow,
     torch_device,
+    skipIfRocm
 )
 
 from ...generation.test_utils import GenerationTesterMixin
@@ -528,6 +529,7 @@ def test_flash_attn_2_inference_equivalence_right_padding(self):
         self.skipTest(reason="Qwen2Moe flash attention does not support right padding")
 
     # Ignore copy
+    @skipIfRocm
    def test_load_balancing_loss(self):
         r"""
         Let's make sure we can actually compute the loss and do a backward on it.
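
Note: the patch imports skipIfRocm from the same testing-utilities import block as require_torch_gpu, slow, and torch_device, but upstream transformers does not ship a helper by that name, so the fork carrying this patch is assumed to provide one. A minimal sketch of what such a decorator could look like follows; the name of the check and its exact ROCm detection are assumptions, not the fork's actual implementation.

# Hypothetical sketch of a skipIfRocm helper; the decorator these tests
# actually import may be implemented differently in the fork's testing utilities.
import unittest

import torch


def skipIfRocm(test_case):
    """Skip a test when PyTorch was built for ROCm (AMD GPUs)."""
    # On ROCm builds, torch.version.hip is a version string; on CUDA/CPU builds it is None.
    running_on_rocm = getattr(torch.version, "hip", None) is not None
    return unittest.skipIf(
        running_on_rocm, "Skipped on ROCm due to padding size issue 63 on AMD"
    )(test_case)

Applied as @skipIfRocm on test_load_balancing_loss, unittest reports the test as skipped rather than failed on AMD GPUs, while NVIDIA and CPU runs are unaffected.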