disable or fix tests broken in CI with PT nightly (#1470)
t-vi authored Nov 26, 2024
1 parent 81f83f3 commit 75a0d47
Showing 5 changed files with 63 additions and 9 deletions.
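
All of the new skip markers below gate on a `version_between` helper imported from thunder.tests.framework; its implementation is not part of this commit. A minimal sketch of how such a helper might look (hypothetical, assuming an inclusive range check over PEP 440 version strings via the `packaging` library; the real helper may differ):

# Hypothetical sketch only -- the actual version_between lives in
# thunder/tests/framework.py and is not shown in this diff.
from packaging.version import Version


def version_between(version: str, *, min_ver: str, max_ver: str) -> bool:
    """Return True if `version` lies inclusively between `min_ver` and `max_ver`."""
    # Drop any local build suffix such as "+cu124" before comparing.
    parsed = Version(version.split("+")[0])
    return Version(min_ver) <= parsed <= Version(max_ver)

Called as `version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99")`, the check is intended to be True only on 2.6.0 pre-release builds, so the `pytest.mark.skipif` decorators below disable the affected tests on PyTorch nightlies while leaving stable releases unaffected.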
11 changes: 6 additions & 5 deletions thunder/tests/distributed/helper.py
@@ -112,6 +112,11 @@ def world_size(self) -> int:
     def init_method(self):
         return f"{common_utils.FILE_SCHEMA}{self.file_name}"

+    @property
+    def destroy_pg_upon_exit(self) -> bool:
+        # Overriding base test class: do not auto destroy PG upon exit.
+        return False
+
     @classmethod
     def _run(cls, rank, test_name, file_name, pipe, *, fake_pg=False):
         assert not fake_pg, "Not yet supported here..."
@@ -130,14 +135,10 @@ def _run(cls, rank, test_name, file_name, pipe, *, fake_pg=False):
         local_rank = self.rank % torch.cuda.device_count()
         torch.cuda.set_device(local_rank)
         os.environ["LOCAL_RANK"] = str(local_rank)
-        if "destroy_process_group" in inspect.signature(self.run_test).parameters:
-            run_test_kwargs = {"destroy_process_group": False}
-        else:
-            run_test_kwargs = {}

         torch.distributed.barrier()
         try:
-            self.run_test(test_name, pipe, **run_test_kwargs)
+            self.run_test(test_name, pipe)
         except Exception:
             raise
         finally:
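
The helper.py change swaps a runtime signature probe for the `destroy_pg_upon_exit` property that recent PyTorch versions of `MultiProcessTestCase` consult before tearing down the process group; overriding it keeps the helper's own cleanup path in the `finally` block. A condensed, illustrative comparison of the two approaches (assuming the helper derives from `torch.testing._internal.common_distributed.MultiProcessTestCase`; names not present in the diff are hypothetical):

# Illustrative comparison only; not the full thunder/tests/distributed/helper.py.
import inspect

from torch.testing._internal.common_distributed import MultiProcessTestCase


class ExampleDistributedTestCase(MultiProcessTestCase):  # hypothetical subclass name
    # New approach: advertise via a property that the base class should not
    # destroy the process group itself, so the subclass's own teardown runs.
    @property
    def destroy_pg_upon_exit(self) -> bool:
        return False

    def _invoke(self, test_name, pipe):  # hypothetical helper method
        # Old approach (removed above): probe run_test's signature and only pass
        # the kwarg on torch versions that accept it.
        if "destroy_process_group" in inspect.signature(self.run_test).parameters:
            self.run_test(test_name, pipe, destroy_process_group=False)
        else:
            self.run_test(test_name, pipe)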
5 changes: 5 additions & 0 deletions thunder/tests/test_dynamo.py
@@ -20,6 +20,7 @@
     DynamoThunderExecutor,
     IS_WINDOWS,
     requiresCUDA,
+    version_between,
 )
 from thunder.tests.make_tensor import make_tensor

@@ -450,6 +451,10 @@ def func(x):
             LooseVersion(torch.__version__) < LooseVersion("2.6.0"),
             reason="Skip until the Torch bug is fixed - https://github.com/pytorch/pytorch/pull/139275",
         ),
+        pytest.mark.skipif(
+            version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+            reason="https://github.com/Lightning-AI/lightning-thunder/issues/1471",
+        ),
     ),
 )
 @requiresCUDA
31 changes: 29 additions & 2 deletions thunder/tests/test_networks.py
@@ -11,7 +11,13 @@

 import thunder
 import thunder.torch as ttorch
-from thunder.tests.framework import instantiate, requiresCUDA, DynamoThunderExecutor, _all_test_executors
+from thunder.tests.framework import (
+    instantiate,
+    requiresCUDA,
+    DynamoThunderExecutor,
+    _all_test_executors,
+    version_between,
+)
 import thunder.tests.nanogpt_model as nanogpt_model
 import thunder.tests.hf_bart_self_attn as hf_bart_self_attn

@@ -214,7 +220,16 @@ def test_nanogpt_mlp(executor, device, dtype):
     assert_close(torch_result, thunder_result)


-@instantiate(dtypes=(thunder.float32,), executors=all_test_executors_and_dynamo)
+@instantiate(
+    dtypes=(thunder.float32,),
+    executors=all_test_executors_and_dynamo,
+    decorators=(
+        pytest.mark.skipif(
+            version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+            reason="https://github.com/Lightning-AI/lightning-thunder/issues/1471",
+        ),
+    ),
+)
 def test_nanogpt_gelu(executor, device, dtype):
     tdtype = ttorch.to_torch_dtype(dtype)
     make = partial(make_tensor, dtype=tdtype, device=device)
@@ -269,6 +284,10 @@ def dummy(*args):
     assert_close(actual, expected)


+@pytest.mark.skipif(
+    version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+    reason="https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1413",
+)
 @requiresCUDA
 def test_quantization():
     try:
@@ -349,6 +368,10 @@ def test_quantization():
         assert_close(v, sd2[k])


+@pytest.mark.skipif(
+    version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+    reason="https://github.com/Lightning-AI/lightning-thunder/issues/1471",
+)
 @thunder.tests.framework.requiresCUDA
 def test_thunderfx_mistral_nemo_small():
     """
@@ -400,6 +423,10 @@ def test_thunderfx_mistral_nemo_small():
     assert th_backend.subgraph_infos, "Should have at least 1 subgraph"


+@pytest.mark.skipif(
+    version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+    reason="https://github.com/Lightning-AI/lightning-thunder/issues/1471",
+)
 @thunder.tests.framework.requiresCUDA
 @pytest.mark.parametrize("model_id", ["Qwen/Qwen2.5-7B-Instruct", "microsoft/Phi-3-mini-128k-instruct"])
 def test_hf_for_nemo(model_id):
6 changes: 6 additions & 0 deletions thunder/tests/test_recipes.py
@@ -1,8 +1,10 @@
 import thunder
 import transformers
 import torch
+import pytest

 from torch.testing import assert_close, make_tensor
+from thunder.tests.framework import version_between


 def test_recipe_basic_bert():
@@ -22,6 +24,10 @@ def test_recipe_basic_bert():
     assert_close(actual, expected)


+@pytest.mark.skipif(
+    version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+    reason="https://github.com/Lightning-AI/lightning-thunder/issues/1471",
+)
 def test_recipe_basic_bert_dynamo():
     bert = transformers.BertForSequenceClassification(transformers.BertConfig())
     del bert.bert.encoder.layer[1:]
19 changes: 17 additions & 2 deletions thunder/tests/test_transforms.py
@@ -5,7 +5,7 @@

 import thunder
 from thunder.dev_utils.nvtx_profile_transform import NvtxProfileTransform, nvtx_push, nvtx_pop
-from thunder.tests.framework import requiresCUDA
+from thunder.tests.framework import requiresCUDA, version_between


 @requiresCUDA
@@ -112,6 +112,10 @@ def test_materialization():
     assert_close(actual, expected, rtol=1e-2, atol=1e-2)


+@pytest.mark.skipif(
+    version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+    reason="https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1413",
+)
 @pytest.mark.skipif(not package_available("bitsandbytes"), reason="`bitsandbytes` is not available")
 @requiresCUDA
 def test_quantization_on_meta():
@@ -185,7 +189,14 @@ def test_quantization_on_meta():
     assert_close(actual, actual2)


+@pytest.mark.skipif(
+    version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+    reason="https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1413",
+)
-@pytest.mark.skipif(not package_available("bitsandbytes"), reason="`bitsandbytes` is not available")
+@pytest.mark.skipif(
+    not package_available("bitsandbytes"),
+    reason="`bitsandbytes` is not available",
+)
 @requiresCUDA
 def test_nvfuser_cse():
     with torch.device("cuda"):
@@ -289,6 +300,10 @@ def f(x):
     jf(weights)


+@pytest.mark.skipif(
+    version_between(torch.__version__, min_ver="2.6.0a0", max_ver="2.6.0a99"),
+    reason="https://github.com/bitsandbytes-foundation/bitsandbytes/pull/1413",
+)
 @pytest.mark.skipif(not package_available("bitsandbytes"), reason="`bitsandbytes` is not available")
 @requiresCUDA
 def test_materialization_init():
