
Commit d08e0bb

update
1 parent c366b5a commit d08e0bb

8 files changed: +794, −287 lines

tests/models/testing_utils/__init__.py

Lines changed: 5 additions & 3 deletions
```diff
@@ -1,8 +1,8 @@
 from .attention import AttentionTesterMixin, ContextParallelTesterMixin
-from .common import ModelTesterMixin
+from .common import BaseModelTesterConfig, ModelTesterMixin
 from .compile import TorchCompileTesterMixin
 from .ip_adapter import IPAdapterTesterMixin
-from .lora import LoraTesterMixin
+from .lora import LoraHotSwappingForModelTesterMixin, LoraTesterMixin
 from .memory import CPUOffloadTesterMixin, GroupOffloadTesterMixin, LayerwiseCastingTesterMixin, MemoryTesterMixin
 from .quantization import (
     BitsAndBytesTesterMixin,
@@ -17,14 +17,16 @@


 __all__ = [
-    "ContextParallelTesterMixin",
     "AttentionTesterMixin",
+    "BaseModelTesterConfig",
     "BitsAndBytesTesterMixin",
+    "ContextParallelTesterMixin",
     "CPUOffloadTesterMixin",
     "GGUFTesterMixin",
     "GroupOffloadTesterMixin",
     "IPAdapterTesterMixin",
     "LayerwiseCastingTesterMixin",
+    "LoraHotSwappingForModelTesterMixin",
     "LoraTesterMixin",
     "MemoryTesterMixin",
     "ModelOptTesterMixin",
```

tests/models/testing_utils/attention.py

Lines changed: 19 additions & 8 deletions
```diff
@@ -25,7 +25,13 @@
     AttnProcessor,
 )

-from ...testing_utils import is_attention, is_context_parallel, require_torch_multi_accelerator, torch_device
+from ...testing_utils import (
+    assert_tensors_close,
+    is_attention,
+    is_context_parallel,
+    require_torch_multi_accelerator,
+    torch_device,
+)


 @is_attention
```
```diff
@@ -89,8 +95,12 @@ def test_fuse_unfuse_qkv_projections(self):
         output_after_fusion = output_after_fusion.to_tuple()[0]

         # Verify outputs match
-        assert torch.allclose(output_before_fusion, output_after_fusion, atol=self.base_precision), (
-            "Output should not change after fusing projections"
+        assert_tensors_close(
+            output_before_fusion,
+            output_after_fusion,
+            atol=self.base_precision,
+            rtol=0,
+            msg="Output should not change after fusing projections",
         )

         # Unfuse projections
@@ -110,8 +120,12 @@ def test_fuse_unfuse_qkv_projections(self):
         output_after_unfusion = output_after_unfusion.to_tuple()[0]

         # Verify outputs still match
-        assert torch.allclose(output_before_fusion, output_after_unfusion, atol=self.base_precision), (
-            "Output should match original after unfusing projections"
+        assert_tensors_close(
+            output_before_fusion,
+            output_after_unfusion,
+            atol=self.base_precision,
+            rtol=0,
+            msg="Output should match original after unfusing projections",
         )

     def test_get_set_processor(self):
```
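Both assertions now route through `assert_tensors_close` instead of a bare `torch.allclose`. A minimal sketch of what such a helper typically looks like, assuming it wraps `torch.allclose` and reports the worst elementwise difference on failure (the real helper in the test utilities may differ):

```python
import torch


def assert_tensors_close(actual, expected, *, atol, rtol, msg=""):
    # Sketch only: wraps torch.allclose and surfaces the maximum absolute
    # difference so tolerance failures are actionable.
    if not torch.allclose(actual, expected, atol=atol, rtol=rtol):
        max_diff = (actual - expected).abs().max().item()
        raise AssertionError(f"{msg} (max abs diff: {max_diff}, atol={atol}, rtol={rtol})")
```

Note that passing `rtol=0` makes the comparison purely absolute, whereas the old `torch.allclose` calls, which set only `atol`, also applied the default relative tolerance of 1e-5.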
```diff
@@ -238,9 +252,6 @@ def test_context_parallel_inference(self, cp_type):
         if not torch.distributed.is_available():
             pytest.skip("torch.distributed is not available.")

-        if not torch.cuda.is_available() or torch.cuda.device_count() < 2:
-            pytest.skip("Context parallel requires at least 2 CUDA devices.")
-
         if not hasattr(self.model_class, "_cp_plan") or self.model_class._cp_plan is None:
             pytest.skip("Model does not have a _cp_plan defined for context parallel inference.")

```

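The removed in-test device check duplicates what the `require_torch_multi_accelerator` marker added to the imports above already enforces, which is presumably why it was dropped. A minimal sketch of such a marker, assuming a pytest `skipif` over the CUDA device count (the real helper likely also covers non-CUDA accelerators):

```python
import pytest
import torch

# Sketch only: an assumed skipif-based stand-in for the real
# require_torch_multi_accelerator helper in the test utilities.
require_torch_multi_accelerator = pytest.mark.skipif(
    not torch.cuda.is_available() or torch.cuda.device_count() < 2,
    reason="test requires at least two accelerator devices",
)
```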