diff --git a/.github/workflows/regression_test.yml b/.github/workflows/regression_test.yml
index 05b9ece53..85a79cd5c 100644
--- a/.github/workflows/regression_test.yml
+++ b/.github/workflows/regression_test.yml
@@ -31,9 +31,9 @@ jobs:
             torch-spec: 'torch==2.3.0'
             gpu-arch-type: "cuda"
             gpu-arch-version: "12.1"
-          - name: CUDA Nightly
+          - name: CUDA 2.4.0.dev20240421
             runs-on: linux.g5.12xlarge.nvidia.gpu
-            torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu121'
+            torch-spec: '--pre torch==2.4.0.dev20240421+cu121 --index-url https://download.pytorch.org/whl/nightly/cu121'
             gpu-arch-type: "cuda"
             gpu-arch-version: "12.1"
           - name: CPU 2.2.2
diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py
index 521f8a040..585709491 100644
--- a/test/integration/test_integration.py
+++ b/test/integration/test_integration.py
@@ -641,8 +641,6 @@ def test__int_mm(self):
         torch.testing.assert_close(y_ref, y_opt, atol=0, rtol=0)
 
     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    @unittest.skipIf(TORCH_VERSION_AFTER_2_4 and torch.cuda.is_available(), "SystemError: AST constructor recursion depth mismatch (before=45, after=84)")
-
     def test__int_mm_eager_and_torch_compile_numerics(self):
         def __int_mm_ref(x, w):
             x = x.cpu().to(torch.int32)
@@ -950,7 +948,6 @@ def test_aq_int8_weight_only_quant_2_subclass(self, device, dtype):
         )
 
     @parameterized.expand(COMMON_DEVICE_DTYPE)
-    @unittest.skipIf(TORCH_VERSION_AFTER_2_4 and torch.cuda.is_available(), "SystemError: AST constructor recursion depth mismatch (before=45, after=84)")
     def test_aq_int8_weight_only_quant_3_subclass(self, device, dtype):
         self._test_lin_weight_subclass_impl(
             AQWeightOnlyQuantizedLinearWeight3.from_float, device, 35, test_dtype=dtype
@@ -1024,8 +1021,6 @@ def test_int8_dynamic_quant_subclass_api(self, device, dtype):
         )
 
     @parameterized.expand(COMMON_DEVICE_DTYPE)
-    @unittest.skipIf(TORCH_VERSION_AFTER_2_4 and torch.cuda.is_available(), "SystemError: AST constructor recursion depth mismatch (before=45, after=84)")
-
     def test_int8_weight_only_quant_subclass_api(self, device, dtype):
         self._test_lin_weight_subclass_api_impl(
             change_linear_weights_to_int8_woqtensors, device, 40, test_dtype=dtype
@@ -1092,7 +1087,6 @@ def test_weight_only_quant(self):
     @parameterized.expand(COMMON_DEVICE_DTYPE)
     @torch.no_grad()
     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    @unittest.skipIf(TORCH_VERSION_AFTER_2_4 and torch.cuda.is_available(), "SystemError: AST constructor recursion depth mismatch (before=45, after=84)")
     def test_weight_only_quant_force_mixed_mm(self, device, dtype):
         if device != "cuda":
             self.skipTest(f"weight_only_quant_force_mixed_mm can't be constructed on {device}")
@@ -1119,8 +1113,6 @@ def test_weight_only_quant_force_mixed_mm(self, device, dtype):
 
     @parameterized.expand(COMMON_DEVICE_DTYPE)
     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    @unittest.skipIf(TORCH_VERSION_AFTER_2_4 and torch.cuda.is_available(), "SystemError: AST constructor recursion depth mismatch (before=45, after=84)")
-
     def test_weight_only_quant_use_mixed_mm(self, device, dtype):
         if device != "cuda":
             self.skipTest(f"weight_only_quant_force_mixed_mm can't be constructed on {device}")
@@ -1357,8 +1349,6 @@ class TestAutoQuant(unittest.TestCase):
             # (256, 256, 128), TODO: Runs out of shared memory on T4
         ]))
     @unittest.skipIf(not TORCH_VERSION_AFTER_2_3, "autoquant requires 2.3+.")
-    @unittest.skipIf(TORCH_VERSION_AFTER_2_4 and torch.cuda.is_available(), "SystemError: AST constructor recursion depth mismatch (before=45, after=84)")
-
     def test_autoquant_one_input(self, device, dtype, m, k, n):
         print("(m, k, n): ", (m, k, n))
         if device != "cuda" or not torch.cuda.is_available():
@@ -1392,8 +1382,6 @@ def test_autoquant_one_input(self, device, dtype, m, k, n):
             (32, 32, 128, 128),
         ]))
     @unittest.skipIf(not TORCH_VERSION_AFTER_2_3, "autoquant requires 2.3+.")
-    @unittest.skipIf(TORCH_VERSION_AFTER_2_4 and torch.cuda.is_available(), "SystemError: AST constructor recursion depth mismatch (before=45, after=84)")
-
     def test_autoquant_multi_input(self, device, dtype, m1, m2, k, n):
         if device != "cuda" or not torch.cuda.is_available():
             self.skipTest(f"autoquant currently does not support {device}")