 import torch.nn as nn

 from torchao.prototype.mx_formats.config import (
+    MXGemmKernelChoice,
     MXInferenceLinearConfig,
     MXLinearConfig,
     MXLinearRecipeName,
@@ -380,7 +381,7 @@ def test_inference_print_str():
     not TORCH_VERSION_AT_LEAST_2_8, reason="torch.compile requires PyTorch 2.8+"
 )
 @pytest.mark.skipif(not is_sm_at_least_100, reason="Reqs sm100")
-@pytest.mark.parametrize("elem_dtype", [torch.float8_e4m3fn])
+@pytest.mark.parametrize("elem_dtype", [torch.float8_e4m3fn, torch.float4_e2m1fn_x2])
 @pytest.mark.parametrize("bias", [True, False])
 @pytest.mark.parametrize("compile", [True, False])
 @torch.no_grad()
@@ -394,7 +395,16 @@ def test_inference_subclass(elem_dtype, bias: bool, compile: bool):

     m = nn.Linear(32, 128, bias=bias, dtype=torch.bfloat16, device="cuda")
     m_mx = copy.deepcopy(m)
-    config = MXFPInferenceConfig()
+    kernel_choice = (
+        MXGemmKernelChoice.CUTLASS
+        if elem_dtype == DTYPE_FP4
+        else MXGemmKernelChoice.CUBLAS
+    )
+    config = MXFPInferenceConfig(
+        activation_dtype=elem_dtype,
+        weight_dtype=elem_dtype,
+        gemm_kernel_choice=kernel_choice,
+    )
     quantize_(m_mx, config=config)
     if compile:
         m_mx = torch.compile(m_mx, fullgraph=True)
@@ -403,4 +413,7 @@ def test_inference_subclass(elem_dtype, bias: bool, compile: bool):
     y_ref = m(x)
     y_mx = m_mx(x)
     sqnr = compute_error(y_ref, y_mx)
-    assert sqnr >= 25.0, f"Got a sqnr of {sqnr} for {elem_dtype} and bias={bias}"
+    SQNR_THRESHOLD = 25.0 if elem_dtype == torch.float8_e4m3fn else 15.0
+    assert sqnr >= SQNR_THRESHOLD, (
+        f"Got a sqnr of {sqnr} for {elem_dtype} and bias={bias}"
+    )
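
Taken together, the hunks above amount to roughly the test body sketched below. This is a condensed, hedged reconstruction of the post-patch flow, not the verbatim file: the input tensor x, the DTYPE_FP4 alias, and the import paths for MXFPInferenceConfig and compute_error are not visible in this diff, so those parts are assumptions.

import copy

import torch
import torch.nn as nn

from torchao.prototype.mx_formats.config import MXGemmKernelChoice
from torchao.prototype.mx_formats.mx_subclass import MXFPInferenceConfig  # assumed path
from torchao.quantization import quantize_
from torchao.quantization.utils import compute_error  # assumed path for the SQNR helper

DTYPE_FP4 = torch.float4_e2m1fn_x2  # stand-in for the test file's DTYPE_FP4 alias


def run_inference_subclass_check(elem_dtype, bias: bool) -> None:
    m = nn.Linear(32, 128, bias=bias, dtype=torch.bfloat16, device="cuda")
    m_mx = copy.deepcopy(m)

    # fp4 routes through the CUTLASS kernel; fp8 keeps using cuBLAS.
    kernel_choice = (
        MXGemmKernelChoice.CUTLASS
        if elem_dtype == DTYPE_FP4
        else MXGemmKernelChoice.CUBLAS
    )
    config = MXFPInferenceConfig(
        activation_dtype=elem_dtype,
        weight_dtype=elem_dtype,
        gemm_kernel_choice=kernel_choice,
    )
    quantize_(m_mx, config=config)

    x = torch.randn(128, 32, device="cuda", dtype=torch.bfloat16)  # assumed input shape
    sqnr = compute_error(m(x), m_mx(x))
    # fp4 carries fewer mantissa bits, so its SQNR bar is relaxed from 25 dB to 15 dB.
    threshold = 25.0 if elem_dtype == torch.float8_e4m3fn else 15.0
    assert sqnr >= threshold, f"Got a sqnr of {sqnr} for {elem_dtype} and bias={bias}"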