microsoft · justinchuby · Oct 22, 2024 · Oct 15, 2024 · Oct 15, 2024 · Oct 15, 2024
diff --git a/onnxscript/ir/_core.py b/onnxscript/ir/_core.py
@@ -70,6 +70,7 @@
         _enums.DataType.FLOAT8E5M2FNUZ,
         _enums.DataType.INT4,
         _enums.DataType.UINT4,
+        _enums.DataType.FLOAT4E2M1,
     )
 )
 
@@ -182,7 +183,7 @@
     When the dtype is not one of the numpy native dtypes, the value needs need to be:
 
     - ``int8`` or ``uint8`` for int4, with the sign bit extended to 8 bits.
-    - ``uint8`` for uint4.
+    - ``uint8`` for uint4 or float4.
     - ``uint8`` for 8-bit data types.
     - ``uint16`` for bfloat16
 
@@ -213,6 +214,11 @@
                 raise TypeError(
                     f"The numpy array dtype must be uint8 or or ml_dtypes.uint4 (not {array.dtype}) for IR data type {dtype}."
                 )
+        if dtype == _enums.DataType.FLOAT4E2M1:
+            if array.dtype not in (np.uint8, ml_dtypes.float4_e2m1fn):
+                raise TypeError(
+                    f"The numpy array dtype must be uint8 or ml_dtypes.float4_e2m1fn (not {array.dtype}) for IR data type {dtype}."
+                )
         return
 
     try:
@@ -256,6 +262,8 @@
         return array.view(ml_dtypes.int4)
     if dtype == _enums.DataType.UINT4:
         return array.view(ml_dtypes.uint4)
+    if dtype == _enums.DataType.FLOAT4E2M1:
+        return array.view(ml_dtypes.float4_e2m1fn)
     return array
 
 
@@ -431,7 +439,11 @@
         """
         # TODO(justinchuby): Support DLPack
         array = self.numpy()
-        if self.dtype in {_enums.DataType.INT4, _enums.DataType.UINT4}:
+        if self.dtype in {
+            _enums.DataType.INT4,
+            _enums.DataType.UINT4,
+            _enums.DataType.FLOAT4E2M1,
+        }:
             # Pack the array into int4
             array = _type_casting.pack_int4(array)
         else:
@@ -609,7 +621,11 @@
             )
         # Handle the byte order correctly by always using little endian
         dt = np.dtype(self.dtype.numpy()).newbyteorder("<")
-        if self.dtype in {_enums.DataType.INT4, _enums.DataType.UINT4}:
+        if self.dtype in {
+            _enums.DataType.INT4,
+            _enums.DataType.UINT4,
+            _enums.DataType.FLOAT4E2M1,
+        }:
             # Use uint8 to read in the full byte. Otherwise ml_dtypes.int4 will clip the values
             dt = np.dtype(np.uint8).newbyteorder("<")
             count = self.size // 2 + self.size % 2
@@ -622,6 +638,8 @@
             self._array = _type_casting.unpack_int4(self._array, shape)
         elif self.dtype == _enums.DataType.UINT4:
             self._array = _type_casting.unpack_uint4(self._array, shape)
+        elif self.dtype == _enums.DataType.FLOAT4E2M1:
+            self._array = _type_casting.unpack_float4e2m1(self._array, shape)
         else:
             self._array = self._array.reshape(shape)
 

diff --git a/onnxscript/ir/_enums.py b/onnxscript/ir/_enums.py
@@ -64,6 +64,7 @@ class DataType(enum.IntEnum):
     FLOAT8E5M2FNUZ = 20
     UINT4 = 21
     INT4 = 22
+    FLOAT4E2M1 = 23
 
     @classmethod
     def from_numpy(cls, dtype: np.dtype) -> DataType:
@@ -150,5 +151,12 @@ def __str__(self) -> str:
     np.dtype(ml_dtypes.uint4): DataType.UINT4,
 }
 
+# TODO(after min req for ml_dtypes>=0.5): Move this inside _NP_TYPE_TO_DATA_TYPE
+_NP_TYPE_TO_DATA_TYPE.update(
+    {np.dtype(ml_dtypes.float4_e2m1fn): DataType.FLOAT4E2M1}
+    if hasattr(ml_dtypes, "float4_e2m1fn")
+    else {}
+)
+
 # ONNX DataType to Numpy dtype.
 _DATA_TYPE_TO_NP_TYPE = {v: k for k, v in _NP_TYPE_TO_DATA_TYPE.items()}
diff --git a/onnxscript/ir/_enums_test.py b/onnxscript/ir/_enums_test.py
@@ -32,6 +32,8 @@ def test_enums_are_the_same_as_spec(self):
         self.assertEqual(_enums.DataType.FLOAT8E5M2FNUZ, onnx.TensorProto.FLOAT8E5M2FNUZ)
         self.assertEqual(_enums.DataType.UINT4, onnx.TensorProto.UINT4)
         self.assertEqual(_enums.DataType.INT4, onnx.TensorProto.INT4)
+        if hasattr(onnx.TensorProto, "FLOAT4E2M1"):
+            self.assertEqual(_enums.DataType.FLOAT4E2M1, onnx.TensorProto.FLOAT4E2M1)
         self.assertEqual(_enums.DataType.UNDEFINED, onnx.TensorProto.UNDEFINED)
 
     def test_from_numpy_takes_np_dtype_and_returns_data_type(self):

diff --git a/onnxscript/ir/_type_casting.py b/onnxscript/ir/_type_casting.py
@@ -89,3 +89,18 @@
     """
     unpacked = _unpack_uint4_as_uint8(data, dims)
     return _extend_int4_sign_bits(unpacked).view(ml_dtypes.int4)
+
+
+def unpack_float4e2m1(
+    data: npt.NDArray[np.uint8], dims: Sequence[int]
+) -> npt.NDArray[ml_dtypes.float4_e2m1fn]:
+    """Convert a packed float4e2m1 array to unpacked float4e2m1 array.
+
+    Args:
+        data: A numpy array of packed uint8 bytes.
+        dims: The dimensions are used to reshape the unpacked buffer.
+
+    Returns:
+        A numpy array of ml_dtypes.float4_e2m1fn reshaped to dims.
+    """
+    return _unpack_uint4_as_uint8(data, dims).view(ml_dtypes.float4_e2m1fn)
diff --git a/onnxscript/ir/serde.py b/onnxscript/ir/serde.py
@@ -323,6 +323,8 @@
             return _type_casting.unpack_int4(array.astype(np.uint8), self._proto.dims)
         elif dtype == _enums.DataType.UINT4:
             return _type_casting.unpack_uint4(array.astype(np.uint8), self._proto.dims)
+        elif dtype == _enums.DataType.FLOAT4E2M1:
+            return _type_casting.unpack_float4e2m1(array.astype(np.uint8), self._proto.dims)
         else:
             # Otherwise convert to the correct dtype and reshape
             # Note we cannot use view() here because the storage dtype may not be the same size as the target
@@ -369,6 +371,7 @@
                 _enums.DataType.FLOAT8E5M2FNUZ,
                 _enums.DataType.INT4,
                 _enums.DataType.UINT4,
+                _enums.DataType.FLOAT4E2M1,
             }:
                 # uint4 and int4 values are already packed, even when stored as int32
                 # so we don't need to pack them again