From 55f4c781ba7f8b665b0f23e600f98a656c8fa1eb Mon Sep 17 00:00:00 2001 From: Josh Fromm Date: Thu, 19 Sep 2024 16:02:37 -0700 Subject: [PATCH] Switch default MX4 rounding mode to Even (#3111) Summary: Pull Request resolved: https://github.com/pytorch/FBGEMM/pull/3111 X-link: https://github.com/facebookresearch/FBGEMM/pull/198 Even rounding is a bit faster and more accurate than ceil rounding. This diff switches the default to it. Reviewed By: jspark1105 Differential Revision: D62466094 fbshipit-source-id: 9e80c49f536332ae65c665df7b325cecdbfef92b --- fbgemm_gpu/fbgemm_gpu/quantize/quantize_ops.py | 2 +- fbgemm_gpu/fbgemm_gpu/quantize_comm.py | 4 ++-- fbgemm_gpu/fbgemm_gpu/quantize_utils.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/fbgemm_gpu/fbgemm_gpu/quantize/quantize_ops.py b/fbgemm_gpu/fbgemm_gpu/quantize/quantize_ops.py index 890f9d331..3da107b61 100644 --- a/fbgemm_gpu/fbgemm_gpu/quantize/quantize_ops.py +++ b/fbgemm_gpu/fbgemm_gpu/quantize/quantize_ops.py @@ -19,7 +19,7 @@ def quantize_mx( elem_mbits: int = 3, elem_max_norm: float = 6.0, mx_group_size: int = 32, - rounding_mode: Union[RoundingMode, int] = RoundingMode.ceil, + rounding_mode: Union[RoundingMode, int] = RoundingMode.even, ) -> torch.Tensor: """ Registered quantize_mx ops for E2E comm. 
diff --git a/fbgemm_gpu/fbgemm_gpu/quantize_comm.py b/fbgemm_gpu/fbgemm_gpu/quantize_comm.py index ca2ebd02d..e699a4f15 100644 --- a/fbgemm_gpu/fbgemm_gpu/quantize_comm.py +++ b/fbgemm_gpu/fbgemm_gpu/quantize_comm.py @@ -66,7 +66,7 @@ class QuantizationContext: row_dim: int = ROW_DIM_DEFAULT row_dim_quant: int = -1 mx_group_size: int = MX_GROUP_SIZE_DEFAULT - rounding_mode: RoundingMode = RoundingMode.ceil + rounding_mode: RoundingMode = RoundingMode.even padded_dim_sum_per_rank: Optional[List[int]] = None @@ -110,7 +110,7 @@ def _quantize_tensor( return input_quant_all2all elif comm_precision == SparseType.MX4: mx_group_size = ctx.mx_group_size if ctx is not None else MX_GROUP_SIZE_DEFAULT - rounding_mode = ctx.rounding_mode if ctx is not None else RoundingMode.ceil + rounding_mode = ctx.rounding_mode if ctx is not None else RoundingMode.even return fp32_to_mx4( input_tensor, mx_group_size, rounding_mode=rounding_mode ).view(-1) diff --git a/fbgemm_gpu/fbgemm_gpu/quantize_utils.py b/fbgemm_gpu/fbgemm_gpu/quantize_utils.py index a0020f8b2..7646edf4e 100644 --- a/fbgemm_gpu/fbgemm_gpu/quantize_utils.py +++ b/fbgemm_gpu/fbgemm_gpu/quantize_utils.py @@ -36,7 +36,7 @@ def fp32_to_mx4( group_size: int = 32, ebits: int = 2, mbits: int = 1, - rounding_mode: Optional[Union[RoundingMode, int]] = RoundingMode.ceil, + rounding_mode: Optional[Union[RoundingMode, int]] = RoundingMode.even, stochastic_casting: bool = False, use_triton: bool = True, ) -> torch.Tensor: @@ -58,7 +58,7 @@ def fp32_to_mx4( # Accelerated MX4 is only available on cuda, if input is on cpu, use python. # Operate on flattened input. if rounding_mode is None: - rounding_mode = RoundingMode.ceil + rounding_mode = RoundingMode.even if not tensor.is_cuda: return py_quantize_mx4(