diff --git a/fbgemm_gpu/docs/src/fbgemm_gpu-python-api/quantize_ops.rst b/fbgemm_gpu/docs/src/fbgemm_gpu-python-api/quantize_ops.rst new file mode 100644 index 000000000..df2a6c2d7 --- /dev/null +++ b/fbgemm_gpu/docs/src/fbgemm_gpu-python-api/quantize_ops.rst @@ -0,0 +1,6 @@ +Quantization Operators +====================== + +.. automodule:: fbgemm_gpu + +.. autofunction:: torch.ops.fbgemm.FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf diff --git a/fbgemm_gpu/docs/src/index.rst b/fbgemm_gpu/docs/src/index.rst index ba0d8ba6b..1669bf22f 100644 --- a/fbgemm_gpu/docs/src/index.rst +++ b/fbgemm_gpu/docs/src/index.rst @@ -91,6 +91,7 @@ Table of Contents fbgemm_gpu-python-api/jagged_tensor_ops.rst fbgemm_gpu-python-api/pooled_embedding_ops.rst + fbgemm_gpu-python-api/quantize_ops.rst .. _fbgemm-gpu.toc.api.python.modules: diff --git a/fbgemm_gpu/fbgemm_gpu/docs/__init__.py b/fbgemm_gpu/fbgemm_gpu/docs/__init__.py index 4b621cbe3..e531e1254 100644 --- a/fbgemm_gpu/fbgemm_gpu/docs/__init__.py +++ b/fbgemm_gpu/fbgemm_gpu/docs/__init__.py @@ -11,6 +11,7 @@ jagged_tensor_ops, merge_pooled_embedding_ops, permute_pooled_embedding_ops, + quantize_ops, ) except Exception: pass diff --git a/fbgemm_gpu/fbgemm_gpu/docs/quantize_ops.py b/fbgemm_gpu/fbgemm_gpu/docs/quantize_ops.py new file mode 100644 index 000000000..3662b12c7 --- /dev/null +++ b/fbgemm_gpu/fbgemm_gpu/docs/quantize_ops.py @@ -0,0 +1,41 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import torch + +from .common import add_docs + +add_docs( + torch.ops.fbgemm.FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf, + """ +FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf(input, bit_rate) -> Tensor + +Convert FP32/16 to INT8/4/2 using rowwise quantization. + +Args: + input (Tensor): An input tensor. 
Must be either FP32 (`torch.float`)
    or FP16 (`torch.half`) and must have 2 dimensions.
+
+    bit_rate (int): Quantized bit rate (2 for INT2, 4 for INT4, or 8 for
+        INT8).
+
+Returns:
+    Quantized output (Tensor). Data type is `torch.uint8` (byte type).
+
+**Example:**
+
+    >>> # Randomize input
+    >>> input = torch.randn(2, 4, dtype=torch.float32, device="cuda")
+    >>> print(input)
+    tensor([[ 0.8247, 0.0031, -1.0068, -1.2081],
+    [ 0.5427, 1.5772, 1.0291, -0.7626]], device='cuda:0')
+    >>> # Quantize
+    >>> output = torch.ops.fbgemm.FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf(input, bit_rate=4)
+    >>> print(output)
+    tensor([[159, 1, 86, 48, 213, 188],
+    [248, 11, 254, 48, 26, 186]], device='cuda:0', dtype=torch.uint8)
+    """,
+)