diff --git a/fbgemm_gpu/experimental/gen_ai/src/quantize/cutlass_extensions.cu b/fbgemm_gpu/experimental/gen_ai/src/quantize/cutlass_extensions.cu index d973d28ac..ef44aedb1 100644 --- a/fbgemm_gpu/experimental/gen_ai/src/quantize/cutlass_extensions.cu +++ b/fbgemm_gpu/experimental/gen_ai/src/quantize/cutlass_extensions.cu @@ -1896,6 +1896,14 @@ at::Tensor f8f8bf16_cublas( "CUDA version is older than 12.0"); // requires CUDA>=12 } at::Tensor f8f8bf16( + at::Tensor XQ, // FP8 + at::Tensor WQ, // FP8 + at::Tensor scale, + bool use_fast_accum) { + throw std::runtime_error( + "CUDA version is older than 12.0"); // requires CUDA>=12 +} +at::Tensor f8f8bf16_tensorwise( at::Tensor XQ, // FP8 at::Tensor WQ, // FP8 double scale,