From 131a80ad4375662a57ca18cd0d7b5eb4f70c9a91 Mon Sep 17 00:00:00 2001 From: facebook-github-bot Date: Wed, 24 Apr 2024 05:20:03 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20pytorch/?= =?UTF-8?q?FBGEMM@0fea06cf3714a766bdff750d0c219f12cc3acef7=20=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- _modules/fbgemm_gpu/docs/examples.html | 1 + ...table_batched_embeddings_ops_training.html | 1 + _modules/index.html | 1 + .../experimental_ops.rst.txt | 7 + _sources/index.rst.txt | 1 + annotated.html | 2 +- classes.html | 20 +- classfbgemm_1_1_code_cache-members.html | 2 +- classfbgemm_1_1_code_cache.html | 2 +- classfbgemm_1_1_code_gen_base-members.html | 2 +- classfbgemm_1_1_code_gen_base.html | 6 +- ..._1_1_compressed_sparse_column-members.html | 2 +- classfbgemm_1_1_compressed_sparse_column.html | 2 +- classfbgemm_1_1_do_nothing-members.html | 2 +- classfbgemm_1_1_do_nothing.html | 2 +- ...m_1_1_do_s_conv_on_inp_buffer-members.html | 2 +- classfbgemm_1_1_do_s_conv_on_inp_buffer.html | 2 +- ...mm_1_1_do_spmdm_on_inp_buffer-members.html | 2 +- classfbgemm_1_1_do_spmdm_on_inp_buffer.html | 2 +- classfbgemm_1_1_execute_kernel-members.html | 2 +- classfbgemm_1_1_execute_kernel.html | 2 +- ...penam9894617fba2431fcc8042b1a22b96270.html | 2 +- ...penamed4eae1e4a482e951633b3f2b3cce49d.html | 2 +- classfbgemm_1_1_pack_a_matrix-members.html | 2 +- classfbgemm_1_1_pack_a_matrix.html | 2 +- ...bgemm_1_1_pack_a_with_im2_col-members.html | 2 +- classfbgemm_1_1_pack_a_with_im2_col.html | 2 +- ..._pack_a_with_quant_row_offset-members.html | 2 +- ...gemm_1_1_pack_a_with_quant_row_offset.html | 6 +- ...mm_1_1_pack_a_with_row_offset-members.html | 2 +- classfbgemm_1_1_pack_a_with_row_offset.html | 6 +- classfbgemm_1_1_pack_b_matrix-members.html | 2 +- classfbgemm_1_1_pack_b_matrix.html | 6 +- classfbgemm_1_1_pack_matrix-members.html | 2 +- classfbgemm_1_1_pack_matrix.html | 2 +- ...pack_weight_matrix_for_g_conv-members.html | 2 +- ...emm_1_1_pack_weight_matrix_for_g_conv.html | 6 +- ...emm_1_1_pack_weights_for_conv-members.html | 2 +- classfbgemm_1_1_pack_weights_for_conv.html | 2 +- ...gemm_1_1_packed_gemm_matrix_b-members.html | 2 +- classfbgemm_1_1_packed_gemm_matrix_b.html | 2 +- ...emm_1_1_re_quantize_for_float-members.html | 2 +- classfbgemm_1_1_re_quantize_for_float.html | 2 +- ...fbgemm_1_1_re_quantize_output-members.html | 2 +- classfbgemm_1_1_re_quantize_output.html | 2 +- classfbgemm_1_1_relu_output-members.html | 2 +- classfbgemm_1_1_relu_output.html | 2 +- classfbgemm_1_1_scale_o_p-members.html | 2 +- classfbgemm_1_1_scale_o_p.html | 2 +- ...1_1_sparse_ada_grad_signature-members.html | 2 +- ...sfbgemm_1_1_sparse_ada_grad_signature.html | 2 +- classfbgemm_1_1mem_copy-members.html | 2 +- classfbgemm_1_1mem_copy.html | 2 +- dir_34e8510f39fc35193d2c335cf69ac2ab.html | 93 + dir_3564159197d660dd468988083a59c7ac.html | 87 + dir_5dcc8468a72bed6dcb0f21f3eee5d218.html | 93 + dir_99ba4fd65b63105d4d187e7a275d7ae6.html | 87 + dir_d0c69fd2abdd01678e72de86d9296855.html | 87 + dir_fad263dcd5898f620d525bed679b1c92.html | 87 + doxygen_crawl.html | 40 + fbgemm-cpp-api/QuantUtils.html | 1 + fbgemm-development/BuildInstructions.html | 1 + fbgemm_gpu-cpp-api/embedding_ops.html | 5 +- fbgemm_gpu-cpp-api/experimental_ops.html | 743 ++++ fbgemm_gpu-cpp-api/input_combine.html | 1 + fbgemm_gpu-cpp-api/jagged_tensor_ops.html | 1 + fbgemm_gpu-cpp-api/layout_transform_ops.html | 1 + fbgemm_gpu-cpp-api/memory_utils.html | 1 + 
.../merge_pooled_embeddings.html | 1 + fbgemm_gpu-cpp-api/quantize_ops.html | 1 + fbgemm_gpu-cpp-api/sparse_ops.html | 1 + .../split_table_batched_embeddings.html | 1 + fbgemm_gpu-development/BuildInstructions.html | 1 + .../InstallationInstructions.html | 1 + fbgemm_gpu-development/TestInstructions.html | 1 + .../jagged-tensor-ops/JaggedTensorOps.html | 1 + fbgemm_gpu-python-api/jagged_tensor_ops.html | 1 + .../table_batched_embedding_ops.html | 5 +- general/ContactUs.html | 1 + general/Contributing.html | 1 + general/License.html | 1 + general/documentation/Cpp.html | 1 + general/documentation/Overview.html | 1 + general/documentation/Python.html | 1 + general/documentation/Sphinx.html | 1 + genindex.html | 3 + group__experimental-gen-ai-attention.html | 156 + index.html | 2 + namespacefbgemm.html | 3276 +++++++++++++++++ namespacemembers.html | 244 ++ namespacemembers_enum.html | 86 + namespacemembers_func.html | 236 ++ objects.inv | Bin 14063 -> 14260 bytes py-modindex.html | 1 + search.html | 1 + search/all_1.js | 6 +- search/all_10.js | 86 +- search/all_11.js | 5 +- search/all_12.js | 29 +- search/all_13.js | 10 +- search/all_14.js | 19 +- search/all_2.js | 21 +- search/all_3.js | 40 +- search/all_4.js | 18 +- search/all_5.js | 15 +- search/all_6.js | 48 +- search/all_7.js | 33 +- search/all_9.js | 31 +- search/all_d.js | 23 +- search/all_f.js | 3 +- search/enums_0.js | 5 + search/enums_1.js | 4 + search/enums_2.js | 4 + search/functions_10.js | 17 +- search/functions_11.js | 8 +- search/functions_12.js | 7 +- search/functions_2.js | 13 +- search/functions_3.js | 8 +- search/functions_4.js | 6 +- search/functions_5.js | 10 +- search/functions_6.js | 47 +- search/functions_7.js | 28 +- search/functions_9.js | 21 +- search/functions_c.js | 6 +- search/functions_e.js | 24 +- search/functions_f.js | 3 +- search/groups_0.js | 6 +- search/groups_3.js | 3 +- search/groups_4.js | 5 +- search/namespaces_0.js | 4 + search/searchdata.js | 24 +- searchindex.js | 2 +- structfbgemm_1_1_b_c_s_r_matrix-members.html | 2 +- structfbgemm_1_1_b_c_s_r_matrix.html | 2 +- ...ctfbgemm_1_1_blocking_factors-members.html | 2 +- structfbgemm_1_1_blocking_factors.html | 2 +- structfbgemm_1_1_packing_traits.html | 4 +- ...emm_1_1_requantization_params-members.html | 2 +- structfbgemm_1_1_requantization_params.html | 2 +- ..._1_tensor_quantization_params-members.html | 2 +- ...fbgemm_1_1_tensor_quantization_params.html | 2 +- structfbgemm_1_1block__type__t-members.html | 2 +- structfbgemm_1_1block__type__t.html | 2 +- structfbgemm_1_1conv__param__t-members.html | 2 +- structfbgemm_1_1conv__param__t.html | 2 +- structfbgemm_1_1is__8bit-members.html | 2 +- structfbgemm_1_1is__8bit.html | 2 +- ...ntization_for_float_params__t-members.html | 2 +- ...1_1requantization_for_float_params__t.html | 2 +- ...m_1_1requantization_params__t-members.html | 2 +- structfbgemm_1_1requantization_params__t.html | 2 +- structfbgemm_1_1simd__info.html | 2 +- structfbgemm_1_1thread__type__t-members.html | 2 +- structfbgemm_1_1thread__type__t.html | 2 +- topics.html | 21 +- 155 files changed, 5895 insertions(+), 313 deletions(-) create mode 100644 _sources/fbgemm_gpu-cpp-api/experimental_ops.rst.txt create mode 100644 dir_34e8510f39fc35193d2c335cf69ac2ab.html create mode 100644 dir_3564159197d660dd468988083a59c7ac.html create mode 100644 dir_5dcc8468a72bed6dcb0f21f3eee5d218.html create mode 100644 dir_99ba4fd65b63105d4d187e7a275d7ae6.html create mode 100644 dir_d0c69fd2abdd01678e72de86d9296855.html create mode 100644 
dir_fad263dcd5898f620d525bed679b1c92.html create mode 100644 fbgemm_gpu-cpp-api/experimental_ops.html create mode 100644 group__experimental-gen-ai-attention.html create mode 100644 namespacefbgemm.html create mode 100644 namespacemembers.html create mode 100644 namespacemembers_enum.html create mode 100644 namespacemembers_func.html create mode 100644 search/enums_0.js create mode 100644 search/enums_1.js create mode 100644 search/enums_2.js create mode 100644 search/namespaces_0.js diff --git a/_modules/fbgemm_gpu/docs/examples.html b/_modules/fbgemm_gpu/docs/examples.html index 49e32b5ff..50160594a 100644 --- a/_modules/fbgemm_gpu/docs/examples.html +++ b/_modules/fbgemm_gpu/docs/examples.html @@ -292,6 +292,7 @@
  • Combine Input Operators
  • Layout Transformation Operators
  • Embedding Operators
  • +
  • Experimental Operators
  • FBGEMM_GPU Python API


diff --git a/namespacefbgemm.html b/namespacefbgemm.html new file mode 100644 index 000000000..bb16814a5 --- /dev/null +++ b/namespacefbgemm.html @@ -0,0 +1,3276 @@
fbgemm_gpu: fbgemm Namespace Reference
    fbgemm Namespace Reference

Classes

    struct  BCSRMatrix
     
    struct  block_type_t
     
    struct  BlockingFactors
     
    class  CodeCache
     
    class  CodeGenBase
     
    class  CompressedSparseColumn
     
    struct  conv_param_t
     
    class  DoNothing
     
    class  DoSConvOnInpBuffer
     
    class  DoSpmdmOnInpBuffer
     
    class  ExecuteKernel
     
    class  ExecuteKernel< packingAMatrix, PackBMatrix< int8_t, typename packingAMatrix::accType >, cT, processOutputType >
     
    struct  is_8bit
     
    class  memCopy
     
    class  PackAMatrix
     
    class  PackAWithIm2Col
     
    class  PackAWithQuantRowOffset
     
    class  PackAWithRowOffset
     
    class  PackBMatrix
     
    class  PackedGemmMatrixB
     
    struct  PackingTraits
     
    class  PackMatrix
     
    class  PackWeightMatrixForGConv
     
    class  PackWeightsForConv
     
    class  ReluOutput
     
    struct  requantizationForFloatParams_t
     
    struct  RequantizationParams
     
    struct  requantizationParams_t
     
    class  ReQuantizeForFloat
     
    class  ReQuantizeOutput
     
    class  ScaleOP
     
    struct  simd_info
     
    class  SparseAdaGradSignature
     
    struct  TensorQuantizationParams
     
    struct  thread_type_t
     
Enumerations

enum class  impl_type_t
 
Functions

    template<typename packingAMatrix , typename packingBMatrix , typename cT , typename processOutputType >
    void fbgemmPacked (PackMatrix< packingAMatrix, typename packingAMatrix::inpType, typename packingAMatrix::accType > &packA, PackMatrix< packingBMatrix, typename packingBMatrix::inpType, typename packingBMatrix::accType > &packB, cT *C, std::int32_t *C_buffer, std::uint32_t ldc, const processOutputType &outProcess, int thread_id, int num_threads, const BlockingFactors *blocking_params=nullptr)
     
    template<typename packed_W , typename outType , bool FUSE_RELU, QuantizationGranularity Q_GRAN, int SPATIAL_DIM = 2, typename BIAS_TYPE = std::int32_t>
    void fbgemmGroupwiseConv (const conv_param_t< SPATIAL_DIM > &conv_param, const std::uint8_t *activations, std::int32_t a_zero_point, std::int32_t *rowOffsetBuf, packed_W &packed_weights, outType *out, std::int32_t *outBuffer, const ReQuantizeOutput< FUSE_RELU, Q_GRAN, BIAS_TYPE > &outProcess, int thread_id, int num_threads)
     
    template<int SPATIAL_DIM = 2>
    int rowOffsetBufferSizeGConv (const conv_param_t< SPATIAL_DIM > &conv_param)
     
    template<typename processOutputType , int SPATIAL_DIM = 2, typename ACC_T = std::int32_t>
    int fbgemmConv (const conv_param_t< SPATIAL_DIM > &conv_p, const std::uint8_t *activations, PackWeightsForConv< SPATIAL_DIM, std::int8_t, ACC_T > &packed_weights, typename processOutputType::outType *out, std::int32_t *outBuffer, processOutputType &outProcess, int thread_id, int num_threads, const BlockingFactors *blocking_params=nullptr)
     
    template<int SPATIAL_DIM = 2, typename ACC_T = std::int32_t>
    optimized_conv_t ConvFastPath (const conv_param_t< SPATIAL_DIM > &conv_p)
     
    void FloatToBfloat16_ref (const float *src, bfloat16 *dst, size_t size)
     
    void Bfloat16ToFloat_ref (const bfloat16 *src, float *dst, size_t size)
     
    void FloatToBfloat16_simd (const float *src, bfloat16 *dst, size_t size)
     
    void Bfloat16ToFloat_simd (const bfloat16 *src, float *dst, size_t size)
     
    void FloatToFloat16_ref (const float *src, float16 *dst, size_t size, bool do_clip=false)
     
    void Float16ToFloat_ref (const float16 *src, float *dst, size_t size)
     
    void FloatToFloat16_simd (const float *src, float16 *dst, size_t size, bool do_clip=false)
     
    void Float16ToFloat_simd (const float16 *src, float *dst, size_t size)
     
    template<typename InType , typename IndexType , typename OffsetType = std::int32_t, typename OutType = float, bool THREAD_LOCAL = false>
    EmbeddingSpMDMKernelSignature< InType, IndexType, OffsetType, OutType >::Type GenerateEmbeddingSpMDM (const std::int64_t block_size, bool has_weight, bool normalize_by_lengths, int prefetch=16, bool is_weight_positional=false, bool use_offsets=true, bool is_bf16_out=false, bool is_bf16_in=false)
     
    template<typename InType , typename IndexType , typename OffsetType = std::int32_t, typename OutType = float, bool THREAD_LOCAL = false>
    EmbeddingSpMDMKernelSignature< InType, IndexType, OffsetType, OutType >::Type GenerateEmbeddingSpMDMWithStrides (const std::int64_t block_size, bool has_weight, bool normalize_by_lengths, int prefetch=16, bool is_weight_positional=false, bool use_offsets=true, std::int64_t output_stride=-1, std::int64_t input_stride=-1, bool scale_bias_last=true, bool no_bag=false, bool is_bf16_out=false, bool is_bf16_in=false)
     
    template<typename IndexType , typename OffsetType = std::int32_t, typename OutType = float>
    EmbeddingSpMDMKernelSignature< std::uint8_t, IndexType, OffsetType, OutType >::Type GenerateEmbeddingSpMDMNBit (int bit_rate, const std::int64_t block_size, bool has_weight, bool normalize_by_lengths, int prefetch=16, bool is_weight_positional=false, bool use_offsets=true)
     
    template<typename IndexType , typename OffsetType = std::int32_t, typename OutType = float, bool THREAD_LOCAL = false>
    EmbeddingSpMDMKernelSignature< std::uint8_t, IndexType, OffsetType, OutType >::Type GenerateEmbeddingSpMDMNBitWithStrides (int bit_rate, const std::int64_t block_size, bool has_weight, bool normalize_by_lengths, int prefetch=16, bool is_weight_positional=false, bool use_offsets=true, std::int64_t output_stride=-1, std::int64_t input_stride=-1, bool scale_bias_last=true, bool is_bf16_out=false)
     
    template<typename IndexType , typename OffsetType = std::int32_t, typename OutType = float>
    EmbeddingSpMDMKernelSignature< std::uint8_t, IndexType, OffsetType, OutType >::Type GenerateEmbeddingSpMDMFP8WithStrides (const std::int64_t block_size, bool normalize_by_lengths, bool is_weight_positional=false, bool use_offsets=true, std::int64_t output_stride=-1, std::int64_t input_stride=-1, int exponent_bits=4, int exponent_bias=7, bool is_bf16_out=false)
     
    template<typename InType , typename IndexType , typename OffsetType = std::int32_t>
    EmbeddingSpMDMRowWiseSparseKernelSignature< InType, IndexType, OffsetType >::Type GenerateEmbeddingSpMDMRowWiseSparse (const std::int64_t block_size, bool has_weight, bool normalize_by_lengths, int prefetch=16, bool is_weight_positional=false, bool use_offsets=true)
     
    template<typename IndexType , typename OffsetType = std::int32_t>
    EmbeddingSpMDMRowWiseSparseKernelSignature< std::uint8_t, IndexType, OffsetType >::Type GenerateEmbeddingSpMDMNBitRowWiseSparse (int bit_rate, const std::int64_t block_size, bool has_weight, bool normalize_by_lengths, int prefetch=16, bool is_weight_positional=false, bool use_offsets=true)
     
    template<typename IndexType , typename OffsetType = std::int32_t, typename DataType = float>
    RowWiseSparseAdaGradFusedSignature< IndexType, OffsetType, DataType >::Type GenerateRowWiseSparseAdaGradFused (int block_size, int prefetch=16, bool use_offsets=true, bool use_stochastic_rounding=true, int grad_stride=-1)
     
    void PackA (int nrow, int ncol, const float *from, int ldim, float *to)
     
    template<QuantizationGranularity Q_GRAN, typename BIAS_TYPE = std::int32_t>
    void depthwise_2d_same_pad (int N, int H, int W, int IC, int OC, int stride_h, int stride_w, std::int32_t A_zero_point, const std::uint8_t *A, const std::int32_t *B_zero_point, const PackedDepthWiseConvMatrix &Bp, const float *C_multiplier, std::int32_t C_zero_point, std::uint8_t *C, const std::int32_t *col_offsets, const BIAS_TYPE *bias, bool fuse_relu=false, const float *act_times_w_scale=nullptr, int thread_id=0, int num_threads=1)
     
    template<QuantizationGranularity Q_GRAN, typename BIAS_TYPE = std::int32_t>
    void depthwise_3d_same_pad (const conv_param_t< 3 > &conv_p, std::int32_t A_zero_point, const std::uint8_t *A, const std::int32_t *B_zero_point, const PackedDepthWiseConvMatrix &Bp, const float *C_multiplier, std::int32_t C_zero_point, std::uint8_t *C, const std::int32_t *col_offsets, const BIAS_TYPE *bias, bool fuse_relu=false, const float *act_times_w_scale=nullptr, int thread_id=0, int num_threads=1)
     
    void SparseDenseMM (int M, int N, const int *row_ptr, const int *col_idx, const float *values, const float *B, int ldb, float *C, int ldc, bool accum=false)
     
    template<typename T , bool LEGACY = true>
    Quantize (float src, std::int32_t zero_point, float scale, int result_precision, bool result_is_signed=std::is_signed< T >::value)
     
    template<typename T , layout_t LAYOUT = layout_t::KCX>
    void QuantizeGroupwise (const float *src, int K, int C, int X, int G, const float *scales, const std::int32_t *zero_points, T *dst)
     
    template<typename T >
    void FusedQuantizeDequantize (const float *src, float *dst, std::int64_t len, const TensorQuantizationParams &qparams, int thread_id=0, int num_threads=1, float noise_ratio=0.0f)
     
    template<typename InputType >
    void FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf (int bit_rate, const InputType *input, size_t input_rows, int input_columns, std::uint8_t *output)
     
    template<typename OutputType >
    void FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf (int bit_rate, const uint8_t *input, size_t input_rows, int input_columns, OutputType *output)
     
    template<typename InputType >
    void FloatOrHalfToFused8BitRowwiseQuantizedSBFloat (const InputType *input, size_t input_rows, int input_columns, std::uint8_t *output)
     
    template<typename OutputType >
    void Fused8BitRowwiseQuantizedSBFloatToFloatOrHalf (const uint8_t *input, size_t input_rows, int input_columns, OutputType *output)
     
    template<typename InputType >
    void FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfRef (int bit_rate, const InputType *input, size_t input_rows, int input_columns, std::uint8_t *output)
     
    template<typename InputType >
    void FloatOrHalfToFused8BitRowwiseQuantizedSBFloatRef (const InputType *input, size_t input_rows, int input_columns, std::uint8_t *output)
     
    template<typename OutputType >
    void FusedNBitRowwiseQuantizedSBHalfToFloatOrHalfRef (int bit_rate, const uint8_t *input, size_t input_rows, int input_columns, OutputType *output)
     
    template<typename OutputType >
    void Fused8BitRowwiseQuantizedSBFloatToFloatOrHalfRef (const uint8_t *input, size_t input_rows, int input_columns, OutputType *output)
     
    uint32_t Xor128 (void)
     
    template<bool A_SYMMETRIC, bool B_SYMMETRIC, QuantizationGranularity Q_GRAN, bool HAS_BIAS, bool FUSE_RELU, typename BIAS_TYPE = std::int32_t, bool DIRECT = false>
    void requantizeOutputProcessingAvx2 (std::uint8_t *out, const std::int32_t *inp, const block_type_t &block, int ld_out, int ld_in, const requantizationParams_t< BIAS_TYPE > &r)
     
    template<bool A_SYMMETRIC, bool B_SYMMETRIC, QuantizationGranularity Q_GRAN, bool HAS_BIAS, bool FUSE_RELU, int C_PER_G, typename BIAS_TYPE = std::int32_t>
    void requantizeOutputProcessingGConvAvx512 (std::uint8_t *out, const std::int32_t *inp, const block_type_t &block, int ld_out, int ld_in, const requantizationParams_t< BIAS_TYPE > &r)
     
    template<typename T >
    int compare_buffers (const T *ref, const T *test, int m, int n, int ld, size_t max_mismatches_to_report, float atol=1e-3)
     
    template<typename T >
    void printMatrix (matrix_op_t trans, const T *inp, size_t R, size_t C, size_t ld, std::string name)
     
    template<typename T >
    void transpose_simd (int64_t M, int64_t N, const T *src, int64_t ld_src, T *dst, int64_t ld_dst)
     
    void fbgemmForceIsa (inst_set_t)
     
    void fbgemmEnableAvx512Ymm (bool)
     
    inst_set_t fbgemmInstructionSet ()
     
    int fbgemmGet2DPartition (int m, int n, int nthreads, int n_align, double aspect_ratio)
     
    void fbgemmPartition1D (int thread_id, int num_threads, std::int64_t total_work, std::int64_t &start, std::int64_t &end)
     
    void fbgemmPartition1DBlocked (int thread_id, int num_threads, std::int64_t total_work, int block_size, std::int64_t &start, std::int64_t &end)
     
    bool is_autovec_disabled ()
     
    template<inst_set_t instSet, typename T , typename std::enable_if< instSet==inst_set_t::avx2, int >::type = 0>
    void gen16BitVectorOne (x86::Emitter *a, T dest)
     
    template<inst_set_t instSet, typename T , typename std::enable_if< instSet==inst_set_t::avx2, int >::type = 0>
    void emitLoadDWord (x86::Emitter *a, T dest, const x86::Mem &ptr)
     
    template<inst_set_t instSet, typename T , typename std::enable_if< instSet==inst_set_t::avx512||instSet==inst_set_t::avx512_ymm||instSet==inst_set_t::avx512_vnni||instSet==inst_set_t::avx512_vnni_ymm, int >::type = 0>
    void emitExtractHalfVector (x86::Emitter *a, x86::Ymm half, const x86::Zmm vec, int idx)
     
    template<typename T , typename std::enable_if< std::is_same< T, x86::Ymm >::value, int >::type = 0>
    void gen8BitVectorOne (x86::Emitter *a, T dest)
     
    template<inst_set_t INST_SET, typename std::enable_if< INST_SET==inst_set_t::avx2||INST_SET==inst_set_t::avx512, int >::type = 0>
    void genU8I8S32FMA (x86::Emitter *a, typename simd_info< INST_SET >::vec_reg_t aReg, typename simd_info< INST_SET >::vec_reg_t bReg, typename simd_info< INST_SET >::vec_reg_t cReg, typename simd_info< INST_SET >::vec_reg_t oneReg16Bit, typename simd_info< INST_SET >::vec_reg_t tmpReg)
     
    template<inst_set_t INST_SET, typename std::enable_if< INST_SET==inst_set_t::avx2||INST_SET==inst_set_t::avx512, int >::type = 0>
    void genU8Sum4 (x86::Emitter *a, typename simd_info< INST_SET >::vec_reg_t src, typename simd_info< INST_SET >::vec_reg_t dest, typename simd_info< INST_SET >::vec_reg_t oneReg16Bit, typename simd_info< INST_SET >::vec_reg_t tmpReg)
     
    template<typename T >
    void genU8Sum8 (x86::Emitter *a, T src, T dest, T tmpReg)
     
    void initCRegs (x86::Emitter *a, int rowRegs, int colRegs)
     
    void requantize_u8acc32_ref (int M, int N, int ld, const std::int32_t *inp, std::uint8_t *out, std::int32_t C_multiplier, std::int32_t C_right_shift, std::int32_t C_zero_point, std::int32_t A_zero_point, std::int32_t B_zero_point, const std::int32_t *row_offsets, const std::int32_t *col_offsets, const std::int32_t *bias, bool fuse_relu=false)
     
    void requantize_u8acc32_ref (int M, int N, int ld, const std::int32_t *inp, std::uint8_t *out, const float *C_multiplier, std::int32_t C_zero_point, std::int32_t A_zero_point, const std::int32_t *B_zero_point, const std::int32_t *row_offsets, const std::int32_t *col_offsets, const std::int32_t *bias, int ncols_per_quant_group, bool fuse_relu=false)
     
    void col_offsets_with_zero_pt_s8acc32_ref (int K, int N, int ld, const std::int8_t *Bint8, const std::int32_t *B_zero_point, std::int32_t *col_offsets, int ncols_per_quant_group)
     
    void spmdm_ref (int M, const std::uint8_t *A, int lda, CompressedSparseColumn &B, bool accumulation, std::int32_t *C, int ldc, int groups=1)
     
    template<typename IndexType >
    int sparse_adagrad_ref (int num_rows, int block_size, std::uint64_t param_size, float *w, const float *g, float *h, const IndexType *indices, float epsilon, float lr, float weight_decay=0.f, const double *counter=nullptr, const int64_t counter_halflife=0)
     
    template<typename IndexType >
    int rowwise_sparse_adagrad_ref (int num_rows, int block_size, std::uint64_t param_size, float *w, const float *g, float *h, const IndexType *indices, float epsilon, float lr, float weight_decay=0.f, const double *counter=nullptr, const int64_t counter_halflife=0)
     
    template<typename T >
    void transpose_ref (int64_t M, int64_t N, const T *src, int64_t ld_src, T *dst, int64_t ld_dst)
     
    +

    Detailed Description

    +

    Top level include file for FBGEMM.

    +

    Enumeration Type Documentation

    + +

    ◆ impl_type_t

enum class impl_type_t

    Typed enum for implementation type.

    +

    ref is reference and opt is optimized.

    + +
    +
    +

    Function Documentation

    + +

    ◆ Bfloat16ToFloat_ref()

    + +
    +
    + + + + + + + + + + + + + + + + +
    void Bfloat16ToFloat_ref (const bfloat16 * src,
    float * dst,
    size_t size )
    +
    +

Transform all entries in a matrix from bfloat16 to fp32: reference implementation.

    + +
    +
    + +

    ◆ Bfloat16ToFloat_simd()

    + +
    +
    + + + + + + + + + + + + + + + + +
    void Bfloat16ToFloat_simd (const bfloat16 * src,
    float * dst,
    size_t size )
    +
    +

Transform all entries in a matrix from bfloat16 to fp32: simd implementation.

    + +
    +
    + +

    ◆ col_offsets_with_zero_pt_s8acc32_ref()

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void col_offsets_with_zero_pt_s8acc32_ref (int K,
    int N,
    int ld,
    const std::int8_t * Bint8,
    const std::int32_t * B_zero_point,
    std::int32_t * col_offsets,
    int ncols_per_quant_group )
    +
    + +

    Reference implementation to compute adjusted col_offsets (sum of columns of B and adjusted with B_zero_point)

    +
    Parameters
    + + +
ncols_per_quant_group: see ncols_per_quant_group in requantize_u8acc32_ref
    +
    +
    + +
    +
    + +

    ◆ compare_buffers()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    int compare_buffers (const T * ref,
    const T * test,
    int m,
    int n,
    int ld,
    size_t max_mismatches_to_report,
    float atol )
    +
    + +

    A function to compare data in two buffers for closeness/equality.

    +

    Compare the reference and test result matrix to check the correctness.

    +
    Parameters
    + + + + + + + + +
ref: The buffer for the reference result matrix.
test: The buffer for the test result matrix.
m: The height of the reference and test result matrix.
n: The width of the reference and test result matrix.
ld: The leading dimension of the reference and test result matrix.
max_mismatches_to_report: The maximum number of tolerable mismatches to report.
atol: The tolerable error.
    +
    +
    +
    Return values
    + + + +
false: If the number of mismatches for reference and test result matrix exceeds max_mismatches_to_report.
true: If the number of mismatches for reference and test result matrix is tolerable.
    +
    +
    + +
    +
    + +
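For instance, a minimal usage sketch (assuming compare_buffers is declared in FBGEMM's utility header; the include path below is an assumption):

#include <vector>
#include "fbgemm/Utils.h"  // assumed header declaring fbgemm::compare_buffers

// Compare a dense row-major m x n test result against a reference,
// reporting at most 10 mismatches beyond an absolute tolerance of 1e-3.
void check_result(const std::vector<float>& ref, const std::vector<float>& test,
                  int m, int n) {
  fbgemm::compare_buffers(ref.data(), test.data(), m, n, /*ld=*/n,
                          /*max_mismatches_to_report=*/10, /*atol=*/1e-3f);
}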

    ◆ ConvFastPath()

    + +
    +
    +
    +template<int SPATIAL_DIM = 2, typename ACC_T = std::int32_t>
    + + + + + + + +
    optimized_conv_t ConvFastPath (const conv_param_t< SPATIAL_DIM > & conv_p)
    +
    + +

    Returns which fast path to take.

    +
    Template Parameters
    + + +
SPATIAL_DIM: It's 2 for 2D convolutions and 3 for 3D convolutions.
    +
    +
    +
    Returns
    optimized_conv_t::depthwise, optimized_conv_t::groupwise or optimized_conv_t::im2col
    + +
    +
    + +

    ◆ depthwise_2d_same_pad()

    + +
    +
    +
    +template<QuantizationGranularity Q_GRAN, typename BIAS_TYPE = std::int32_t>
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void depthwise_2d_same_pad (int N,
    int H,
    int W,
    int IC,
    int OC,
    int stride_h,
    int stride_w,
    std::int32_t A_zero_point,
    const std::uint8_t * A,
    const std::int32_t * B_zero_point,
    const PackedDepthWiseConvMatrix & Bp,
    const float * C_multiplier,
    std::int32_t C_zero_point,
    std::uint8_t * C,
    const std::int32_t * col_offsets,
    const BIAS_TYPE * bias,
    bool fuse_relu = false,
    const float * act_times_w_scale = nullptr,
    int thread_id = 0,
    int num_threads = 1 )
    +
    +

    Depth-wise convolution that results in the same output feature size as the input feature. That is PAD_T = PAD_B = (R - 1) / 2 and PAD_L = PAD_R = (S - 1) / 2. This function also does requantization.

    Parameters
    + + + +
col_offsets: nullptr if col_offsets are folded into bias
act_times_w_scale: Only used if BIAS_TYPE is float, i.e., bias is unquantized.
    +
    +
    + +
    +
    + +
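As a quick check of that padding rule, the output spatial size works out as below (an illustrative helper under the stated assumptions, not an FBGEMM API):

// Output spatial size under the "same" padding described above, stride 1.
// For an odd filter dimension (e.g., R = S = 3) the output equals the input size.
int same_pad_output_dim(int in_dim, int filter_dim) {
  const int pad_lo = (filter_dim - 1) / 2;  // PAD_T or PAD_L
  const int pad_hi = (filter_dim - 1) / 2;  // PAD_B or PAD_R
  return in_dim + pad_lo + pad_hi - filter_dim + 1;
}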

    ◆ depthwise_3d_same_pad()

    + +
    +
    +
    +template<QuantizationGranularity Q_GRAN, typename BIAS_TYPE = std::int32_t>
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void depthwise_3d_same_pad (const conv_param_t< 3 > & conv_p,
    std::int32_t A_zero_point,
    const std::uint8_t * A,
    const std::int32_t * B_zero_point,
    const PackedDepthWiseConvMatrix & Bp,
    const float * C_multiplier,
    std::int32_t C_zero_point,
    std::uint8_t * C,
    const std::int32_t * col_offsets,
    const BIAS_TYPE * bias,
    bool fuse_relu = false,
    const float * act_times_w_scale = nullptr,
    int thread_id = 0,
    int num_threads = 1 )
    +
    +
    Parameters
    + + +
col_offsets: nullptr if col_offsets are folded into bias
    +
    +
    + +
    +
    + +

    ◆ emitExtractHalfVector()

    + +
    +
    +
    +template<inst_set_t instSet, typename T , typename std::enable_if< instSet==inst_set_t::avx512||instSet==inst_set_t::avx512_ymm||instSet==inst_set_t::avx512_vnni||instSet==inst_set_t::avx512_vnni_ymm, int >::type = 0>
    + + + + + + + + + + + + + + + + + + + + + +
    void emitExtractHalfVector (x86::Emitter * a,
    x86::Ymm half,
    const x86::Zmm vec,
    int idx )
    +
    + +

Emit partial extract from a wide register to a half register, e.g., Zmm -> Ymm or Ymm -> Xmm.

    +
    Template Parameters
    + + +
instSet: instruction set to be used
    +
    +
    +
    Parameters
    + + + + +
half: Destination (half) vector register
vec: Source (full) vector register
idx: Index of the half vector, 0 or 1
    +
    +
    + +
    +
    + +

    ◆ emitLoadDWord()

    + +
    +
    +
    +template<inst_set_t instSet, typename T , typename std::enable_if< instSet==inst_set_t::avx2, int >::type = 0>
    + + + + + + + + + + + + + + + + +
    void emitLoadDWord (x86::Emitter * a,
    T dest,
    const x86::Mem & ptr )
    +
    + +

Emit an instruction to load a 32-bit integer. AVX512 has a different instruction to load registers with index >= 16.

    +
    Template Parameters
    + + +
    TRegister type of destination, e.g., x86::Ymm or x86::Zmm
    +
    +
    +
    Parameters
    + + +
    destDestination vector register
    +
    +
    + +
    +
    + +

    ◆ fbgemmConv()

    + +
    +
    +
    +template<typename processOutputType , int SPATIAL_DIM = 2, typename ACC_T = std::int32_t>
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    int fbgemmConv (const conv_param_t< SPATIAL_DIM > & conv_p,
    const std::uint8_t * activations,
    PackWeightsForConv< SPATIAL_DIM, std::int8_t, ACC_T > & packed_weights,
    typename processOutputType::outType * out,
    std::int32_t * outBuffer,
    processOutputType & outProcess,
    int thread_id,
    int num_threads,
    const BlockingFactors * blocking_params = nullptr )
    +
    + +

    Performs convolution using fastest path available.

    +
    Template Parameters
    + + +
SPATIAL_DIM: It's 2 for 2D convolutions and 3 for 3D convolutions.
    +
    +
    + +
    +
    + +

    ◆ fbgemmEnableAvx512Ymm()

    + +
    +
    + + + + + + + +
    void fbgemmEnableAvx512Ymm (bool flag)
    +
    + +

    Enable AVX512-256 path for Intel(r) Xeon(r) D servers.

    +

Enables AVX512-256 if appropriate. Intended for Skylake-based Xeon-D processors, where AVX512-256 is preferred due to higher Turbo frequencies.

    +
    Parameters
    + + +
flag: True enables / False disables
    +
    +
    + +
    +
    + +

    ◆ fbgemmForceIsa()

    + +
    +
    + + + + + + + +
    void fbgemmForceIsa (inst_set_t isa)
    +
    + +

    Explicitly set instruction set to be used.

    +

Force a specific architecture for GEMM kernel execution; overrides the FBGEMM_ENABLE_AVX512_256 environment variable.

    +
    Parameters
    + + +
isa: the ISA to enforce. Supported options: AVX2 (inst_set_t::avx2), AVX512 (inst_set_t::avx512), AVX512_E1 (inst_set_t::avx512_vnni), AVX512_256 (inst_set_t::avx512_ymm), AVX512_E1_256 (inst_set_t::avx512_vnni_ymm)
    +
    +
    + +
    +
    + +
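A minimal usage sketch (the include below assumes FBGEMM's top-level header pulls in this declaration):

#include "fbgemm/Fbgemm.h"  // assumed to declare fbgemmForceIsa and inst_set_t

// Pin GEMM kernel dispatch to plain AVX2, e.g., to reproduce results across
// machines; this overrides FBGEMM_ENABLE_AVX512_256.
void pin_to_avx2() {
  fbgemm::fbgemmForceIsa(fbgemm::inst_set_t::avx2);
}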

    ◆ fbgemmGet2DPartition()

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    int fbgemmGet2DPartition (int m,
    int n,
    int nthreads,
    int n_align,
    double aspect_ratio )
    +
    + +

    A heuristic algorithm to partition the threads across m and n dimensions for parallelization, ensuring the ratio between the number of rows allocated to each thread in the m dimension and the number of columns allocated to each thread in the n dimension is approximately aspect_ratio.

    +

    The less aspect_ratio is, the more favorable it is to parallelize the m dimension over the n dimension.

    + +
    +
    + +

    ◆ fbgemmGroupwiseConv()

    + +
    +
    +
    +template<typename packed_W , typename outType , bool FUSE_RELU, QuantizationGranularity Q_GRAN, int SPATIAL_DIM = 2, typename BIAS_TYPE = std::int32_t>
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void fbgemmGroupwiseConv (const conv_param_t< SPATIAL_DIM > & conv_param,
    const std::uint8_t * activations,
    std::int32_t a_zero_point,
    std::int32_t * rowOffsetBuf,
    packed_W & packed_weights,
    outType * out,
    std::int32_t * outBuffer,
    const ReQuantizeOutput< FUSE_RELU, Q_GRAN, BIAS_TYPE > & outProcess,
    int thread_id,
    int num_threads )
    +
    + +

Perform small-channels-per-group groupwise convolution. Note: currently threading is not supported; this function does nothing for thread_ids > 0, i.e., returns early.

    +
    Parameters
    + + +
rowOffsetBuf: nullptr if B uses symmetric quantization
    +
    +
    + +
    +
    + +

    ◆ fbgemmInstructionSet()

    + +
    +
    + + + + + + + +
    inst_set_t fbgemmInstructionSet ()
    +
    + +

    Retrieve current CPU instruction set.

    +

Determine the best available x86 machine ISA to be used for GEMM kernels. If the FBGEMM_ENABLE_AVX512_256 environment variable or fbgemmForceIsa() is set, the choice is forced to that specific architecture, provided the processor supports it. For example, forcing AVX2 on Skylake will execute the AVX2 version of the kernels; however, forcing AVX512-256 on Broadwell will fail, and the AVX2 version of the kernels will be executed instead.

    + +
    +
    + +

    ◆ fbgemmPacked()

    + +
    +
    +
    +template<typename packingAMatrix , typename packingBMatrix , typename cT , typename processOutputType >
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void fbgemmPacked (PackMatrix< packingAMatrix, typename packingAMatrix::inpType, typename packingAMatrix::accType > & packA,
    PackMatrix< packingBMatrix, typename packingBMatrix::inpType, typename packingBMatrix::accType > & packB,
    cT * C,
    std::int32_t * C_buffer,
    std::uint32_t ldc,
    const processOutputType & outProcess,
    int thread_id,
    int num_threads,
    const BlockingFactors * blocking_params = nullptr )
    +
    +

    Matrix B must be prepacked. For matrix A, packA.pack function is called to pack it.

    +
    Template Parameters
    + + + + + +
packingAMatrix: processing of A matrix while packing, e.g., PackAWithQuantRowOffset
packingBMatrix: processing of B matrix while packing, e.g., pre-multiply by alpha
cT: data type of C matrix
processOutputType: further processing of outputs, e.g., Relu
    +
    +
    + +
    +
    + +

    ◆ fbgemmPartition1D()

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void fbgemmPartition1D (int thread_id,
    int num_threads,
    std::int64_t total_work,
    std::int64_t & start,
    std::int64_t & end )
    +
    + +

    Partition work across given number of threads.

    +
    Parameters
    + + + +
start: Given thread_id should execute starting from the index start
end: Given thread_id should stop executing at the index end
    +
    +
    +

    i.e., the loop should be equivalent to for(int i = start; i < end; ++i)

    + +
    +
    + +
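A minimal sketch of that pattern (assuming the declaration comes in through FBGEMM's utility header):

#include <cstdint>
#include "fbgemm/Utils.h"  // assumed header declaring fbgemmPartition1D

// Each thread computes its own [start, end) slice of total_work rows.
void process_rows(int thread_id, int num_threads, std::int64_t total_work) {
  std::int64_t start = 0, end = 0;
  fbgemm::fbgemmPartition1D(thread_id, num_threads, total_work, start, end);
  for (std::int64_t i = start; i < end; ++i) {
    // ... work on row i ...
  }
}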

    ◆ fbgemmPartition1DBlocked()

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void fbgemmPartition1DBlocked (int thread_id,
    int num_threads,
    std::int64_t total_work,
    int block_size,
    std::int64_t & start,
    std::int64_t & end )
    +
    + +

    Partition work across given number of threads in blocks of size block_size. Each thread gets a multiple of block_size work or nothing, except the last one. The last one might receive the fringe case.

    +
    Parameters
    + + + +
start: Given thread_id should execute starting from the index start
end: Given thread_id should stop executing at the index end
    +
    +
    +

    The loop can be equivalent to for(int i = start; i < end; i+=block_size) except for the last thread. (i.e., thread_id = num_threads - 1)

    +

Example 1: block_size = 2, num_threads = 2

total_work  start(th 0)  end(th 0)  start(th 1)  end(th 1)
4           0            2          2            4
5           0            2          2            5

Example 2: block_size = 2, num_threads = 3

total_work  start(th 0)  end(th 0)  start(th 1)  end(th 1)  start(th 2)  end(th 2)
4           0            2          2            4          4            4
5           0            2          2            4          4            5

Example 3: block_size = 2, num_threads = 4

total_work  start(th 0)  end(th 0)  start(th 1)  end(th 1)  start(th 2)  end(th 2)  start(th 3)  end(th 3)
4           0            2          2            4          4            4          4            4
5           0            2          2            4          4            4          4            5

    + +
    +
    + +

    ◆ Float16ToFloat_ref()

    + +
    +
    + + + + + + + + + + + + + + + + +
    void Float16ToFloat_ref (const float16 * src,
    float * dst,
    size_t size )
    +
    +

Transform all entries in a matrix from float16 to fp32: reference implementation.

    + +
    +
    + +

    ◆ Float16ToFloat_simd()

    + +
    +
    + + + + + + + + + + + + + + + + +
    void Float16ToFloat_simd (const float16 * src,
    float * dst,
    size_t size )
    +
    +

Transform all entries in a matrix from float16 to fp32: simd implementation.

    + +
    +
    + +

    ◆ FloatOrHalfToFused8BitRowwiseQuantizedSBFloat()

    + +
    +
    +
    +template<typename InputType >
    + + + + + + + + + + + + + + + + + + + + + +
    void FloatOrHalfToFused8BitRowwiseQuantizedSBFloat (const InputType * input,
    size_t input_rows,
    int input_columns,
    std::uint8_t * output )
    +
    +

    Convert float or half inputs to rowwise quantized (8-bit) outputs. Scale and Bias are in float. Each row's Scale and Bias are stored in the row itself (fused) at the end.

    +

    This version intentionally supports only 8-bit because we want to discourage the usage of float scale and bias with 2 and 4 bit cases as that diminishes the overall memory savings.

    + +
    +
    + +
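To make the fused layout concrete, the per-row footprint works out as below (a sketch of the arithmetic, not library code):

#include <cstddef>

// One fused output row: input_columns quantized bytes, then a float scale
// and a float bias appended at the end of the row.
std::size_t fused_8bit_row_bytes(int input_columns) {
  return static_cast<std::size_t>(input_columns) + 2 * sizeof(float);
}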

    ◆ FloatOrHalfToFused8BitRowwiseQuantizedSBFloatRef()

    + +
    +
    +
    +template<typename InputType >
    + + + + + + + + + + + + + + + + + + + + + +
    void FloatOrHalfToFused8BitRowwiseQuantizedSBFloatRef (const InputType * input,
    size_t input_rows,
    int input_columns,
    std::uint8_t * output )
    +
    +

    Same as FloatOrHalfToFused8BitRowwiseQuantizedSBFloat but unoptimized. This should not be called directly except in testing.

    + +
    +
    + +

    ◆ FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfRef()

    + +
    +
    +
    +template<typename InputType >
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfRef (int bit_rate,
    const InputType * input,
    size_t input_rows,
    int input_columns,
    std::uint8_t * output )
    +
    +

Same as FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf but unoptimized. This should not be called directly except in testing.

    + +
    +
    + +

    ◆ FloatToBfloat16_ref()

    + +
    +
    + + + + + + + + + + + + + + + + +
    void FloatToBfloat16_ref (const float * src,
    bfloat16 * dst,
    size_t size )
    +
    +

Transform all entries in a matrix from fp32 to bfloat16: reference implementation.

    + +
    +
    + +

    ◆ FloatToBfloat16_simd()

    + +
    +
    + + + + + + + + + + + + + + + + +
    void FloatToBfloat16_simd (const float * src,
    bfloat16 * dst,
    size_t size )
    +
    +

Transform all entries in a matrix from fp32 to bfloat16: simd implementation.

    + +
    +
    + +

    ◆ FloatToFloat16_ref()

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + +
    void FloatToFloat16_ref (const float * src,
    float16 * dst,
    size_t size,
    bool do_clip = false )
    +
    +

Transform all entries in a matrix from fp32 to float16: reference implementation.

    +
    Parameters
    + + +
do_clip: if true, we saturate to fp16 min and max instead of generating infinities.
    +
    +
    + +
    +
    + +
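A sketch of the do_clip saturation described above (65504 is the largest finite fp16 value):

#include <algorithm>

// Saturate to the finite fp16 range instead of letting out-of-range inputs
// become +/-inf during the fp32 -> fp16 conversion.
inline float clip_to_fp16_range(float x) {
  constexpr float kFp16Max = 65504.0f;
  return std::max(-kFp16Max, std::min(kFp16Max, x));
}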

    ◆ FloatToFloat16_simd()

    + +
    +
    + + + + + + + + + + + + + + + + + + + + + +
    void FloatToFloat16_simd (const float * src,
    float16 * dst,
    size_t size,
    bool do_clip = false )
    +
    +

Transform all entries in a matrix from fp32 to float16: simd implementation.

    +
    Parameters
    + + +
do_clip: if true, we saturate to fp16 min and max instead of generating infinities.
    +
    +
    + +
    +
    + +

    ◆ Fused8BitRowwiseQuantizedSBFloatToFloatOrHalf()

    + +
    +
    +
    +template<typename OutputType >
    + + + + + + + + + + + + + + + + + + + + + +
    void Fused8BitRowwiseQuantizedSBFloatToFloatOrHalf (const uint8_t * input,
    size_t input_rows,
    int input_columns,
    OutputType * output )
    +
    +

    Convert fused rowwise quantized (8-bit) inputs to float or half outputs. Scale and Bias are in float. Each row's Scale and Bias are stored in the row itself (fused) at the end.

    +

    This version intentionally supports only 8-bit because the corresponding quantize version only supports 8-bit.

    + +
    +
    + +

    ◆ Fused8BitRowwiseQuantizedSBFloatToFloatOrHalfRef()

    + +
    +
    +
    +template<typename OutputType >
    + + + + + + + + + + + + + + + + + + + + + +
    void Fused8BitRowwiseQuantizedSBFloatToFloatOrHalfRef (const uint8_t * input,
    size_t input_rows,
    int input_columns,
    OutputType * output )
    +
    +

    Same as Fused8BitRowwiseQuantizedSBFloatToFloatOrHalf but unoptimized. This should not be called directly except in testing.

    + +
    +
    + +

    ◆ FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf()

    + +
    +
    +
    +template<typename OutputType >
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf (int bit_rate,
    const uint8_t * input,
    size_t input_rows,
    int input_columns,
    OutputType * output )
    +
    +

    Convert fused rowwise quantized inputs to float (fp32 or fp16). bitrate specifies the number of bits in quantized input. Scale and Bias are in fp16. Each row's Scale and Bias are stored in the row itself (fused) at the end.

    +
    Parameters
    + + +
bit_rate: can be 2, 4, or 8
    +
    +
    + +
    +
    + +
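For reference, the per-row footprint of this fused n-bit layout follows from the stride description elsewhere on this page (a sketch of the arithmetic, assuming bit_rate is 2, 4, or 8):

#include <cstddef>
#include <cstdint>

// One fused row: the packed n-bit payload, then an fp16 scale and fp16 bias.
std::size_t fused_nbit_row_bytes(int bit_rate, int block_size) {
  const int num_elem_per_byte = 8 / bit_rate;
  const std::size_t payload =
      (block_size + num_elem_per_byte - 1) / num_elem_per_byte;  // round up
  return payload + 2 * sizeof(std::uint16_t);  // fp16 scale + fp16 bias
}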

    ◆ FusedNBitRowwiseQuantizedSBHalfToFloatOrHalfRef()

    + +
    +
    +
    +template<typename OutputType >
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void FusedNBitRowwiseQuantizedSBHalfToFloatOrHalfRef (int bit_rate,
    const uint8_t * input,
    size_t input_rows,
    int input_columns,
    OutputType * output )
    +
    +

Same as FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf but unoptimized. This should not be called directly except in testing.

    + +
    +
    + +

    ◆ gen16BitVectorOne()

    + +
    +
    +
    +template<inst_set_t instSet, typename T , typename std::enable_if< instSet==inst_set_t::avx2, int >::type = 0>
    + + + + + + + + + + + +
    void gen16BitVectorOne (x86::Emitter * a,
    T dest )
    +
    + +

    Create instruction sequence to generate 16-bit 1s.

    +
    Template Parameters
    + + +
    TRegister type of destination, e.g., x86::Ymm or x86::Zmm
    +
    +
    +
    Parameters
    + + +
    destOnce the instruction sequence is executed, dest[0:15] will have 0x0001, dest[16:31] will have 0x0001 and so on
    +
    +
    + +
    +
    + +

    ◆ gen8BitVectorOne()

    + +
    +
    +
    +template<typename T , typename std::enable_if< std::is_same< T, x86::Ymm >::value, int >::type = 0>
    + + + + + + + + + + + +
    void gen8BitVectorOne (x86::Emitter * a,
    T dest )
    +
    + +

    Create instruction sequence to generate 8-bit 1s.

    +
    Template Parameters
    + + +
    TRegister type of destination, e.g., x86::Ymm or x86::Zmm
    +
    +
    +
    Parameters
    + + +
    destOnce the instruction sequence is executed, dest[0:7] will have 0x01, dest[8:15] will have 0x01 and so on
    +
    +
    + +
    +
    + +

    ◆ GenerateEmbeddingSpMDM()

    + +
    +
    +
    +template<typename InType , typename IndexType , typename OffsetType = std::int32_t, typename OutType = float, bool THREAD_LOCAL = false>
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    EmbeddingSpMDMKernelSignature< InType, IndexType, OffsetType, OutType >::Type GenerateEmbeddingSpMDM (const std::int64_t block_size,
    bool has_weight,
    bool normalize_by_lengths,
    int prefetch = 16,
    bool is_weight_positional = false,
    bool use_offsets = true,
    bool is_bf16_out = false,
    bool is_bf16_in = false )
    +
    +
    Template Parameters
    + + + + +
InType: can be float, float16, or uint8_t
IndexType: can be int32_t or int64_t
OffsetType: can be int32_t or int64_t
    +
    +
    +
    Parameters
    + + +
use_offsets: If true, the generated code assumes we will pass offsets instead of lengths, conforming to the PyTorch EmbeddingBag interface. In this case, the length of the offsets array should be output_size + 1 and offsets[output_size] should be index_size. If false, the generated code assumes we will pass lengths, conforming to the Caffe2 SparseLengthsSum interface.
    +
    +
    + +
    +
    + +
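To make the offsets-versus-lengths distinction concrete, here is a small hypothetical example (3 bags pooled over 7 indices; values are illustrative only):

#include <cstdint>
#include <vector>

// Caffe2 SparseLengthsSum style (use_offsets = false): one length per bag.
std::vector<std::int32_t> lengths = {2, 3, 2};

// PyTorch EmbeddingBag style (use_offsets = true): output_size + 1 entries,
// with offsets[output_size] == index_size (7 here).
std::vector<std::int32_t> offsets = {0, 2, 5, 7};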

    ◆ GenerateEmbeddingSpMDMFP8WithStrides()

    + +
    +
    +
    +template<typename IndexType , typename OffsetType = std::int32_t, typename OutType = float>
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    EmbeddingSpMDMKernelSignature< std::uint8_t, IndexType, OffsetType, OutType >::Type GenerateEmbeddingSpMDMFP8WithStrides (const std::int64_t block_size,
    bool normalize_by_lengths,
    bool is_weight_positional = false,
    bool use_offsets = true,
    std::int64_t output_stride = -1,
    std::int64_t input_stride = -1,
    int exponent_bits = 4,
    int exponent_bias = 7,
    bool is_bf16_out = false )
    +
    +
    Parameters
    + + + + + +
output_stride: If -1, output_stride is same as block_size
input_stride: in bytes. If -1, input_stride is same as block_size / num_elem_per_byte + 2 * sizeof(float16)
exponent_bits: the number of exponent bits in the FP8 encoding (normally 4 or 5)
exponent_bias: subtracted from the exponent to obtain the actual exponent for the floating-point number
    +
    +
    + +
    +
    + +
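As a rough illustration of how exponent_bits and exponent_bias enter the decoded value, here is a generic minifloat decode sketch (normal values only; FBGEMM's exact FP8 handling of denormals and special values may differ):

#include <cmath>
#include <cstdint>

// Decode an 8-bit minifloat with 1 sign bit, `exponent_bits` exponent bits,
// and the remaining bits as mantissa (defaults match the documented 4 / 7).
float fp8_decode_sketch(std::uint8_t v, int exponent_bits = 4,
                        int exponent_bias = 7) {
  const int mantissa_bits = 7 - exponent_bits;
  const int sign = (v >> 7) & 0x1;
  const int exponent = (v >> mantissa_bits) & ((1 << exponent_bits) - 1);
  const int mantissa = v & ((1 << mantissa_bits) - 1);
  const float frac = 1.0f + static_cast<float>(mantissa) / (1 << mantissa_bits);
  const float mag = std::ldexp(frac, exponent - exponent_bias);
  return sign ? -mag : mag;
}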

    ◆ GenerateEmbeddingSpMDMNBit()

    + +
    +
    +
    +template<typename IndexType , typename OffsetType = std::int32_t, typename OutType = float>
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    EmbeddingSpMDMKernelSignature< std::uint8_t, IndexType, OffsetType, OutType >::Type GenerateEmbeddingSpMDMNBit (int bit_rate,
    const std::int64_t block_size,
    bool has_weight,
    bool normalize_by_lengths,
    int prefetch = 16,
    bool is_weight_positional = false,
    bool use_offsets = true )
    +
    +
    Template Parameters
    + + + +
IndexType: can be int32_t or int64_t
OffsetType: can be int32_t or int64_t
    +
    +
    +
    Parameters
    + + +
bit_rate: can be 2 or 4
    +
    +
    + +
    +
    + +

    ◆ GenerateEmbeddingSpMDMNBitRowWiseSparse()

    + +
    +
    +
    +template<typename IndexType , typename OffsetType = std::int32_t>
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    EmbeddingSpMDMRowWiseSparseKernelSignature< std::uint8_t, IndexType, OffsetType >::Type GenerateEmbeddingSpMDMNBitRowWiseSparse (int bit_rate,
    const std::int64_t block_size,
    bool has_weight,
    bool normalize_by_lengths,
    int prefetch = 16,
    bool is_weight_positional = false,
    bool use_offsets = true )
    +
    +
    Template Parameters
    + + + +
IndexType: can be int32_t or int64_t
OffsetType: can be int32_t or int64_t
    +
    +
    +
    Parameters
    + + +
bit_rate: can be 2 or 4
    +
    +
    + +
    +
    + +

    ◆ GenerateEmbeddingSpMDMNBitWithStrides()

    + +
    +
    +
    +template<typename IndexType , typename OffsetType = std::int32_t, typename OutType = float, bool THREAD_LOCAL = false>
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    EmbeddingSpMDMKernelSignature< std::uint8_t, IndexType, OffsetType, OutType >::Type GenerateEmbeddingSpMDMNBitWithStrides (int bit_rate,
    const std::int64_t block_size,
    bool has_weight,
    bool normalize_by_lengths,
    int prefetch = 16,
    bool is_weight_positional = false,
    bool use_offsets = true,
    std::int64_t output_stride = -1,
    std::int64_t input_stride = -1,
    bool scale_bias_last = true,
    bool is_bf16_out = false )
    +
    +
    Parameters
    + + + + +
output_stride: If -1, output_stride is same as block_size
input_stride: in bytes. If -1, input_stride is same as block_size / num_elem_per_byte + 2 * sizeof(float16)
scale_bias_last: if false, scale and bias appear at the beginning of each row and are in fp16 for table batched embedding (TBE) in FBGEMM_GPU. If false, it can also take -1 indices (output from pruned embedding id mapping)
    +
    +
    + +
    +
    + +

    ◆ GenerateEmbeddingSpMDMRowWiseSparse()

    + +
    +
    +
    +template<typename InType , typename IndexType , typename OffsetType = std::int32_t>
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    EmbeddingSpMDMRowWiseSparseKernelSignature< InType, IndexType, OffsetType >::Type GenerateEmbeddingSpMDMRowWiseSparse (const std::int64_t block_size,
    bool has_weight,
    bool normalize_by_lengths,
    int prefetch = 16,
    bool is_weight_positional = false,
    bool use_offsets = true )
    +
    +
    Template Parameters
    + + + + +
InType: can be float, float16, or uint8_t
IndexType: can be int32_t or int64_t
OffsetType: can be int32_t or int64_t
    +
    +
    + +
    +
    + +

    ◆ GenerateEmbeddingSpMDMWithStrides()

    + +
    +
    +
    +template<typename InType , typename IndexType , typename OffsetType = std::int32_t, typename OutType = float, bool THREAD_LOCAL = false>
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    EmbeddingSpMDMKernelSignature< InType, IndexType, OffsetType, OutType >::Type GenerateEmbeddingSpMDMWithStrides (const std::int64_t block_size,
    bool has_weight,
    bool normalize_by_lengths,
    int prefetch = 16,
    bool is_weight_positional = false,
    bool use_offsets = true,
    std::int64_t output_stride = -1,
    std::int64_t input_stride = -1,
    bool scale_bias_last = true,
    bool no_bag = false,
    bool is_bf16_out = false,
    bool is_bf16_in = false )
    +
    +
    Parameters
    + + + + +
output_stride: If -1, output_stride is same as block_size
input_stride: If -1, input_stride is same as block_size
scale_bias_last: if false, scale and bias appear at the beginning of each row and are in fp16 for table batched embedding (TBE) in FBGEMM_GPU. If false, it can also take -1 indices (output from pruned embedding id mapping)
    +
    +
    + +
    +
    + +

    ◆ GenerateRowWiseSparseAdaGradFused()

    + +
    +
    +
    +template<typename IndexType , typename OffsetType = std::int32_t, typename DataType = float>
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    RowWiseSparseAdaGradFusedSignature< IndexType, OffsetType, DataType >::Type GenerateRowWiseSparseAdaGradFused (int block_size,
    int prefetch = 16,
    bool use_offsets = true,
    bool use_stochastic_rounding = true,
    int grad_stride = -1 )
    +
    +
    Parameters
    + + +
grad_stride: If -1, grad_stride is same as block size
    +
    +
    + +
    +
    + +

    ◆ genU8I8S32FMA()

    + +
    +
    +
    +template<inst_set_t INST_SET, typename std::enable_if< INST_SET==inst_set_t::avx2||INST_SET==inst_set_t::avx512, int >::type = 0>
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void genU8I8S32FMA (x86::Emitter * a,
    typename simd_info< INST_SET >::vec_reg_t aReg,
    typename simd_info< INST_SET >::vec_reg_t bReg,
    typename simd_info< INST_SET >::vec_reg_t cReg,
    typename simd_info< INST_SET >::vec_reg_t oneReg16Bit,
    typename simd_info< INST_SET >::vec_reg_t tmpReg )
    +
    + +

    Generates instruction sequence to compute s32 += U8 * I8.

    +
    Template Parameters
    + + +
    TRegister type of destination, e.g., x86::Ymm or x86::Zmm
    +
    +
    +
    Parameters
    + + +
    cRegcontains result
    +
    +
    + +
    +
    + +

    ◆ genU8Sum4()

    + +
    +
    +
    +template<inst_set_t INST_SET, typename std::enable_if< INST_SET==inst_set_t::avx2||INST_SET==inst_set_t::avx512, int >::type = 0>
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    void genU8Sum4 (x86::Emitter * a,
    typename simd_info< INST_SET >::vec_reg_t src,
    typename simd_info< INST_SET >::vec_reg_t dest,
    typename simd_info< INST_SET >::vec_reg_t oneReg16Bit,
    typename simd_info< INST_SET >::vec_reg_t tmpReg )
    +
    + +

    Add 4 consecutive numbers of type uint8 and emit their sum as 32-bit numbers. i.e., dest[0:31] contains src[0:7] + src[8:15] + src[16:23] + src[24:31].

    +
    Template Parameters
    + + +
    TRegister type of destination, e.g., x86::Ymm or x86::Zmm
    +
    +
    +
    Parameters
    + + +
    destcontains result
    +
    +
    + +
    +
    + +

    ◆ genU8Sum8()

    + +
    +
    +
    +template<typename T >
    + + + + + + + + + + + + + + + + + + + + + +
    void genU8Sum8 (x86::Emitter * a,
    T src,
    T dest,
    T tmpReg )
    +
    + +

Add 8 consecutive numbers of type uint8 and emit their sum as 16-bit numbers, i.e., dest[0:15] contains src[0:7] + src[8:15] + src[16:23] + src[24:31] + src[32:39] + src[40:47] + src[48:55] + src[56:63],

and

dest[64:79] contains src[64:71] + src[72:79] + src[80:87] + src[88:95] + src[96:103] + src[104:111] + src[112:119] + src[120:127],

and so on.

    Template Parameters
    T    Register type of destination, e.g., x86::Ymm or x86::Zmm

    Parameters
    dest    Contains the result.
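    A minimal AVX2 intrinsics sketch of this reduction (illustrative; a byte-wise sum-of-absolute-differences against zero produces exactly the dest layout described above).

#include <immintrin.h>

// Hedged sketch: sum each group of 8 consecutive uint8 values of src; the
// sum lands in the low 16 bits of each 64-bit lane (dest[0:15], dest[64:79], ...).
static inline __m256i u8_sum8_sketch(__m256i src) {
  return _mm256_sad_epu8(src, _mm256_setzero_si256());
}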

    ◆ initCRegs()

    void initCRegs (x86::Emitter * a,
    int rowRegs,
    int colRegs )

    Generate instructions for initializing the C (accumulator) registers to 0 in the 32-bit accumulation kernel.

    ◆ is_autovec_disabled()

    bool is_autovec_disabled ()

    Chooses which kernel (autovec/asmjit/ref) to use for nbit-CPU-TBE. Available kernels:

    • ref: non-optimized reference implementation that focuses on correctness, not performance
    • asmjit: hand-optimized kernel that uses asmjit to emit SIMD instructions at runtime; only supports x86_64 CPUs with the AVX2/AVX512 instruction sets
    • autovec: kernel written in regular C++, but structured so that compilers can readily generate vectorized SIMD instructions from it; supports both x86_64 and aarch64 CPUs, and is currently only available on Linux

    How to set environment variables:

    • No environment variables: on x86_64 we will default to the asmjit kernel, and on aarch64 Linux we will default to autovec. On non-Linux aarch64 we will fall back to ref.
    • Set FBGEMM_NO_AUTOVEC: on aarch64 Linux we will use ref. On other platforms this has no effect.
    • Set FBGEMM_NO_ASMJIT: on x86_64 we will use ref. On other platforms this has no effect.
    • Set FBGEMM_NO_ASMJIT and FBGEMM_FORCE_AUTOVEC: on x86_64 we will use autovec if these two variables are set at the same time. No effect on other platforms.
    • FBGEMM_FORCE_AUTOVEC will override FBGEMM_NO_AUTOVEC if they are set at the same time.
    • These variables are considered set as long as they exist, regardless of their content: assigning values like "1", "true", "y", "0", "false" or "no" all have the same effect. The easiest way to set a variable is to prepend <VARIABLE>=1 to the benchmarking command (see the sketch below).
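    A minimal sketch of the "set means set, regardless of value" convention described above (illustrative only, not FBGEMM's actual implementation).

#include <cstdlib>

// Hedged sketch: an environment variable counts as set if it merely exists.
static bool env_flag_is_set(const char* name) {
  return std::getenv(name) != nullptr;  // "0" or "false" still counts as set
}

bool autovec_disabled_sketch() {
  // FBGEMM_FORCE_AUTOVEC overrides FBGEMM_NO_AUTOVEC when both are set.
  return env_flag_is_set("FBGEMM_NO_AUTOVEC") &&
         !env_flag_is_set("FBGEMM_FORCE_AUTOVEC");
}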

    ◆ PackA()

    void PackA (int nrow,
    int ncol,
    const float * from,
    int ldim,
    float * to )

    Todo: make it fast with AVX2 transpose.

    Packs a matrix in row-major or col-major format into the internal packed blocked-row major format.

    ◆ printMatrix()

    template<typename T >
    void printMatrix (matrix_op_t op,
    const T * inp,
    size_t R,
    size_t C,
    size_t ld,
    std::string name )

    Debugging helper.

    +

    Print the matrix.

    Parameters
    op      Transpose type of the matrix.
    R       The height of the matrix.
    C       The width of the matrix.
    ld      The leading dimension of the matrix.
    name    The prefix string before printing the matrix.

    ◆ Quantize()

    template<typename T , bool LEGACY = true>
    T Quantize (float src,
    std::int32_t zero_point,
    float scale,
    int result_precision,
    bool result_is_signed = std::is_signed<T>::value )

    Quantize src using zero_point and scale, clamp to the specified precision, and convert it to type T

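    A minimal sketch of the quantization formula (illustrative; it does not reproduce FBGEMM's exact rounding or LEGACY behavior).

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <type_traits>

// Hedged sketch: q = clamp(round(zero_point + src / scale)) to the range
// implied by `precision` and the signedness of T.
template <typename T>
T quantize_sketch(float src, std::int32_t zero_point, float scale,
                  int precision, bool is_signed = std::is_signed<T>::value) {
  const std::int64_t qmin = is_signed ? -(1LL << (precision - 1)) : 0;
  const std::int64_t qmax = is_signed ? (1LL << (precision - 1)) - 1
                                      : (1LL << precision) - 1;
  const std::int64_t q =
      static_cast<std::int64_t>(std::nearbyint(zero_point + src / scale));
  return static_cast<T>(std::min(qmax, std::max(qmin, q)));
}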

    ◆ requantize_u8acc32_ref() [1/2]

    void requantize_u8acc32_ref (int M,
    int N,
    int ld,
    const std::int32_t * inp,
    std::uint8_t * out,
    const float * C_multiplier,
    std::int32_t C_zero_point,
    std::int32_t A_zero_point,
    const std::int32_t * B_zero_point,
    const std::int32_t * row_offsets,
    const std::int32_t * col_offsets,
    const std::int32_t * bias,
    int ncols_per_quant_group,
    bool fuse_relu = false )
    Reference implementation of the requantization step, using a float multiplier.

    Parameters
    bias                     Can be nullptr.
    ncols_per_quant_group    The number of columns that share the same quantization parameters: ncols_per_quant_group == N is per-tensor quantization, ncols_per_quant_group == N / groups is per-group quantization, and ncols_per_quant_group == 1 is per-channel quantization.
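    A minimal per-element sketch of this requantization (illustrative; it ignores per-group handling and the reference kernel's M x N loop structure).

#include <algorithm>
#include <cmath>
#include <cstdint>

// Hedged sketch: requantize one int32 accumulator to uint8 with a float
// multiplier, removing the A/B zero-point contributions and adding the bias.
inline std::uint8_t requantize_elem_sketch(
    std::int32_t acc, float C_multiplier, std::int32_t C_zero_point,
    std::int32_t A_zero_point, std::int32_t B_zero_point,
    std::int32_t row_offset, std::int32_t col_offset, std::int32_t bias,
    bool fuse_relu) {
  const std::int32_t corrected =
      acc - A_zero_point * col_offset - B_zero_point * row_offset + bias;
  const std::int32_t q =
      C_zero_point +
      static_cast<std::int32_t>(std::nearbyint(C_multiplier * corrected));
  const std::int32_t lo = fuse_relu ? C_zero_point : 0;  // fused ReLU floor
  return static_cast<std::uint8_t>(std::min(255, std::max(lo, q)));
}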

    ◆ requantize_u8acc32_ref() [2/2]

    void requantize_u8acc32_ref (int M,
    int N,
    int ld,
    const std::int32_t * inp,
    std::uint8_t * out,
    std::int32_t C_multiplier,
    std::int32_t C_right_shift,
    std::int32_t C_zero_point,
    std::int32_t A_zero_point,
    std::int32_t B_zero_point,
    const std::int32_t * row_offsets,
    const std::int32_t * col_offsets,
    const std::int32_t * bias,
    bool fuse_relu = false )
    Reference implementation of the requantization step, using an int32 multiplier and right shift.

    Parameters
    bias    Can be nullptr.

    ◆ rowOffsetBufferSizeGConv()

    template<int SPATIAL_DIM = 2>
    int rowOffsetBufferSizeGConv (const conv_param_t< SPATIAL_DIM > & conv_param)
    Returns
    Size of the row offset buffer, in number of elements, needed for fbgemmGroupwiseConv.

    ◆ rowwise_sparse_adagrad_ref()

    template<typename IndexType >
    int rowwise_sparse_adagrad_ref (int num_rows,
    int block_size,
    std::uint64_t param_size,
    float * w,
    const float * g,
    float * h,
    const IndexType * indices,
    float epsilon,
    float lr,
    float weight_decay = 0.f,
    const double * counter = nullptr,
    const int64_t counter_halflife = 0 )
    Parameters
    num_rows            Number of rows to read.
    block_size          Number of parameters per row.
    param_size          Total number of parameters.
    w                   Input parameters.
    g                   Input gradients.
    h                   Input momentum.
    indices             Indices of each row.
    counter             Used for frequency-adjusted weight_decay; nullptr when frequency adjustment is not used. Ignored when weight_decay == 0.
    counter_halflife    weight_decay is adjusted only after this number of iterations.
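    A minimal sketch of the row-wise AdaGrad update this reference kernel performs (illustrative; weight_decay and counter handling are omitted).

#include <cmath>
#include <cstdint>

// Hedged sketch: one shared momentum value per row; the gradient sign follows
// the Caffe2-style convention used here (w += step * g).
template <typename IndexType>
void rowwise_sparse_adagrad_sketch(int num_rows, int block_size, float* w,
                                   const float* g, float* h,
                                   const IndexType* indices, float epsilon,
                                   float lr) {
  for (int i = 0; i < num_rows; ++i) {
    const IndexType idx = indices[i];
    const float* gi = g + static_cast<std::int64_t>(i) * block_size;
    float* wi = w + static_cast<std::int64_t>(idx) * block_size;

    // mean of squared gradients for this row is accumulated into h[idx]
    float gsq = 0.f;
    for (int j = 0; j < block_size; ++j) gsq += gi[j] * gi[j];
    h[idx] += gsq / block_size;

    const float step = lr / (std::sqrt(h[idx]) + epsilon);
    for (int j = 0; j < block_size; ++j) wi[j] += step * gi[j];
  }
}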

    ◆ sparse_adagrad_ref()

    template<typename IndexType >
    int sparse_adagrad_ref (int num_rows,
    int block_size,
    std::uint64_t param_size,
    float * w,
    const float * g,
    float * h,
    const IndexType * indices,
    float epsilon,
    float lr,
    float weight_decay = 0.f,
    const double * counter = nullptr,
    const int64_t counter_halflife = 0 )
    Parameters
    num_rows            Number of rows to read.
    block_size          Number of parameters per row.
    param_size          Total number of parameters.
    w                   Input parameters.
    g                   Input gradients.
    h                   Input momentum.
    indices             Indices of each row.
    counter             Used for frequency-adjusted weight_decay; nullptr when frequency adjustment is not used. Ignored when weight_decay == 0.
    counter_halflife    weight_decay is adjusted only after this number of iterations.

    ◆ SparseDenseMM()

    void SparseDenseMM (int M,
    int N,
    const int * row_ptr,
    const int * col_idx,
    const float * values,
    const float * B,
    int ldb,
    float * C,
    int ldc,
    bool accum = false )
    Parameters
    accum    Controls accumulation: if true (1), results are accumulated into the C matrix.

    Note on matrix order and layout: Unlike other fbgemm functions that follow the PyTorch convention where the A matrix is the activation (so in uint8_t for quantized FC/Conv, or fp32) and the B matrix is the weight (so in int8_t for quantized FC/Conv, or fp32), here A is the weight matrix. This is because we mostly target sparsity in weights, and for a row-major layout it's more efficient to have A as the sparse matrix: for each non-zero of A at the ith row and kth column, we can access the kth row of B, whose elements are contiguous in memory. If the B matrix were sparse, for each non-zero of B at the kth row and jth column, we would've needed to access the kth column of A, whose elements are not contiguous in memory with C/C++'s row-major layout.

    Alternatively, we can call this function as if we're computing C^T = B^T * A^T while maintaining PyTorch's convention that the left-hand side matrix B is the activation. If the B matrix is in column-major layout, we don't need to do an extra transposition. The C matrix will be output in column-major layout, so if we have back-to-back sparse-dense matrix-matrix multiplications, the B matrices of subsequent multiplications will already be in column-major layout. Refer to SparseDenseMMFP32Benchmark.cc for an example.

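    A minimal reference sketch of the CSR-times-dense product described above (illustrative only).

// Hedged sketch: A (M x K, sparse weight in CSR form via row_ptr/col_idx/
// values) times B (K x N, dense, row-major) into C (M x N).
void sparse_dense_mm_sketch(int M, int N, const int* row_ptr,
                            const int* col_idx, const float* values,
                            const float* B, int ldb, float* C, int ldc,
                            bool accum = false) {
  for (int i = 0; i < M; ++i) {
    if (!accum) {
      for (int j = 0; j < N; ++j) C[i * ldc + j] = 0.f;
    }
    // for each non-zero A(i, k), stream the contiguous row k of B
    for (int p = row_ptr[i]; p < row_ptr[i + 1]; ++p) {
      const int k = col_idx[p];
      const float a = values[p];
      for (int j = 0; j < N; ++j) C[i * ldc + j] += a * B[k * ldb + j];
    }
  }
}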

    ◆ spmdm_ref()

    void spmdm_ref (int M,
    const std::uint8_t * A,
    int lda,
    CompressedSparseColumn & B,
    bool accumulation,
    std::int32_t * C,
    int ldc,
    int groups = 1 )

    Reference implementation of SPMDM (sparse matrix times dense matrix).

    Parameters
    groups    When > 1, for the gth group we multiply the A[:, g*(A.ncols/groups):(g+1)*(A.ncols/groups)] sub-matrix with the B[:, g*(B.ncols/groups):(g+1)*(B.ncols/groups)] sub-matrix.

    ◆ transpose_ref()

    template<typename T >
    void transpose_ref (int64_t M,
    int64_t N,
    const T * src,
    int64_t ld_src,
    T * dst,
    int64_t ld_dst )

    Reference implementation of matrix transposition: B = A^T.

    Parameters
    M         The height of the matrix.
    N         The width of the matrix.
    src       The memory buffer of the source matrix A.
    ld_src    The leading dimension of the source matrix A.
    dst       The memory buffer of the destination matrix B.
    ld_dst    The leading dimension of the destination matrix B.
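    A minimal sketch of the reference transposition (illustrative).

#include <cstdint>

// Hedged sketch: B = A^T, where src is M x N and dst is N x M.
template <typename T>
void transpose_ref_sketch(std::int64_t M, std::int64_t N, const T* src,
                          std::int64_t ld_src, T* dst, std::int64_t ld_dst) {
  for (std::int64_t i = 0; i < M; ++i) {
    for (std::int64_t j = 0; j < N; ++j) {
      dst[j * ld_dst + i] = src[i * ld_src + j];
    }
  }
}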

    ◆ transpose_simd()

    template<typename T >
    void transpose_simd (int64_t M,
    int64_t N,
    const T * src,
    int64_t ld_src,
    T * dst,
    int64_t ld_dst )
    +
    + +

    Transpose a matrix.

    Parameters
    M    The number of rows of the input matrix.
    N    The number of columns of the input matrix.
diff --git a/namespacemembers.html b/namespacemembers.html
new file mode 100644
index 000000000..baa0f4c8a
--- /dev/null
+++ b/namespacemembers.html
@@ -0,0 +1,244 @@
fbgemm_gpu: Namespace Members
    Here is a list of all documented namespace members with links to the namespaces they belong to:

    - b -

      +
    • Bfloat16ToFloat_avx2() : fbgemm
    • +
    • Bfloat16ToFloat_avx512() : fbgemm
    • +
    • Bfloat16ToFloat_ref() : fbgemm
    • +
    • Bfloat16ToFloat_simd() : fbgemm
    • +
    • broadcast8Bit() : fbgemm
    • +
    + + +

    - c -

      +
    • cblas_sgemm_ref() : fbgemm
    • +
    • col_offsets_with_zero_pt_s8acc32_ref() : fbgemm
    • +
    • compare_buffers() : fbgemm
    • +
    • ConvFastPath() : fbgemm
    • +
    + + +

    - d -

      +
    • depthwise_2d_same_pad() : fbgemm
    • +
    • depthwise_3d_same_pad() : fbgemm
    • +
    + + +

    - e -

      +
    • emitExtractHalfVector() : fbgemm
    • +
    • emitLoadDWord() : fbgemm
    • +
    + + +

    - f -

      +
    • fbgemmAlignedAlloc() : fbgemm
    • +
    • fbgemmAlignedFree() : fbgemm
    • +
    • fbgemmConv() : fbgemm
    • +
    • fbgemmEnableAvx512Ymm() : fbgemm
    • +
    • fbgemmForceIsa() : fbgemm
    • +
    • fbgemmGet2DPartition() : fbgemm
    • +
    • fbgemmGetThreadPartition() : fbgemm
    • +
    • fbgemmGroupwiseConv() : fbgemm
    • +
    • fbgemmHasArmNeonSupport() : fbgemm
    • +
    • fbgemmHasArmSve2Support() : fbgemm
    • +
    • fbgemmHasAvx2Support() : fbgemm
    • +
    • fbgemmHasAvx512Support() : fbgemm
    • +
    • fbgemmHasAvx512VnniSupport() : fbgemm
    • +
    • fbgemmInstructionSet() : fbgemm
    • +
    • fbgemmIsIntelXeonD() : fbgemm
    • +
    • fbgemmOptimizedGConv() : fbgemm
    • +
    • fbgemmPacked() : fbgemm
    • +
    • fbgemmPartition1D() : fbgemm
    • +
    • fbgemmPartition1DBlocked() : fbgemm
    • +
    • fbgemmSupportedCPU() : fbgemm
    • +
    • FindMinMax() : fbgemm
    • +
    • Float16ToFloat_avx2() : fbgemm
    • +
    • Float16ToFloat_avx512() : fbgemm
    • +
    • Float16ToFloat_ref() : fbgemm
    • +
    • Float16ToFloat_simd() : fbgemm
    • +
    • Float8ToFloat_ref() : fbgemm
    • +
    • FloatOrHalfToFused8BitRowwiseQuantizedSBFloat() : fbgemm
    • +
    • FloatOrHalfToFused8BitRowwiseQuantizedSBFloatRef() : fbgemm
    • +
    • FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf() : fbgemm
    • +
    • FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfRef() : fbgemm
    • +
    • FloatToBfloat16_avx2() : fbgemm
    • +
    • FloatToBfloat16_avx512() : fbgemm
    • +
    • FloatToBfloat16_ref() : fbgemm
    • +
    • FloatToBfloat16_simd() : fbgemm
    • +
    • FloatToFloat16_avx2() : fbgemm
    • +
    • FloatToFloat16_avx512() : fbgemm
    • +
    • FloatToFloat16_ref() : fbgemm
    • +
    • FloatToFloat16_simd() : fbgemm
    • +
    • FloatToFloat8_ref() : fbgemm
    • +
    • Fused8BitRowwiseQuantizedSBFloatToFloatOrHalf() : fbgemm
    • +
    • Fused8BitRowwiseQuantizedSBFloatToFloatOrHalfRef() : fbgemm
    • +
    • FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf() : fbgemm
    • +
    • FusedNBitRowwiseQuantizedSBHalfToFloatOrHalfRef() : fbgemm
    • +
    • FusedQuantizeDequantize() : fbgemm
    • +
    + + +

    - g -

      +
    • gen16BitVectorOne() : fbgemm
    • +
    • gen8BitVectorOne() : fbgemm
    • +
    • GenerateEmbeddingSpMDM() : fbgemm
    • +
    • GenerateEmbeddingSpMDMFP8WithStrides() : fbgemm
    • +
    • GenerateEmbeddingSpMDMNBit() : fbgemm
    • +
    • GenerateEmbeddingSpMDMNBitRowWiseSparse() : fbgemm
    • +
    • GenerateEmbeddingSpMDMNBitWithStrides() : fbgemm
    • +
    • GenerateEmbeddingSpMDMRowWiseSparse() : fbgemm
    • +
    • GenerateEmbeddingSpMDMWithStrides() : fbgemm
    • +
    • GenerateRowWiseSparseAdaGradFused() : fbgemm
    • +
    • genU8I8S32FMA() : fbgemm
    • +
    • genU8Sum4() : fbgemm
    • +
    • genU8Sum8() : fbgemm
    • +
    + + +

    - i -

    + + +

    - m -

      +
    • matmul_u8i8acc16_ref() : fbgemm
    • +
    • matmul_u8i8acc32_ref() : fbgemm
    • +
    • matrix_op_t : fbgemm
    • +
    + + +

    - o -

      +
    • optimized_conv_t : fbgemm
    • +
    + + +

    - p -

    + + +

    - q -

    + + +

    - r -

      +
    • radix_sort_parallel() : fbgemm
    • +
    • reduceAvx2() : fbgemm
    • +
    • requantize_u8acc32_ref() : fbgemm
    • +
    • requantizeOutputProcessingAvx2() : fbgemm
    • +
    • requantizeOutputProcessingGConvAvx512() : fbgemm
    • +
    • RoundToFloat16() : fbgemm
    • +
    • row_offsets_u8acc32_ref() : fbgemm
    • +
    • rowOffsetBufferSizeGConv() : fbgemm
    • +
    • rowwise_sparse_adagrad_ref() : fbgemm
    • +
    + + +

    - s -

    + + +

    - t -

      +
    • takeDepthWiseFastPath() : fbgemm
    • +
    • takePointWiseFastPath() : fbgemm
    • +
    • transpose_8rows() : fbgemm
    • +
    • transpose_ref() : fbgemm
    • +
    • transpose_simd() : fbgemm
    • +
    + + +

    - x -

    +
diff --git a/namespacemembers_enum.html b/namespacemembers_enum.html
new file mode 100644
index 000000000..062e44351
--- /dev/null
+++ b/namespacemembers_enum.html
@@ -0,0 +1,86 @@
fbgemm_gpu: Namespace Members
    Here is a list of all documented namespace enums with links to the namespaces they belong to:
    +
diff --git a/namespacemembers_func.html b/namespacemembers_func.html
new file mode 100644
index 000000000..0bd201156
--- /dev/null
+++ b/namespacemembers_func.html
@@ -0,0 +1,236 @@
fbgemm_gpu: Namespace Members
    Here is a list of all documented namespace functions with links to the namespaces they belong to:
    + +

    - b -

      +
    • Bfloat16ToFloat_avx2() : fbgemm
    • +
    • Bfloat16ToFloat_avx512() : fbgemm
    • +
    • Bfloat16ToFloat_ref() : fbgemm
    • +
    • Bfloat16ToFloat_simd() : fbgemm
    • +
    • broadcast8Bit() : fbgemm
    • +
    + + +

    - c -

      +
    • cblas_sgemm_ref() : fbgemm
    • +
    • col_offsets_with_zero_pt_s8acc32_ref() : fbgemm
    • +
    • compare_buffers() : fbgemm
    • +
    • ConvFastPath() : fbgemm
    • +
    + + +

    - d -

      +
    • depthwise_2d_same_pad() : fbgemm
    • +
    • depthwise_3d_same_pad() : fbgemm
    • +
    + + +

    - e -

      +
    • emitExtractHalfVector() : fbgemm
    • +
    • emitLoadDWord() : fbgemm
    • +
    + + +

    - f -

      +
    • fbgemmAlignedAlloc() : fbgemm
    • +
    • fbgemmAlignedFree() : fbgemm
    • +
    • fbgemmConv() : fbgemm
    • +
    • fbgemmEnableAvx512Ymm() : fbgemm
    • +
    • fbgemmForceIsa() : fbgemm
    • +
    • fbgemmGet2DPartition() : fbgemm
    • +
    • fbgemmGetThreadPartition() : fbgemm
    • +
    • fbgemmGroupwiseConv() : fbgemm
    • +
    • fbgemmHasArmNeonSupport() : fbgemm
    • +
    • fbgemmHasArmSve2Support() : fbgemm
    • +
    • fbgemmHasAvx2Support() : fbgemm
    • +
    • fbgemmHasAvx512Support() : fbgemm
    • +
    • fbgemmHasAvx512VnniSupport() : fbgemm
    • +
    • fbgemmInstructionSet() : fbgemm
    • +
    • fbgemmIsIntelXeonD() : fbgemm
    • +
    • fbgemmOptimizedGConv() : fbgemm
    • +
    • fbgemmPacked() : fbgemm
    • +
    • fbgemmPartition1D() : fbgemm
    • +
    • fbgemmPartition1DBlocked() : fbgemm
    • +
    • fbgemmSupportedCPU() : fbgemm
    • +
    • FindMinMax() : fbgemm
    • +
    • Float16ToFloat_avx2() : fbgemm
    • +
    • Float16ToFloat_avx512() : fbgemm
    • +
    • Float16ToFloat_ref() : fbgemm
    • +
    • Float16ToFloat_simd() : fbgemm
    • +
    • Float8ToFloat_ref() : fbgemm
    • +
    • FloatOrHalfToFused8BitRowwiseQuantizedSBFloat() : fbgemm
    • +
    • FloatOrHalfToFused8BitRowwiseQuantizedSBFloatRef() : fbgemm
    • +
    • FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf() : fbgemm
    • +
    • FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfRef() : fbgemm
    • +
    • FloatToBfloat16_avx2() : fbgemm
    • +
    • FloatToBfloat16_avx512() : fbgemm
    • +
    • FloatToBfloat16_ref() : fbgemm
    • +
    • FloatToBfloat16_simd() : fbgemm
    • +
    • FloatToFloat16_avx2() : fbgemm
    • +
    • FloatToFloat16_avx512() : fbgemm
    • +
    • FloatToFloat16_ref() : fbgemm
    • +
    • FloatToFloat16_simd() : fbgemm
    • +
    • FloatToFloat8_ref() : fbgemm
    • +
    • Fused8BitRowwiseQuantizedSBFloatToFloatOrHalf() : fbgemm
    • +
    • Fused8BitRowwiseQuantizedSBFloatToFloatOrHalfRef() : fbgemm
    • +
    • FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf() : fbgemm
    • +
    • FusedNBitRowwiseQuantizedSBHalfToFloatOrHalfRef() : fbgemm
    • +
    • FusedQuantizeDequantize() : fbgemm
    • +
    + + +

    - g -

      +
    • gen16BitVectorOne() : fbgemm
    • +
    • gen8BitVectorOne() : fbgemm
    • +
    • GenerateEmbeddingSpMDM() : fbgemm
    • +
    • GenerateEmbeddingSpMDMFP8WithStrides() : fbgemm
    • +
    • GenerateEmbeddingSpMDMNBit() : fbgemm
    • +
    • GenerateEmbeddingSpMDMNBitRowWiseSparse() : fbgemm
    • +
    • GenerateEmbeddingSpMDMNBitWithStrides() : fbgemm
    • +
    • GenerateEmbeddingSpMDMRowWiseSparse() : fbgemm
    • +
    • GenerateEmbeddingSpMDMWithStrides() : fbgemm
    • +
    • GenerateRowWiseSparseAdaGradFused() : fbgemm
    • +
    • genU8I8S32FMA() : fbgemm
    • +
    • genU8Sum4() : fbgemm
    • +
    • genU8Sum8() : fbgemm
    • +
    + + +

    - i -

      +
    • initCRegs() : fbgemm
    • +
    • is_autovec_disabled() : fbgemm
    • +
    • is_radix_sort_accelerated_with_openmp() : fbgemm
    • +
    • isYmm() : fbgemm
    • +
    • isZmm() : fbgemm
    • +
    + + +

    - m -

      +
    • matmul_u8i8acc16_ref() : fbgemm
    • +
    • matmul_u8i8acc32_ref() : fbgemm
    • +
    + + +

    - p -

    + + +

    - q -

    + + +

    - r -

      +
    • radix_sort_parallel() : fbgemm
    • +
    • reduceAvx2() : fbgemm
    • +
    • requantize_u8acc32_ref() : fbgemm
    • +
    • requantizeOutputProcessingAvx2() : fbgemm
    • +
    • requantizeOutputProcessingGConvAvx512() : fbgemm
    • +
    • RoundToFloat16() : fbgemm
    • +
    • row_offsets_u8acc32_ref() : fbgemm
    • +
    • rowOffsetBufferSizeGConv() : fbgemm
    • +
    • rowwise_sparse_adagrad_ref() : fbgemm
    • +
    + + +

    - s -

    + + +

    - t -

      +
    • takeDepthWiseFastPath() : fbgemm
    • +
    • takePointWiseFastPath() : fbgemm
    • +
    • transpose_8rows() : fbgemm
    • +
    • transpose_ref() : fbgemm
    • +
    • transpose_simd() : fbgemm
    • +
    + + +

    - x -

    +
diff --git a/objects.inv b/objects.inv
index ee023fc7a9633af3216a18045177a9d1a2bcb235..567cb56ec2a918327a2dc9de541b93cfca30aa8a 100644
GIT binary patch delta 12459 (binary content not shown)
diff --git a/py-modindex.html b/py-modindex.html
index b29ef9189..8d1005ba0 100644
--- a/py-modindex.html
+++ b/py-modindex.html
@@ -299,6 +299,7 @@
  • Combine Input Operators
  • Layout Transformation Operators
  • Embedding Operators
  • Experimental Operators
  • FBGEMM_GPU Python API

    FBGEMM_GPU Python API

      diff --git a/search/all_1.js b/search/all_1.js index 51844bfbc..8ac094f64 100644 --- a/search/all_1.js +++ b/search/all_1.js @@ -1,6 +1,8 @@ var searchData= [ ['addr_0',['addr',['../classfbgemm_1_1_pack_a_matrix.html#a04b923ddb781752fe65992543729422c',1,'fbgemm::PackAMatrix::addr()'],['../classfbgemm_1_1_pack_b_matrix.html#a04b923ddb781752fe65992543729422c',1,'fbgemm::PackBMatrix::addr()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a04b923ddb781752fe65992543729422c',1,'fbgemm::PackAWithRowOffset::addr()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a04b923ddb781752fe65992543729422c',1,'fbgemm::PackAWithQuantRowOffset::addr()']]], - ['avx2_1',['Quantization Utilities (AVX2)',['../group__fbgemm-quant-utils-avx2.html',1,'']]], - ['avx512_2',['Quantization Utilities (AVX512)',['../group__fbgemm-quant-utils-avx512.html',1,'']]] + ['ai_20attention_1',['Experimental-gen-ai-attention',['../group__experimental-gen-ai-attention.html',1,'']]], + ['attention_2',['Experimental-gen-ai-attention',['../group__experimental-gen-ai-attention.html',1,'']]], + ['avx2_3',['Quantization Utilities (AVX2)',['../group__fbgemm-quant-utils-avx2.html',1,'']]], + ['avx512_4',['Quantization Utilities (AVX512)',['../group__fbgemm-quant-utils-avx512.html',1,'']]] ]; diff --git a/search/all_10.js b/search/all_10.js index e438411e3..4183158cc 100644 --- a/search/all_10.js +++ b/search/all_10.js @@ -1,46 +1,48 @@ var searchData= [ ['pack_0',['pack',['../classfbgemm_1_1_pack_matrix.html#a0c1765c6a94482209b1fc0cd334ad44e',1,'fbgemm::PackMatrix::pack()'],['../classfbgemm_1_1_pack_a_matrix.html#a58a5c6b7f15d3c2a796942e383a97f36',1,'fbgemm::PackAMatrix::pack()'],['../classfbgemm_1_1_pack_b_matrix.html#ae2d8887226e140ed6ddc140cd338910d',1,'fbgemm::PackBMatrix::pack()'],['../classfbgemm_1_1_pack_weight_matrix_for_g_conv.html#a915ffc82b17862ab1d2a466a79d23a3f',1,'fbgemm::PackWeightMatrixForGConv::pack()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a58a5c6b7f15d3c2a796942e383a97f36',1,'fbgemm::PackAWithIm2Col::pack()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a58a5c6b7f15d3c2a796942e383a97f36',1,'fbgemm::PackAWithRowOffset::pack()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a58a5c6b7f15d3c2a796942e383a97f36',1,'fbgemm::PackAWithQuantRowOffset::pack()'],['../structfbgemm_1_1_b_c_s_r_matrix.html#ae1871cae73e37637f6a2d65a14f0512f',1,'fbgemm::BCSRMatrix::pack(const DTYPE *src, size_t ld)'],['../structfbgemm_1_1_b_c_s_r_matrix.html#ac86c58878f6bcd10610f66eefbe53a90',1,'fbgemm::BCSRMatrix::pack(const DTYPE *src)']]], - ['packamatrix_1',['PackAMatrix',['../classfbgemm_1_1_pack_a_matrix.html',1,'fbgemm']]], - ['packawithim2col_2',['PackAWithIm2Col',['../classfbgemm_1_1_pack_a_with_im2_col.html',1,'PackAWithIm2Col< T, accT, SPATIAL_DIM >'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a37d96dcba66f792135549702d2f25e4a',1,'fbgemm::PackAWithIm2Col::PackAWithIm2Col()']]], - ['packawithquantrowoffset_3',['PackAWithQuantRowOffset',['../classfbgemm_1_1_pack_a_with_quant_row_offset.html',1,'PackAWithQuantRowOffset< T, accT >'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a44ca398424d2d534802de6b892bf3a6a',1,'fbgemm::PackAWithQuantRowOffset::PackAWithQuantRowOffset()']]], - ['packawithrowoffset_4',['PackAWithRowOffset',['../classfbgemm_1_1_pack_a_with_row_offset.html',1,'PackAWithRowOffset< T, accT >'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a8dceb15ed761dfbf804244ffd2fc7f66',1,'fbgemm::PackAWithRowOffset::PackAWithRowOffset()']]], - 
['packbmatrix_5',['PackBMatrix',['../classfbgemm_1_1_pack_b_matrix.html',1,'PackBMatrix< T, accT >'],['../classfbgemm_1_1_pack_b_matrix.html#a1afee702206695dfcd20de0474408b07',1,'fbgemm::PackBMatrix::PackBMatrix()']]], - ['packbmatrix_3c_20std_3a_3aint8_5ft_2c_20std_3a_3aint32_5ft_20_3e_6',['PackBMatrix< std::int8_t, std::int32_t >',['../classfbgemm_1_1_pack_b_matrix.html',1,'fbgemm']]], - ['packedblock_7',['packedBlock',['../classfbgemm_1_1_pack_matrix.html#a9c6a626fc1b0a20479c167862d7a91be',1,'fbgemm::PackMatrix']]], - ['packedbuffersize_8',['packedBufferSize',['../classfbgemm_1_1_pack_matrix.html#ab11bd74e390ac73323a514cf2d6e6b98',1,'fbgemm::PackMatrix']]], - ['packedcolstart_9',['packedColStart',['../classfbgemm_1_1_pack_matrix.html#aa981736a44501513eb4c0f8cb72a11c8',1,'fbgemm::PackMatrix']]], - ['packedgemmmatrixb_10',['PackedGemmMatrixB',['../classfbgemm_1_1_packed_gemm_matrix_b.html',1,'fbgemm']]], - ['packedrowstart_11',['packedRowStart',['../classfbgemm_1_1_pack_matrix.html#ae9e47d9b93f5049504203ff55472e075',1,'fbgemm::PackMatrix']]], - ['packingtraits_12',['PackingTraits',['../structfbgemm_1_1_packing_traits.html',1,'fbgemm']]], - ['packingtraits_3c_20float_2c_20float_2c_20inst_5fset_5ft_3a_3aavx2_20_3e_13',['PackingTraits< float, float, inst_set_t::avx2 >',['../struct_packing_traits_3_01float_00_01float_00_01inst__set__t_1_1avx2_01_4.html',1,'']]], - ['packingtraits_3c_20float16_2c_20float_2c_20inst_5fset_5ft_3a_3aavx2_20_3e_14',['PackingTraits< float16, float, inst_set_t::avx2 >',['../struct_packing_traits_3_01float16_00_01float_00_01inst__set__t_1_1avx2_01_4.html',1,'']]], - ['packingtraits_3c_20int64_5ft_2c_20int64_5ft_2c_20inst_5fset_5ft_3a_3aavx512_20_3e_15',['PackingTraits< int64_t, int64_t, inst_set_t::avx512 >',['../struct_packing_traits_3_01int64__t_00_01int64__t_00_01inst__set__t_1_1avx512_01_4.html',1,'']]], - ['packingtraits_3c_20t_2c_20acct_2c_20inst_5fset_5ft_3a_3aavx512_5fvnni_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_26_26is_5f16or32bit_3c_20acct_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_16',['PackingTraits< T, accT, inst_set_t::avx512_vnni, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >',['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni_00_01typename_01std_1_495ec4d719e603d9e79f7a55acd55e37.html',1,'']]], - ['packingtraits_3c_20t_2c_20acct_2c_20inst_5fset_5ft_3a_3aavx512_5fvnni_5fymm_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_26_26is_5f16or32bit_3c_20acct_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_17',['PackingTraits< T, accT, inst_set_t::avx512_vnni_ymm, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >',['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni__ymm_00_01typename_01s0dc87ea23078ad687de8b8ea67c6d3f3.html',1,'']]], - ['packingtraits_3c_20t_2c_20std_3a_3aint16_5ft_2c_20inst_5fset_5ft_3a_3aavx2_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_18',['PackingTraits< T, std::int16_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >',['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx2_00_01typename_01std_858291a64a7808d94f01c15180f04f2c.html',1,'']]], - ['packingtraits_3c_20t_2c_20std_3a_3aint16_5ft_2c_20inst_5fset_5ft_3a_3aavx512_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_19',['PackingTraits< T, std::int16_t, 
inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >',['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512_00_01typename_01sta3c205cd2e965b8e751c31d57cbb32f1.html',1,'']]], - ['packingtraits_3c_20t_2c_20std_3a_3aint16_5ft_2c_20inst_5fset_5ft_3a_3aavx512_5fymm_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_20',['PackingTraits< T, std::int16_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >',['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512__ymm_00_01typenamea22ccba6542408684108d40af5374bf6.html',1,'']]], - ['packingtraits_3c_20t_2c_20std_3a_3aint32_5ft_2c_20inst_5fset_5ft_3a_3aavx2_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_21',['PackingTraits< T, std::int32_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >',['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx2_00_01typename_01std_fe37d46c6e9c6ab5afbe4d3665c382fb.html',1,'']]], - ['packingtraits_3c_20t_2c_20std_3a_3aint32_5ft_2c_20inst_5fset_5ft_3a_3aavx512_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_22',['PackingTraits< T, std::int32_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >',['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512_00_01typename_01st563fe14c40d9d54cf9fe6113c26e66c0.html',1,'']]], - ['packingtraits_3c_20t_2c_20std_3a_3aint32_5ft_2c_20inst_5fset_5ft_3a_3aavx512_5fymm_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_23',['PackingTraits< T, std::int32_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >',['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512__ymm_00_01typename23c91419ea08f5673443445db549693f.html',1,'']]], - ['packmatrix_24',['PackMatrix',['../classfbgemm_1_1_pack_matrix.html',1,'PackMatrix< PT, inpType, accType >'],['../classfbgemm_1_1_pack_matrix.html#ac15276b97315df2567c4ab36d48b8da0',1,'fbgemm::PackMatrix::PackMatrix()']]], - ['packmatrix_3c_20fbgemm_3a_3apackbmatrix_3c_20int8_5ft_2c_20typename_20packingamatrix_3a_3aacctype_20_3e_2c_20int8_5ft_2c_20typename_20packingamatrix_3a_3aacctype_20_3e_25',['PackMatrix< fbgemm::PackBMatrix< int8_t, typename packingAMatrix::accType >, int8_t, typename packingAMatrix::accType >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], - ['packmatrix_3c_20packamatrix_3c_20t_2c_20std_3a_3aint32_5ft_20_3e_2c_20t_2c_20std_3a_3aint32_5ft_20_3e_26',['PackMatrix< PackAMatrix< T, std::int32_t >, T, std::int32_t >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], - ['packmatrix_3c_20packawithim2col_3c_20t_2c_20std_3a_3aint32_5ft_2c_202_20_3e_2c_20t_2c_20std_3a_3aint32_5ft_20_3e_27',['PackMatrix< PackAWithIm2Col< T, std::int32_t, 2 >, T, std::int32_t >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], - ['packmatrix_3c_20packawithquantrowoffset_3c_20t_2c_20std_3a_3aint32_5ft_20_3e_2c_20t_2c_20std_3a_3aint32_5ft_20_3e_28',['PackMatrix< PackAWithQuantRowOffset< T, std::int32_t >, T, std::int32_t >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], - ['packmatrix_3c_20packawithrowoffset_3c_20t_2c_20std_3a_3aint32_5ft_20_3e_2c_20t_2c_20std_3a_3aint32_5ft_20_3e_29',['PackMatrix< PackAWithRowOffset< T, std::int32_t >, T, std::int32_t 
>',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], - ['packmatrix_3c_20packbmatrix_3c_20std_3a_3aint8_5ft_2c_20std_3a_3aint32_5ft_20_3e_2c_20std_3a_3aint8_5ft_2c_20std_3a_3aint32_5ft_20_3e_30',['PackMatrix< PackBMatrix< std::int8_t, std::int32_t >, std::int8_t, std::int32_t >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], - ['packmatrix_3c_20packbmatrix_3c_20t_2c_20std_3a_3aint32_5ft_20_3e_2c_20t_2c_20std_3a_3aint32_5ft_20_3e_31',['PackMatrix< PackBMatrix< T, std::int32_t >, T, std::int32_t >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], - ['packmatrix_3c_20packingamatrix_2c_20typename_20packingamatrix_3a_3ainptype_2c_20typename_20packingamatrix_3a_3aacctype_20_3e_32',['PackMatrix< packingAMatrix, typename packingAMatrix::inpType, typename packingAMatrix::accType >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], - ['packmatrix_3c_20packingamatrix_2c_20uint8_5ft_2c_20typename_20packingamatrix_3a_3aacctype_20_3e_33',['PackMatrix< packingAMatrix, uint8_t, typename packingAMatrix::accType >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], - ['packmatrix_3c_20packingbmatrix_2c_20typename_20packingbmatrix_3a_3ainptype_2c_20typename_20packingbmatrix_3a_3aacctype_20_3e_34',['PackMatrix< packingBMatrix, typename packingBMatrix::inpType, typename packingBMatrix::accType >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], - ['packweightmatrixforgconv_35',['PackWeightMatrixForGConv',['../classfbgemm_1_1_pack_weight_matrix_for_g_conv.html',1,'PackWeightMatrixForGConv< T, accT, SPATIAL_DIM >'],['../classfbgemm_1_1_pack_weight_matrix_for_g_conv.html#ac4aac545b455c64f161fc78ac724d3e3',1,'fbgemm::PackWeightMatrixForGConv::PackWeightMatrixForGConv()']]], - ['packweightmatrixforgconv_3c_20std_3a_3aint8_5ft_2c_20std_3a_3aint32_5ft_2c_202_20_3e_36',['PackWeightMatrixForGConv< std::int8_t, std::int32_t, 2 >',['../classfbgemm_1_1_pack_weight_matrix_for_g_conv.html',1,'fbgemm']]], - ['packweightsforconv_37',['PackWeightsForConv',['../classfbgemm_1_1_pack_weights_for_conv.html',1,'fbgemm']]], - ['permute_20pooled_20embeddings_20operators_20cpu_38',['Permute Pooled Embeddings Operators (CPU)',['../group__permute-pooled-embs-cpu.html',1,'']]], - ['permute_20pooled_20embeddings_20operators_20cuda_39',['Permute Pooled Embeddings Operators (CUDA)',['../group__permute-pooled-embs-gpu.html',1,'']]], - ['pooled_20embeddings_20operators_20cpu_40',['Permute Pooled Embeddings Operators (CPU)',['../group__permute-pooled-embs-cpu.html',1,'']]], - ['pooled_20embeddings_20operators_20cuda_41',['Permute Pooled Embeddings Operators (CUDA)',['../group__permute-pooled-embs-gpu.html',1,'']]], - ['printpackedmatrix_42',['printPackedMatrix',['../classfbgemm_1_1_pack_matrix.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackMatrix::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_matrix.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAMatrix::printPackedMatrix()'],['../classfbgemm_1_1_pack_b_matrix.html#ab19db6d7505e9ed131b2a101f90d5093',1,'fbgemm::PackBMatrix::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAWithIm2Col::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAWithRowOffset::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAWithQuantRowOffset::printPackedMatrix()']]] + 
['packa_1',['PackA',['../namespacefbgemm.html#a28c69d65ed666a9f46dc4763be70cdf6',1,'fbgemm']]], + ['packamatrix_2',['PackAMatrix',['../classfbgemm_1_1_pack_a_matrix.html',1,'fbgemm']]], + ['packawithim2col_3',['PackAWithIm2Col',['../classfbgemm_1_1_pack_a_with_im2_col.html',1,'PackAWithIm2Col< T, accT, SPATIAL_DIM >'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a37d96dcba66f792135549702d2f25e4a',1,'fbgemm::PackAWithIm2Col::PackAWithIm2Col()']]], + ['packawithquantrowoffset_4',['PackAWithQuantRowOffset',['../classfbgemm_1_1_pack_a_with_quant_row_offset.html',1,'PackAWithQuantRowOffset< T, accT >'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a44ca398424d2d534802de6b892bf3a6a',1,'fbgemm::PackAWithQuantRowOffset::PackAWithQuantRowOffset()']]], + ['packawithrowoffset_5',['PackAWithRowOffset',['../classfbgemm_1_1_pack_a_with_row_offset.html',1,'PackAWithRowOffset< T, accT >'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a8dceb15ed761dfbf804244ffd2fc7f66',1,'fbgemm::PackAWithRowOffset::PackAWithRowOffset()']]], + ['packbmatrix_6',['PackBMatrix',['../classfbgemm_1_1_pack_b_matrix.html',1,'PackBMatrix< T, accT >'],['../classfbgemm_1_1_pack_b_matrix.html#a1afee702206695dfcd20de0474408b07',1,'fbgemm::PackBMatrix::PackBMatrix()']]], + ['packbmatrix_3c_20std_3a_3aint8_5ft_2c_20std_3a_3aint32_5ft_20_3e_7',['PackBMatrix< std::int8_t, std::int32_t >',['../classfbgemm_1_1_pack_b_matrix.html',1,'fbgemm']]], + ['packedblock_8',['packedBlock',['../classfbgemm_1_1_pack_matrix.html#a9c6a626fc1b0a20479c167862d7a91be',1,'fbgemm::PackMatrix']]], + ['packedbuffersize_9',['packedBufferSize',['../classfbgemm_1_1_pack_matrix.html#ab11bd74e390ac73323a514cf2d6e6b98',1,'fbgemm::PackMatrix']]], + ['packedcolstart_10',['packedColStart',['../classfbgemm_1_1_pack_matrix.html#aa981736a44501513eb4c0f8cb72a11c8',1,'fbgemm::PackMatrix']]], + ['packedgemmmatrixb_11',['PackedGemmMatrixB',['../classfbgemm_1_1_packed_gemm_matrix_b.html',1,'fbgemm']]], + ['packedrowstart_12',['packedRowStart',['../classfbgemm_1_1_pack_matrix.html#ae9e47d9b93f5049504203ff55472e075',1,'fbgemm::PackMatrix']]], + ['packingtraits_13',['PackingTraits',['../structfbgemm_1_1_packing_traits.html',1,'fbgemm']]], + ['packingtraits_3c_20float_2c_20float_2c_20inst_5fset_5ft_3a_3aavx2_20_3e_14',['PackingTraits< float, float, inst_set_t::avx2 >',['../struct_packing_traits_3_01float_00_01float_00_01inst__set__t_1_1avx2_01_4.html',1,'']]], + ['packingtraits_3c_20float16_2c_20float_2c_20inst_5fset_5ft_3a_3aavx2_20_3e_15',['PackingTraits< float16, float, inst_set_t::avx2 >',['../struct_packing_traits_3_01float16_00_01float_00_01inst__set__t_1_1avx2_01_4.html',1,'']]], + ['packingtraits_3c_20int64_5ft_2c_20int64_5ft_2c_20inst_5fset_5ft_3a_3aavx512_20_3e_16',['PackingTraits< int64_t, int64_t, inst_set_t::avx512 >',['../struct_packing_traits_3_01int64__t_00_01int64__t_00_01inst__set__t_1_1avx512_01_4.html',1,'']]], + ['packingtraits_3c_20t_2c_20acct_2c_20inst_5fset_5ft_3a_3aavx512_5fvnni_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_26_26is_5f16or32bit_3c_20acct_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_17',['PackingTraits< T, accT, inst_set_t::avx512_vnni, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >',['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni_00_01typename_01std_1_495ec4d719e603d9e79f7a55acd55e37.html',1,'']]], + 
['packingtraits_3c_20t_2c_20acct_2c_20inst_5fset_5ft_3a_3aavx512_5fvnni_5fymm_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_26_26is_5f16or32bit_3c_20acct_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_18',['PackingTraits< T, accT, inst_set_t::avx512_vnni_ymm, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >',['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni__ymm_00_01typename_01s0dc87ea23078ad687de8b8ea67c6d3f3.html',1,'']]], + ['packingtraits_3c_20t_2c_20std_3a_3aint16_5ft_2c_20inst_5fset_5ft_3a_3aavx2_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_19',['PackingTraits< T, std::int16_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >',['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx2_00_01typename_01std_858291a64a7808d94f01c15180f04f2c.html',1,'']]], + ['packingtraits_3c_20t_2c_20std_3a_3aint16_5ft_2c_20inst_5fset_5ft_3a_3aavx512_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_20',['PackingTraits< T, std::int16_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >',['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512_00_01typename_01sta3c205cd2e965b8e751c31d57cbb32f1.html',1,'']]], + ['packingtraits_3c_20t_2c_20std_3a_3aint16_5ft_2c_20inst_5fset_5ft_3a_3aavx512_5fymm_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_21',['PackingTraits< T, std::int16_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >',['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512__ymm_00_01typenamea22ccba6542408684108d40af5374bf6.html',1,'']]], + ['packingtraits_3c_20t_2c_20std_3a_3aint32_5ft_2c_20inst_5fset_5ft_3a_3aavx2_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_22',['PackingTraits< T, std::int32_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >',['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx2_00_01typename_01std_fe37d46c6e9c6ab5afbe4d3665c382fb.html',1,'']]], + ['packingtraits_3c_20t_2c_20std_3a_3aint32_5ft_2c_20inst_5fset_5ft_3a_3aavx512_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_23',['PackingTraits< T, std::int32_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >',['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512_00_01typename_01st563fe14c40d9d54cf9fe6113c26e66c0.html',1,'']]], + ['packingtraits_3c_20t_2c_20std_3a_3aint32_5ft_2c_20inst_5fset_5ft_3a_3aavx512_5fymm_2c_20typename_20std_3a_3aenable_5fif_3c_20is_5f8bit_3c_20t_20_3e_3a_3avalue_20_3e_3a_3atype_20_3e_24',['PackingTraits< T, std::int32_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >',['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512__ymm_00_01typename23c91419ea08f5673443445db549693f.html',1,'']]], + ['packmatrix_25',['PackMatrix',['../classfbgemm_1_1_pack_matrix.html',1,'PackMatrix< PT, inpType, accType >'],['../classfbgemm_1_1_pack_matrix.html#ac15276b97315df2567c4ab36d48b8da0',1,'fbgemm::PackMatrix::PackMatrix()']]], + 
['packmatrix_3c_20fbgemm_3a_3apackbmatrix_3c_20int8_5ft_2c_20typename_20packingamatrix_3a_3aacctype_20_3e_2c_20int8_5ft_2c_20typename_20packingamatrix_3a_3aacctype_20_3e_26',['PackMatrix< fbgemm::PackBMatrix< int8_t, typename packingAMatrix::accType >, int8_t, typename packingAMatrix::accType >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], + ['packmatrix_3c_20packamatrix_3c_20t_2c_20std_3a_3aint32_5ft_20_3e_2c_20t_2c_20std_3a_3aint32_5ft_20_3e_27',['PackMatrix< PackAMatrix< T, std::int32_t >, T, std::int32_t >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], + ['packmatrix_3c_20packawithim2col_3c_20t_2c_20std_3a_3aint32_5ft_2c_202_20_3e_2c_20t_2c_20std_3a_3aint32_5ft_20_3e_28',['PackMatrix< PackAWithIm2Col< T, std::int32_t, 2 >, T, std::int32_t >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], + ['packmatrix_3c_20packawithquantrowoffset_3c_20t_2c_20std_3a_3aint32_5ft_20_3e_2c_20t_2c_20std_3a_3aint32_5ft_20_3e_29',['PackMatrix< PackAWithQuantRowOffset< T, std::int32_t >, T, std::int32_t >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], + ['packmatrix_3c_20packawithrowoffset_3c_20t_2c_20std_3a_3aint32_5ft_20_3e_2c_20t_2c_20std_3a_3aint32_5ft_20_3e_30',['PackMatrix< PackAWithRowOffset< T, std::int32_t >, T, std::int32_t >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], + ['packmatrix_3c_20packbmatrix_3c_20std_3a_3aint8_5ft_2c_20std_3a_3aint32_5ft_20_3e_2c_20std_3a_3aint8_5ft_2c_20std_3a_3aint32_5ft_20_3e_31',['PackMatrix< PackBMatrix< std::int8_t, std::int32_t >, std::int8_t, std::int32_t >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], + ['packmatrix_3c_20packbmatrix_3c_20t_2c_20std_3a_3aint32_5ft_20_3e_2c_20t_2c_20std_3a_3aint32_5ft_20_3e_32',['PackMatrix< PackBMatrix< T, std::int32_t >, T, std::int32_t >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], + ['packmatrix_3c_20packingamatrix_2c_20typename_20packingamatrix_3a_3ainptype_2c_20typename_20packingamatrix_3a_3aacctype_20_3e_33',['PackMatrix< packingAMatrix, typename packingAMatrix::inpType, typename packingAMatrix::accType >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], + ['packmatrix_3c_20packingamatrix_2c_20uint8_5ft_2c_20typename_20packingamatrix_3a_3aacctype_20_3e_34',['PackMatrix< packingAMatrix, uint8_t, typename packingAMatrix::accType >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], + ['packmatrix_3c_20packingbmatrix_2c_20typename_20packingbmatrix_3a_3ainptype_2c_20typename_20packingbmatrix_3a_3aacctype_20_3e_35',['PackMatrix< packingBMatrix, typename packingBMatrix::inpType, typename packingBMatrix::accType >',['../classfbgemm_1_1_pack_matrix.html',1,'fbgemm']]], + ['packweightmatrixforgconv_36',['PackWeightMatrixForGConv',['../classfbgemm_1_1_pack_weight_matrix_for_g_conv.html',1,'PackWeightMatrixForGConv< T, accT, SPATIAL_DIM >'],['../classfbgemm_1_1_pack_weight_matrix_for_g_conv.html#ac4aac545b455c64f161fc78ac724d3e3',1,'fbgemm::PackWeightMatrixForGConv::PackWeightMatrixForGConv()']]], + ['packweightmatrixforgconv_3c_20std_3a_3aint8_5ft_2c_20std_3a_3aint32_5ft_2c_202_20_3e_37',['PackWeightMatrixForGConv< std::int8_t, std::int32_t, 2 >',['../classfbgemm_1_1_pack_weight_matrix_for_g_conv.html',1,'fbgemm']]], + ['packweightsforconv_38',['PackWeightsForConv',['../classfbgemm_1_1_pack_weights_for_conv.html',1,'fbgemm']]], + ['permute_20pooled_20embeddings_20operators_20cpu_39',['Permute Pooled Embeddings Operators (CPU)',['../group__permute-pooled-embs-cpu.html',1,'']]], + ['permute_20pooled_20embeddings_20operators_20cuda_40',['Permute Pooled Embeddings 
Operators (CUDA)',['../group__permute-pooled-embs-gpu.html',1,'']]], + ['pooled_20embeddings_20operators_20cpu_41',['Permute Pooled Embeddings Operators (CPU)',['../group__permute-pooled-embs-cpu.html',1,'']]], + ['pooled_20embeddings_20operators_20cuda_42',['Permute Pooled Embeddings Operators (CUDA)',['../group__permute-pooled-embs-gpu.html',1,'']]], + ['printmatrix_43',['printMatrix',['../namespacefbgemm.html#adfee356e154f8b2f88c725885b1dbc11',1,'fbgemm']]], + ['printpackedmatrix_44',['printPackedMatrix',['../classfbgemm_1_1_pack_matrix.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackMatrix::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_matrix.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAMatrix::printPackedMatrix()'],['../classfbgemm_1_1_pack_b_matrix.html#ab19db6d7505e9ed131b2a101f90d5093',1,'fbgemm::PackBMatrix::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAWithIm2Col::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAWithRowOffset::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAWithQuantRowOffset::printPackedMatrix()']]] ]; diff --git a/search/all_11.js b/search/all_11.js index 5fdc303d4..b866da46f 100644 --- a/search/all_11.js +++ b/search/all_11.js @@ -4,6 +4,7 @@ var searchData= ['quantization_20utilities_20avx2_1',['Quantization Utilities (AVX2)',['../group__fbgemm-quant-utils-avx2.html',1,'']]], ['quantization_20utilities_20avx512_2',['Quantization Utilities (AVX512)',['../group__fbgemm-quant-utils-avx512.html',1,'']]], ['quantization_20utilities_20generic_3',['Quantization Utilities (Generic)',['../group__fbgemm-quant-utils-generic.html',1,'']]], - ['quantize_20data_20cpu_20operators_4',['Quantize Data CPU Operators',['../group__quantize-data-cpu.html',1,'']]], - ['quantizegroupwise_5',['QuantizeGroupwise',['../group__fbgemm-quant-utils-generic.html#ga7a5705b5705425abc8f72fe339c2ae91',1,'fbgemm']]] + ['quantize_4',['Quantize',['../namespacefbgemm.html#a3350c03dc2d62e8e434332d088f6a895',1,'fbgemm']]], + ['quantize_20data_20cpu_20operators_5',['Quantize Data CPU Operators',['../group__quantize-data-cpu.html',1,'']]], + ['quantizegroupwise_6',['QuantizeGroupwise',['../group__fbgemm-quant-utils-generic.html#ga7a5705b5705425abc8f72fe339c2ae91',1,'fbgemm']]] ]; diff --git a/search/all_12.js b/search/all_12.js index f507e5586..07dce4ac5 100644 --- a/search/all_12.js +++ b/search/all_12.js @@ -1,14 +1,21 @@ var searchData= [ - ['real_5fmultiplier_0',['real_multiplier',['../structfbgemm_1_1_requantization_params.html#ad0e106dd418b27919550747b4bdd2e83',1,'fbgemm::RequantizationParams']]], - ['reluoutput_1',['ReluOutput',['../classfbgemm_1_1_relu_output.html',1,'fbgemm']]], - ['requantizationforfloatparams_5ft_2',['requantizationForFloatParams_t',['../structfbgemm_1_1requantization_for_float_params__t.html',1,'fbgemm']]], - ['requantizationparams_3',['RequantizationParams',['../structfbgemm_1_1_requantization_params.html',1,'fbgemm']]], - ['requantizationparams_5ft_4',['requantizationParams_t',['../structfbgemm_1_1requantization_params__t.html',1,'fbgemm']]], - ['requantizeforfloat_5',['ReQuantizeForFloat',['../classfbgemm_1_1_re_quantize_for_float.html',1,'ReQuantizeForFloat< FUSE_RELU, Q_GRAN, outT, inT, nextOPType 
>'],['../classfbgemm_1_1_re_quantize_for_float.html#a7ac7e62127705921ee912811a72697c6',1,'fbgemm::ReQuantizeForFloat::ReQuantizeForFloat()']]], - ['requantizeoutput_6',['ReQuantizeOutput',['../classfbgemm_1_1_re_quantize_output.html',1,'ReQuantizeOutput< FUSE_RELU, Q_GRAN, BIAS_TYPE, outT, inT, nextOPType >'],['../classfbgemm_1_1_re_quantize_output.html#ab36806f951ba9ce3733448d78633de16',1,'fbgemm::ReQuantizeOutput::ReQuantizeOutput()']]], - ['requantizeoutputprocessingavx2_7',['requantizeOutputProcessingAvx2',['../group__fbgemm-quant-utils-avx2.html#ga92e2b96889b039f101e24855e163021b',1,'fbgemm']]], - ['requantizeoutputprocessinggconvavx512_8',['requantizeOutputProcessingGConvAvx512',['../group__fbgemm-quant-utils-avx512.html#gab1b2ed3537f97d130f8ed039bc9aa463',1,'fbgemm']]], - ['row_5finterleave_9',['ROW_INTERLEAVE',['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx2_00_01typename_01std_fe37d46c6e9c6ab5afbe4d3665c382fb.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, std::int32_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx2_00_01typename_01std_858291a64a7808d94f01c15180f04f2c.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, std::int16_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01float_00_01float_00_01inst__set__t_1_1avx2_01_4.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< float, float, inst_set_t::avx2 >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512_00_01typename_01st563fe14c40d9d54cf9fe6113c26e66c0.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, std::int32_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512__ymm_00_01typename23c91419ea08f5673443445db549693f.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, std::int32_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512_00_01typename_01sta3c205cd2e965b8e751c31d57cbb32f1.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, std::int16_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512__ymm_00_01typenamea22ccba6542408684108d40af5374bf6.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, std::int16_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni_00_01typename_01std_1_495ec4d719e603d9e79f7a55acd55e37.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, accT, inst_set_t::avx512_vnni, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni__ymm_00_01typename_01s0dc87ea23078ad687de8b8ea67c6d3f3.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, accT, inst_set_t::avx512_vnni_ymm, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >::ROW_INTERLEAVE']]], - 
['rowoffsetbuffersize_10',['rowOffsetBufferSize',['../classfbgemm_1_1_pack_a_with_im2_col.html#aa77e3ad795b908ab26006e954e19fa19',1,'fbgemm::PackAWithIm2Col::rowOffsetBufferSize()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#aa77e3ad795b908ab26006e954e19fa19',1,'fbgemm::PackAWithRowOffset::rowOffsetBufferSize()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#aa77e3ad795b908ab26006e954e19fa19',1,'fbgemm::PackAWithQuantRowOffset::rowOffsetBufferSize()']]] + ['radix_5fsort_5fparallel_0',['radix_sort_parallel',['../namespacefbgemm.html#a3d34903420acd55fcebd0f8a19d8a84d',1,'fbgemm']]], + ['real_5fmultiplier_1',['real_multiplier',['../structfbgemm_1_1_requantization_params.html#ad0e106dd418b27919550747b4bdd2e83',1,'fbgemm::RequantizationParams']]], + ['reduceavx2_2',['reduceAvx2',['../namespacefbgemm.html#ac2d8c325cbc2893ed9a32f71c6a3596b',1,'fbgemm']]], + ['reluoutput_3',['ReluOutput',['../classfbgemm_1_1_relu_output.html',1,'fbgemm']]], + ['requantizationforfloatparams_5ft_4',['requantizationForFloatParams_t',['../structfbgemm_1_1requantization_for_float_params__t.html',1,'fbgemm']]], + ['requantizationparams_5',['RequantizationParams',['../structfbgemm_1_1_requantization_params.html',1,'fbgemm']]], + ['requantizationparams_5ft_6',['requantizationParams_t',['../structfbgemm_1_1requantization_params__t.html',1,'fbgemm']]], + ['requantize_5fu8acc32_5fref_7',['requantize_u8acc32_ref',['../namespacefbgemm.html#ab87b23be5587e267db6ab7d2b97c6915',1,'fbgemm::requantize_u8acc32_ref(int M, int N, int ld, const std::int32_t *inp, std::uint8_t *out, std::int32_t C_multiplier, std::int32_t C_right_shift, std::int32_t C_zero_point, std::int32_t A_zero_point, std::int32_t B_zero_point, const std::int32_t *row_offsets, const std::int32_t *col_offsets, const std::int32_t *bias, bool fuse_relu=false)'],['../namespacefbgemm.html#a798fea9136d48e1cd4c8a2926fb869ed',1,'fbgemm::requantize_u8acc32_ref(int M, int N, int ld, const std::int32_t *inp, std::uint8_t *out, const float *C_multiplier, std::int32_t C_zero_point, std::int32_t A_zero_point, const std::int32_t *B_zero_point, const std::int32_t *row_offsets, const std::int32_t *col_offsets, const std::int32_t *bias, int ncols_per_quant_group, bool fuse_relu=false)']]], + ['requantizeforfloat_8',['ReQuantizeForFloat',['../classfbgemm_1_1_re_quantize_for_float.html',1,'ReQuantizeForFloat< FUSE_RELU, Q_GRAN, outT, inT, nextOPType >'],['../classfbgemm_1_1_re_quantize_for_float.html#a7ac7e62127705921ee912811a72697c6',1,'fbgemm::ReQuantizeForFloat::ReQuantizeForFloat()']]], + ['requantizeoutput_9',['ReQuantizeOutput',['../classfbgemm_1_1_re_quantize_output.html',1,'ReQuantizeOutput< FUSE_RELU, Q_GRAN, BIAS_TYPE, outT, inT, nextOPType >'],['../classfbgemm_1_1_re_quantize_output.html#ab36806f951ba9ce3733448d78633de16',1,'fbgemm::ReQuantizeOutput::ReQuantizeOutput()']]], + ['requantizeoutputprocessingavx2_10',['requantizeOutputProcessingAvx2',['../group__fbgemm-quant-utils-avx2.html#ga92e2b96889b039f101e24855e163021b',1,'fbgemm']]], + ['requantizeoutputprocessinggconvavx512_11',['requantizeOutputProcessingGConvAvx512',['../group__fbgemm-quant-utils-avx512.html#gab1b2ed3537f97d130f8ed039bc9aa463',1,'fbgemm']]], + ['roundtofloat16_12',['RoundToFloat16',['../namespacefbgemm.html#a3bf47d3d99c8b3cb2af625d90c5494ab',1,'fbgemm']]], + ['row_5finterleave_13',['ROW_INTERLEAVE',['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx2_00_01typename_01std_fe37d46c6e9c6ab5afbe4d3665c382fb.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< 
T, std::int32_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx2_00_01typename_01std_858291a64a7808d94f01c15180f04f2c.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, std::int16_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01float_00_01float_00_01inst__set__t_1_1avx2_01_4.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< float, float, inst_set_t::avx2 >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512_00_01typename_01st563fe14c40d9d54cf9fe6113c26e66c0.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, std::int32_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512__ymm_00_01typename23c91419ea08f5673443445db549693f.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, std::int32_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512_00_01typename_01sta3c205cd2e965b8e751c31d57cbb32f1.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, std::int16_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512__ymm_00_01typenamea22ccba6542408684108d40af5374bf6.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, std::int16_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni_00_01typename_01std_1_495ec4d719e603d9e79f7a55acd55e37.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, accT, inst_set_t::avx512_vnni, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >::ROW_INTERLEAVE'],['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni__ymm_00_01typename_01s0dc87ea23078ad687de8b8ea67c6d3f3.html#a7b230df4f85b2d8c182b0da1d27b64a0',1,'PackingTraits< T, accT, inst_set_t::avx512_vnni_ymm, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >::ROW_INTERLEAVE']]], + ['row_5foffsets_5fu8acc32_5fref_14',['row_offsets_u8acc32_ref',['../namespacefbgemm.html#a0a160cf468a51c4634688b4f43851324',1,'fbgemm']]], + ['rowoffsetbuffersize_15',['rowOffsetBufferSize',['../classfbgemm_1_1_pack_a_with_im2_col.html#aa77e3ad795b908ab26006e954e19fa19',1,'fbgemm::PackAWithIm2Col::rowOffsetBufferSize()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#aa77e3ad795b908ab26006e954e19fa19',1,'fbgemm::PackAWithRowOffset::rowOffsetBufferSize()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#aa77e3ad795b908ab26006e954e19fa19',1,'fbgemm::PackAWithQuantRowOffset::rowOffsetBufferSize()']]], + ['rowoffsetbuffersizegconv_16',['rowOffsetBufferSizeGConv',['../namespacefbgemm.html#a8f972dca3254066120f58af5cf3b304c',1,'fbgemm']]], + ['rowwise_5fsparse_5fadagrad_5fref_17',['rowwise_sparse_adagrad_ref',['../namespacefbgemm.html#a3bee8daea3756d030209a6815db314d9',1,'fbgemm']]] ]; diff --git a/search/all_13.js b/search/all_13.js index 10cd0acf2..0bd6e046d 100644 --- a/search/all_13.js +++ b/search/all_13.js @@ -7,7 +7,11 @@ var searchData= 
['simd_5finfo_3c_20inst_5fset_5ft_3a_3aavx512_5fymm_20_3e_4',['simd_info< inst_set_t::avx512_ymm >',['../structfbgemm_1_1simd__info.html',1,'fbgemm']]], ['sparse_20data_20cpu_20operators_5',['Sparse Data CPU Operators',['../group__sparse-data-cpu.html',1,'']]], ['sparse_20data_20cuda_20operators_6',['Sparse Data CUDA Operators',['../group__sparse-data-cuda.html',1,'']]], - ['sparseadagradsignature_7',['SparseAdaGradSignature',['../classfbgemm_1_1_sparse_ada_grad_signature.html',1,'fbgemm']]], - ['spmdm_8',['SpMDM',['../classfbgemm_1_1_compressed_sparse_column.html#a9f8530a8442a8fd99bfe3896d0fff5de',1,'fbgemm::CompressedSparseColumn']]], - ['storecregs_9',['storeCRegs',['../classfbgemm_1_1_code_gen_base.html#a01bcc02f063a515df6d7fda518ef1d12',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)'],['../classfbgemm_1_1_code_gen_base.html#a12a9f2428ed6fd0dd90c91fd4477e271',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)'],['../classfbgemm_1_1_code_gen_base.html#a87e3f3cd0d070bf371466f4c7521266d',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)'],['../classfbgemm_1_1_code_gen_base.html#a87e3f3cd0d070bf371466f4c7521266d',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)']]] + ['sparse_5fadagrad_5fref_7',['sparse_adagrad_ref',['../namespacefbgemm.html#a3f04df11e31dd656955d1bd1f8a7893d',1,'fbgemm']]], + ['sparseadagradsignature_8',['SparseAdaGradSignature',['../classfbgemm_1_1_sparse_ada_grad_signature.html',1,'fbgemm']]], + ['sparsedensemm_9',['SparseDenseMM',['../namespacefbgemm.html#a1671cc912f6aa4bab678a0d255c8a690',1,'fbgemm']]], + ['spmdm_10',['SpMDM',['../classfbgemm_1_1_compressed_sparse_column.html#a9f8530a8442a8fd99bfe3896d0fff5de',1,'fbgemm::CompressedSparseColumn']]], + ['spmdm_5fref_11',['spmdm_ref',['../namespacefbgemm.html#a4f19d1389f9e99cc0daded599b1f1fd4',1,'fbgemm']]], + ['spmdmkernelavx2_12',['spmdmKernelAvx2',['../namespacefbgemm.html#a8b547effff25521017d20a5c4ddb8fcc',1,'fbgemm']]], + ['storecregs_13',['storeCRegs',['../classfbgemm_1_1_code_gen_base.html#a01bcc02f063a515df6d7fda518ef1d12',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)'],['../classfbgemm_1_1_code_gen_base.html#a12a9f2428ed6fd0dd90c91fd4477e271',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)'],['../classfbgemm_1_1_code_gen_base.html#a87e3f3cd0d070bf371466f4c7521266d',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)'],['../classfbgemm_1_1_code_gen_base.html#a87e3f3cd0d070bf371466f4c7521266d',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)']]] ]; diff --git a/search/all_14.js b/search/all_14.js index c53028077..0fe2208e9 100644 --- a/search/all_14.js +++ b/search/all_14.js @@ -1,10 +1,15 @@ var searchData= [ - ['tensor_20cuda_20operators_0',['Jagged Tensor CUDA Operators',['../group__jagged-tensor-ops-cuda.html',1,'']]], - ['tensor_20operators_1',['Jagged Tensor Operators',['../group__jagged-tensor-ops-cpu.html',1,'']]], - 
['tensorquantizationparams_2',['TensorQuantizationParams',['../structfbgemm_1_1_tensor_quantization_params.html',1,'fbgemm']]], - ['thread_5ftype_5ft_3',['thread_type_t',['../structfbgemm_1_1thread__type__t.html',1,'fbgemm']]], - ['tostring_4',['toString',['../structfbgemm_1_1conv__param__t.html#a1fe5121d6528fdea3f243321b3fa3a49',1,'fbgemm::conv_param_t']]], - ['transformation_20cpu_20operators_5',['Layout Transformation CPU Operators',['../group__layout-transform-cpu.html',1,'']]], - ['transformation_20cuda_20operators_6',['Layout Transformation CUDA Operators',['../group__layout-transform-cuda.html',1,'']]] + ['takedepthwisefastpath_0',['takeDepthWiseFastPath',['../namespacefbgemm.html#a523727ffa987158ac9021cc0d9b97e0b',1,'fbgemm']]], + ['takepointwisefastpath_1',['takePointWiseFastPath',['../namespacefbgemm.html#affb3e7487c8a1c6c7d1549eb7090aee1',1,'fbgemm']]], + ['tensor_20cuda_20operators_2',['Jagged Tensor CUDA Operators',['../group__jagged-tensor-ops-cuda.html',1,'']]], + ['tensor_20operators_3',['Jagged Tensor Operators',['../group__jagged-tensor-ops-cpu.html',1,'']]], + ['tensorquantizationparams_4',['TensorQuantizationParams',['../structfbgemm_1_1_tensor_quantization_params.html',1,'fbgemm']]], + ['thread_5ftype_5ft_5',['thread_type_t',['../structfbgemm_1_1thread__type__t.html',1,'fbgemm']]], + ['tostring_6',['toString',['../structfbgemm_1_1conv__param__t.html#a1fe5121d6528fdea3f243321b3fa3a49',1,'fbgemm::conv_param_t']]], + ['transformation_20cpu_20operators_7',['Layout Transformation CPU Operators',['../group__layout-transform-cpu.html',1,'']]], + ['transformation_20cuda_20operators_8',['Layout Transformation CUDA Operators',['../group__layout-transform-cuda.html',1,'']]], + ['transpose_5f8rows_9',['transpose_8rows',['../namespacefbgemm.html#a9ee41553113b6cd89e0e336022acf250',1,'fbgemm']]], + ['transpose_5fref_10',['transpose_ref',['../namespacefbgemm.html#a18832d0507cb6d8bce78371b97f66479',1,'fbgemm']]], + ['transpose_5fsimd_11',['transpose_simd',['../namespacefbgemm.html#a75b4ad78ae16c6e6782f82e1ff4012a8',1,'fbgemm']]] ]; diff --git a/search/all_2.js b/search/all_2.js index a8c300901..5133f9337 100644 --- a/search/all_2.js +++ b/search/all_2.js @@ -2,12 +2,17 @@ var searchData= [ ['bcol_5f_0',['bcol_',['../classfbgemm_1_1_pack_matrix.html#a39ff9fbd497e19e41a2666fb158b61bb',1,'fbgemm::PackMatrix']]], ['bcsrmatrix_1',['BCSRMatrix',['../structfbgemm_1_1_b_c_s_r_matrix.html',1,'fbgemm']]], - ['block_5ftype_5ft_2',['block_type_t',['../structfbgemm_1_1block__type__t.html',1,'fbgemm']]], - ['blockcols_3',['blockCols',['../classfbgemm_1_1_pack_matrix.html#abb3166a23e502ffb0bc12243ec205fc0',1,'fbgemm::PackMatrix']]], - ['blockcolsize_4',['blockColSize',['../classfbgemm_1_1_pack_matrix.html#a70ac7f71f0b18449dc35c7ecc1162f84',1,'fbgemm::PackMatrix']]], - ['blocking_5fparams_5',['blocking_params',['../classfbgemm_1_1_pack_matrix.html#aa94e292b3bf63b26e96c8d00654170a0',1,'fbgemm::PackMatrix']]], - ['blockingfactors_6',['BlockingFactors',['../structfbgemm_1_1_blocking_factors.html',1,'fbgemm']]], - ['blockrows_7',['blockRows',['../classfbgemm_1_1_pack_matrix.html#abf7a4f4bb1702ee01325f06409038631',1,'fbgemm::PackMatrix']]], - ['blockrowsize_8',['blockRowSize',['../classfbgemm_1_1_pack_matrix.html#a0f90dade3e2b75f0cbd459e24f94723d',1,'fbgemm::PackMatrix']]], - ['brow_5f_9',['brow_',['../classfbgemm_1_1_pack_matrix.html#a8d6df6d285a6ae4c23253f657c70efe0',1,'fbgemm::PackMatrix']]] + 
['bfloat16tofloat_5favx2_2',['Bfloat16ToFloat_avx2',['../namespacefbgemm.html#ae121dec17e2e8a7648b3077f970f8c49',1,'fbgemm']]], + ['bfloat16tofloat_5favx512_3',['Bfloat16ToFloat_avx512',['../namespacefbgemm.html#a874e15e8f1c021008e76a24e8714024c',1,'fbgemm']]], + ['bfloat16tofloat_5fref_4',['Bfloat16ToFloat_ref',['../namespacefbgemm.html#af84f2b20490beb1dd0da4b03cf93afac',1,'fbgemm']]], + ['bfloat16tofloat_5fsimd_5',['Bfloat16ToFloat_simd',['../namespacefbgemm.html#afb00b526459a0db53a2c6ffe0276dd3e',1,'fbgemm']]], + ['block_5ftype_5ft_6',['block_type_t',['../structfbgemm_1_1block__type__t.html',1,'fbgemm']]], + ['blockcols_7',['blockCols',['../classfbgemm_1_1_pack_matrix.html#abb3166a23e502ffb0bc12243ec205fc0',1,'fbgemm::PackMatrix']]], + ['blockcolsize_8',['blockColSize',['../classfbgemm_1_1_pack_matrix.html#a70ac7f71f0b18449dc35c7ecc1162f84',1,'fbgemm::PackMatrix']]], + ['blocking_5fparams_9',['blocking_params',['../classfbgemm_1_1_pack_matrix.html#aa94e292b3bf63b26e96c8d00654170a0',1,'fbgemm::PackMatrix']]], + ['blockingfactors_10',['BlockingFactors',['../structfbgemm_1_1_blocking_factors.html',1,'fbgemm']]], + ['blockrows_11',['blockRows',['../classfbgemm_1_1_pack_matrix.html#abf7a4f4bb1702ee01325f06409038631',1,'fbgemm::PackMatrix']]], + ['blockrowsize_12',['blockRowSize',['../classfbgemm_1_1_pack_matrix.html#a0f90dade3e2b75f0cbd459e24f94723d',1,'fbgemm::PackMatrix']]], + ['broadcast8bit_13',['broadcast8Bit',['../namespacefbgemm.html#a4840e075e8c46a94cb7a489c3fa6aee4',1,'fbgemm']]], + ['brow_5f_14',['brow_',['../classfbgemm_1_1_pack_matrix.html#a8d6df6d285a6ae4c23253f657c70efe0',1,'fbgemm::PackMatrix']]] ]; diff --git a/search/all_3.js b/search/all_3.js index 9ee0b3958..814489e1f 100644 --- a/search/all_3.js +++ b/search/all_3.js @@ -1,21 +1,25 @@ var searchData= [ - ['codecache_0',['CodeCache',['../classfbgemm_1_1_code_cache.html',1,'fbgemm']]], - ['codecache_3c_20kernel_5fsig_5ft_2c_20jit_5fconv_5fkernel_5ffp_20_3e_1',['CodeCache< kernel_sig_t, jit_conv_kernel_fp >',['../classfbgemm_1_1_code_cache.html',1,'fbgemm']]], - ['codecache_3c_20std_3a_3atuple_3c_20bool_2c_20int_2c_20int_2c_20int_20_3e_2c_20jit_5fmicro_5fkernel_5ffp_5fconvt_20_3e_2',['CodeCache< std::tuple< bool, int, int, int >, jit_micro_kernel_fp_convT >',['../classfbgemm_1_1_code_cache.html',1,'fbgemm']]], - ['codecache_3c_20std_3a_3atuple_3c_20bool_2c_20int_2c_20int_2c_20int_2c_20int_2c_20int_2c_20int_20_3e_2c_20jit_5fmicro_5fkernel_5ffp_20_3e_3',['CodeCache< std::tuple< bool, int, int, int, int, int, int >, jit_micro_kernel_fp >',['../classfbgemm_1_1_code_cache.html',1,'fbgemm']]], - ['codegenbase_4',['CodeGenBase',['../classfbgemm_1_1_code_gen_base.html',1,'CodeGenBase< TA, TB, TC, accT >'],['../classfbgemm_1_1_code_gen_base.html#a843f4289cb9de379bac477ed0dcba1cf',1,'fbgemm::CodeGenBase::CodeGenBase()']]], - ['codegenbase_3c_20packingamatrix_3a_3ainptype_2c_20packingbmatrix_3a_3ainptype_2c_20ct_2c_20packingbmatrix_3a_3aacctype_20_3e_5',['CodeGenBase< packingAMatrix::inpType, packingBMatrix::inpType, cT, packingBMatrix::accType >',['../classfbgemm_1_1_code_gen_base.html',1,'fbgemm']]], - ['codegenbase_3c_20uint8_5ft_2c_20int8_5ft_2c_20int32_5ft_2c_20packingamatrix_3a_3aacctype_20_3e_6',['CodeGenBase< uint8_t, int8_t, int32_t, packingAMatrix::accType >',['../classfbgemm_1_1_code_gen_base.html',1,'fbgemm']]], - ['combine_20input_20operators_7',['Combine Input Operators',['../group__input-combine.html',1,'']]], - ['comparator_8',['Comparator',['../structfbgemm__gpu_1_1_comparator.html',1,'fbgemm_gpu']]], - 
['compressedsparsecolumn_9',['CompressedSparseColumn',['../classfbgemm_1_1_compressed_sparse_column.html',1,'fbgemm']]], - ['conv_5fparam_5ft_10',['conv_param_t',['../structfbgemm_1_1conv__param__t.html',1,'conv_param_t< SPATIAL_DIM >'],['../structfbgemm_1_1conv__param__t.html#a926431139532f74306f6cd6dc08d171a',1,'fbgemm::conv_param_t::conv_param_t()']]], - ['conv_5fparam_5ft_3c_202_20_3e_11',['conv_param_t< 2 >',['../structfbgemm_1_1conv__param__t.html',1,'fbgemm']]], - ['conv_5fparam_5ft_3c_20spatial_5fdim_20_3e_12',['conv_param_t< SPATIAL_DIM >',['../structfbgemm_1_1conv__param__t.html',1,'fbgemm']]], - ['cpu_13',['Permute Pooled Embeddings Operators (CPU)',['../group__permute-pooled-embs-cpu.html',1,'']]], - ['cpu_20operators_14',['CPU Operators',['../group__embedding-cpu.html',1,'Embedding CPU Operators'],['../group__layout-transform-cpu.html',1,'Layout Transformation CPU Operators'],['../group__quantize-data-cpu.html',1,'Quantize Data CPU Operators'],['../group__sparse-data-cpu.html',1,'Sparse Data CPU Operators']]], - ['cuda_15',['CUDA',['../group__permute-pooled-embs-gpu.html',1,'Permute Pooled Embeddings Operators (CUDA)'],['../group__quantize-ops-cuda.html',1,'Quantization Operators (CUDA)']]], - ['cuda_20memory_20operators_16',['CUDA Memory Operators',['../group__cumem-utils.html',1,'']]], - ['cuda_20operators_17',['CUDA Operators',['../group__table-batched-embed-cuda.html',1,'CUDA Operators'],['../group__embedding-cuda.html',1,'Embedding CUDA Operators'],['../group__jagged-tensor-ops-cuda.html',1,'Jagged Tensor CUDA Operators'],['../group__layout-transform-cuda.html',1,'Layout Transformation CUDA Operators'],['../group__sparse-data-cuda.html',1,'Sparse Data CUDA Operators']]] + ['cblas_5fsgemm_5fref_0',['cblas_sgemm_ref',['../namespacefbgemm.html#a88e97a715133ac27ca83ae5ab05010ed',1,'fbgemm']]], + ['codecache_1',['CodeCache',['../classfbgemm_1_1_code_cache.html',1,'fbgemm']]], + ['codecache_3c_20kernel_5fsig_5ft_2c_20jit_5fconv_5fkernel_5ffp_20_3e_2',['CodeCache< kernel_sig_t, jit_conv_kernel_fp >',['../classfbgemm_1_1_code_cache.html',1,'fbgemm']]], + ['codecache_3c_20std_3a_3atuple_3c_20bool_2c_20int_2c_20int_2c_20int_20_3e_2c_20jit_5fmicro_5fkernel_5ffp_5fconvt_20_3e_3',['CodeCache< std::tuple< bool, int, int, int >, jit_micro_kernel_fp_convT >',['../classfbgemm_1_1_code_cache.html',1,'fbgemm']]], + ['codecache_3c_20std_3a_3atuple_3c_20bool_2c_20int_2c_20int_2c_20int_2c_20int_2c_20int_2c_20int_20_3e_2c_20jit_5fmicro_5fkernel_5ffp_20_3e_4',['CodeCache< std::tuple< bool, int, int, int, int, int, int >, jit_micro_kernel_fp >',['../classfbgemm_1_1_code_cache.html',1,'fbgemm']]], + ['codegenbase_5',['CodeGenBase',['../classfbgemm_1_1_code_gen_base.html',1,'CodeGenBase< TA, TB, TC, accT >'],['../classfbgemm_1_1_code_gen_base.html#a843f4289cb9de379bac477ed0dcba1cf',1,'fbgemm::CodeGenBase::CodeGenBase()']]], + ['codegenbase_3c_20packingamatrix_3a_3ainptype_2c_20packingbmatrix_3a_3ainptype_2c_20ct_2c_20packingbmatrix_3a_3aacctype_20_3e_6',['CodeGenBase< packingAMatrix::inpType, packingBMatrix::inpType, cT, packingBMatrix::accType >',['../classfbgemm_1_1_code_gen_base.html',1,'fbgemm']]], + ['codegenbase_3c_20uint8_5ft_2c_20int8_5ft_2c_20int32_5ft_2c_20packingamatrix_3a_3aacctype_20_3e_7',['CodeGenBase< uint8_t, int8_t, int32_t, packingAMatrix::accType >',['../classfbgemm_1_1_code_gen_base.html',1,'fbgemm']]], + ['col_5foffsets_5fwith_5fzero_5fpt_5fs8acc32_5fref_8',['col_offsets_with_zero_pt_s8acc32_ref',['../namespacefbgemm.html#a0f66af5e8e787dc1ff6893ac75ae161f',1,'fbgemm']]], + 
['combine_20input_20operators_9',['Combine Input Operators',['../group__input-combine.html',1,'']]], + ['comparator_10',['Comparator',['../structfbgemm__gpu_1_1_comparator.html',1,'fbgemm_gpu']]], + ['compare_5fbuffers_11',['compare_buffers',['../namespacefbgemm.html#a9d995b583abb4b09927c90f66e3b1463',1,'fbgemm']]], + ['compressedsparsecolumn_12',['CompressedSparseColumn',['../classfbgemm_1_1_compressed_sparse_column.html',1,'fbgemm']]], + ['conv_5fparam_5ft_13',['conv_param_t',['../structfbgemm_1_1conv__param__t.html',1,'conv_param_t< SPATIAL_DIM >'],['../structfbgemm_1_1conv__param__t.html#a926431139532f74306f6cd6dc08d171a',1,'fbgemm::conv_param_t::conv_param_t()']]], + ['conv_5fparam_5ft_3c_202_20_3e_14',['conv_param_t< 2 >',['../structfbgemm_1_1conv__param__t.html',1,'fbgemm']]], + ['conv_5fparam_5ft_3c_20spatial_5fdim_20_3e_15',['conv_param_t< SPATIAL_DIM >',['../structfbgemm_1_1conv__param__t.html',1,'fbgemm']]], + ['convfastpath_16',['ConvFastPath',['../namespacefbgemm.html#ad5bda89769bca9a01ddf81591f20ef02',1,'fbgemm']]], + ['cpu_17',['Permute Pooled Embeddings Operators (CPU)',['../group__permute-pooled-embs-cpu.html',1,'']]], + ['cpu_20operators_18',['CPU Operators',['../group__embedding-cpu.html',1,'Embedding CPU Operators'],['../group__layout-transform-cpu.html',1,'Layout Transformation CPU Operators'],['../group__quantize-data-cpu.html',1,'Quantize Data CPU Operators'],['../group__sparse-data-cpu.html',1,'Sparse Data CPU Operators']]], + ['cuda_19',['CUDA',['../group__permute-pooled-embs-gpu.html',1,'Permute Pooled Embeddings Operators (CUDA)'],['../group__quantize-ops-cuda.html',1,'Quantization Operators (CUDA)']]], + ['cuda_20memory_20operators_20',['CUDA Memory Operators',['../group__cumem-utils.html',1,'']]], + ['cuda_20operators_21',['CUDA Operators',['../group__table-batched-embed-cuda.html',1,'CUDA Operators'],['../group__embedding-cuda.html',1,'Embedding CUDA Operators'],['../group__jagged-tensor-ops-cuda.html',1,'Jagged Tensor CUDA Operators'],['../group__layout-transform-cuda.html',1,'Layout Transformation CUDA Operators'],['../group__sparse-data-cuda.html',1,'Sparse Data CUDA Operators']]] ]; diff --git a/search/all_4.js b/search/all_4.js index 052da5884..794911b99 100644 --- a/search/all_4.js +++ b/search/all_4.js @@ -3,12 +3,14 @@ var searchData= ['data_20cpu_20operators_0',['Data CPU Operators',['../group__quantize-data-cpu.html',1,'Quantize Data CPU Operators'],['../group__sparse-data-cpu.html',1,'Sparse Data CPU Operators']]], ['data_20cuda_20operators_1',['Sparse Data CUDA Operators',['../group__sparse-data-cuda.html',1,'']]], ['density_2',['Density',['../classfbgemm_1_1_compressed_sparse_column.html#a6629bcd3b06c396540c2d5b7e4852164',1,'fbgemm::CompressedSparseColumn']]], - ['direct_5fmapped_5flru_5fcache_5fpopulate_5fbyte_5fcuda_3',['direct_mapped_lru_cache_populate_byte_cuda',['../group__table-batched-embed-cuda.html#gae019b6879bd9f89a146e0700d5a4bd8b',1,'split_embeddings_cache_cuda.cuh']]], - ['direct_5fmapped_5flxu_5fcache_5flookup_5fcuda_4',['direct_mapped_lxu_cache_lookup_cuda',['../group__table-batched-embed-cuda.html#gab305ebdd3822794c5ac462bf5df4bb49',1,'split_embeddings_cache_cuda.cuh']]], - ['donothing_5',['DoNothing',['../classfbgemm_1_1_do_nothing.html',1,'fbgemm']]], - ['donothing_3c_20float_2c_20float_20_3e_6',['DoNothing< float, float >',['../classfbgemm_1_1_do_nothing.html',1,'fbgemm']]], - ['donothing_3c_20std_3a_3aint32_5ft_2c_20std_3a_3aint32_5ft_20_3e_7',['DoNothing< std::int32_t, std::int32_t 
>',['../classfbgemm_1_1_do_nothing.html',1,'fbgemm']]], - ['donothing_3c_20std_3a_3auint8_5ft_2c_20std_3a_3auint8_5ft_20_3e_8',['DoNothing< std::uint8_t, std::uint8_t >',['../classfbgemm_1_1_do_nothing.html',1,'fbgemm']]], - ['dosconvoninpbuffer_9',['DoSConvOnInpBuffer',['../classfbgemm_1_1_do_s_conv_on_inp_buffer.html',1,'fbgemm']]], - ['dospmdmoninpbuffer_10',['DoSpmdmOnInpBuffer',['../classfbgemm_1_1_do_spmdm_on_inp_buffer.html',1,'fbgemm']]] + ['depthwise_5f2d_5fsame_5fpad_3',['depthwise_2d_same_pad',['../namespacefbgemm.html#ac2e9634d4e2366ed6f2181ae7e7b17b2',1,'fbgemm']]], + ['depthwise_5f3d_5fsame_5fpad_4',['depthwise_3d_same_pad',['../namespacefbgemm.html#a0bfa499fd1b485bc3e457842343bca57',1,'fbgemm']]], + ['direct_5fmapped_5flru_5fcache_5fpopulate_5fbyte_5fcuda_5',['direct_mapped_lru_cache_populate_byte_cuda',['../group__table-batched-embed-cuda.html#gae019b6879bd9f89a146e0700d5a4bd8b',1,'split_embeddings_cache_cuda.cuh']]], + ['direct_5fmapped_5flxu_5fcache_5flookup_5fcuda_6',['direct_mapped_lxu_cache_lookup_cuda',['../group__table-batched-embed-cuda.html#gab305ebdd3822794c5ac462bf5df4bb49',1,'split_embeddings_cache_cuda.cuh']]], + ['donothing_7',['DoNothing',['../classfbgemm_1_1_do_nothing.html',1,'fbgemm']]], + ['donothing_3c_20float_2c_20float_20_3e_8',['DoNothing< float, float >',['../classfbgemm_1_1_do_nothing.html',1,'fbgemm']]], + ['donothing_3c_20std_3a_3aint32_5ft_2c_20std_3a_3aint32_5ft_20_3e_9',['DoNothing< std::int32_t, std::int32_t >',['../classfbgemm_1_1_do_nothing.html',1,'fbgemm']]], + ['donothing_3c_20std_3a_3auint8_5ft_2c_20std_3a_3auint8_5ft_20_3e_10',['DoNothing< std::uint8_t, std::uint8_t >',['../classfbgemm_1_1_do_nothing.html',1,'fbgemm']]], + ['dosconvoninpbuffer_11',['DoSConvOnInpBuffer',['../classfbgemm_1_1_do_s_conv_on_inp_buffer.html',1,'fbgemm']]], + ['dospmdmoninpbuffer_12',['DoSpmdmOnInpBuffer',['../classfbgemm_1_1_do_spmdm_on_inp_buffer.html',1,'fbgemm']]] ]; diff --git a/search/all_5.js b/search/all_5.js index c7e8bd291..7b2327a66 100644 --- a/search/all_5.js +++ b/search/all_5.js @@ -4,10 +4,13 @@ var searchData= ['embedding_20cuda_20operators_1',['Embedding CUDA Operators',['../group__embedding-cuda.html',1,'']]], ['embeddings_20operators_20cpu_2',['Permute Pooled Embeddings Operators (CPU)',['../group__permute-pooled-embs-cpu.html',1,'']]], ['embeddings_20operators_20cuda_3',['Permute Pooled Embeddings Operators (CUDA)',['../group__permute-pooled-embs-gpu.html',1,'']]], - ['equals_4',['equals',['../classfbgemm_1_1_pack_b_matrix.html#a5299773354edb62a96e39dc55ab5d770',1,'fbgemm::PackBMatrix']]], - ['example_20method_20group_5',['Example Method Group',['../group__example-method-group.html',1,'']]], - ['example_5fmethod_6',['example_method',['../group__example-method-group.html#ga56a504c1752577359ba5b75a9cd52737',1,'example_code.cpp']]], - ['executekernel_7',['ExecuteKernel',['../classfbgemm_1_1_execute_kernel.html',1,'ExecuteKernel< packingAMatrix, packingBMatrix, cT, processOutputType >'],['../classfbgemm_1_1_execute_kernel_3_01packing_a_matrix_00_01_pack_b_matrix_3_01int8__t_00_01typenam9894617fba2431fcc8042b1a22b96270.html#ae4a4e6063c0cb62d64d6159d102a899b',1,'fbgemm::ExecuteKernel< packingAMatrix, PackBMatrix< int8_t, typename packingAMatrix::accType >, cT, processOutputType >::ExecuteKernel()']]], - ['executekernel_3c_20packingamatrix_2c_20packbmatrix_3c_20int8_5ft_2c_20typename_20packingamatrix_3a_3aacctype_20_3e_2c_20ct_2c_20processoutputtype_20_3e_8',['ExecuteKernel< packingAMatrix, PackBMatrix< int8_t, typename packingAMatrix::accType >, 
cT, processOutputType >',['../classfbgemm_1_1_execute_kernel_3_01packing_a_matrix_00_01_pack_b_matrix_3_01int8__t_00_01typenam9894617fba2431fcc8042b1a22b96270.html',1,'fbgemm']]], - ['expand_5finto_5fjagged_5fpermute_5fcuda_9',['expand_into_jagged_permute_cuda',['../group__sparse-data-cuda.html#gab7344d63216dd37673733b26954aaec4',1,'fbgemm_gpu']]] + ['emitextracthalfvector_4',['emitExtractHalfVector',['../namespacefbgemm.html#a602ac18c0e6c32448ff8d21818bded38',1,'fbgemm']]], + ['emitloaddword_5',['emitLoadDWord',['../namespacefbgemm.html#aa56904bd84399e10104d286e0e10a2d7',1,'fbgemm']]], + ['equals_6',['equals',['../classfbgemm_1_1_pack_b_matrix.html#a5299773354edb62a96e39dc55ab5d770',1,'fbgemm::PackBMatrix']]], + ['example_20method_20group_7',['Example Method Group',['../group__example-method-group.html',1,'']]], + ['example_5fmethod_8',['example_method',['../group__example-method-group.html#ga56a504c1752577359ba5b75a9cd52737',1,'example_code.cpp']]], + ['executekernel_9',['ExecuteKernel',['../classfbgemm_1_1_execute_kernel.html',1,'ExecuteKernel< packingAMatrix, packingBMatrix, cT, processOutputType >'],['../classfbgemm_1_1_execute_kernel_3_01packing_a_matrix_00_01_pack_b_matrix_3_01int8__t_00_01typenam9894617fba2431fcc8042b1a22b96270.html#ae4a4e6063c0cb62d64d6159d102a899b',1,'fbgemm::ExecuteKernel< packingAMatrix, PackBMatrix< int8_t, typename packingAMatrix::accType >, cT, processOutputType >::ExecuteKernel()']]], + ['executekernel_3c_20packingamatrix_2c_20packbmatrix_3c_20int8_5ft_2c_20typename_20packingamatrix_3a_3aacctype_20_3e_2c_20ct_2c_20processoutputtype_20_3e_10',['ExecuteKernel< packingAMatrix, PackBMatrix< int8_t, typename packingAMatrix::accType >, cT, processOutputType >',['../classfbgemm_1_1_execute_kernel_3_01packing_a_matrix_00_01_pack_b_matrix_3_01int8__t_00_01typenam9894617fba2431fcc8042b1a22b96270.html',1,'fbgemm']]], + ['expand_5finto_5fjagged_5fpermute_5fcuda_11',['expand_into_jagged_permute_cuda',['../group__sparse-data-cuda.html#gab7344d63216dd37673733b26954aaec4',1,'fbgemm_gpu']]], + ['experimental_20gen_20ai_20attention_12',['Experimental-gen-ai-attention',['../group__experimental-gen-ai-attention.html',1,'']]] ]; diff --git a/search/all_6.js b/search/all_6.js index 31fa5b3e7..507f453b5 100644 --- a/search/all_6.js +++ b/search/all_6.js @@ -1,6 +1,48 @@ var searchData= [ - ['findminmax_0',['FindMinMax',['../group__fbgemm-quant-utils-avx2.html#ga38920438e5d25d4092a1b695f3420b8e',1,'fbgemm']]], - ['floatorhalftofusednbitrowwisequantizedsbhalf_1',['FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf',['../group__fbgemm-quant-utils-generic.html#ga6b77c8540e630305db9a5f30a84e7e5b',1,'fbgemm']]], - ['fusedquantizedequantize_2',['FusedQuantizeDequantize',['../group__fbgemm-quant-utils-generic.html#gaa51ab0f363fbcdf8ffa0a561884225d9',1,'fbgemm']]] + ['fbgemm_0',['fbgemm',['../namespacefbgemm.html',1,'']]], + ['fbgemmalignedalloc_1',['fbgemmAlignedAlloc',['../namespacefbgemm.html#ab55953ea0d8867577ef9b096d68cfce4',1,'fbgemm']]], + ['fbgemmalignedfree_2',['fbgemmAlignedFree',['../namespacefbgemm.html#a539e708e033ffe98b075a6b6c5cb4b46',1,'fbgemm']]], + ['fbgemmconv_3',['fbgemmConv',['../namespacefbgemm.html#adc9d2af7ea01634fd2e5bef0e6baa0ab',1,'fbgemm']]], + ['fbgemmenableavx512ymm_4',['fbgemmEnableAvx512Ymm',['../namespacefbgemm.html#a9d17a4894f822f7afd5c79407bbf91d5',1,'fbgemm']]], + ['fbgemmforceisa_5',['fbgemmForceIsa',['../namespacefbgemm.html#af150495d47d5ccb1670524166fd9b45d',1,'fbgemm']]], + 
['fbgemmget2dpartition_6',['fbgemmGet2DPartition',['../namespacefbgemm.html#a26e1a7f0b1935835ad95a52aa4f56eb8',1,'fbgemm']]], + ['fbgemmgetthreadpartition_7',['fbgemmGetThreadPartition',['../namespacefbgemm.html#a38cca353e8e2984e37704be8bbc327fe',1,'fbgemm']]], + ['fbgemmgroupwiseconv_8',['fbgemmGroupwiseConv',['../namespacefbgemm.html#a5780ef4a16a1682740af29283360caa4',1,'fbgemm']]], + ['fbgemmhasarmneonsupport_9',['fbgemmHasArmNeonSupport',['../namespacefbgemm.html#af727aa5e29e172f994653d01e444973e',1,'fbgemm']]], + ['fbgemmhasarmsve2support_10',['fbgemmHasArmSve2Support',['../namespacefbgemm.html#aae9bb60bfb4acb2e62976adcd98ccaa0',1,'fbgemm']]], + ['fbgemmhasavx2support_11',['fbgemmHasAvx2Support',['../namespacefbgemm.html#a52e1a4ce201a6b89ad8b3dee69c59d40',1,'fbgemm']]], + ['fbgemmhasavx512support_12',['fbgemmHasAvx512Support',['../namespacefbgemm.html#ae0e6eeaba3d5c4265d9aee5e898fb329',1,'fbgemm']]], + ['fbgemmhasavx512vnnisupport_13',['fbgemmHasAvx512VnniSupport',['../namespacefbgemm.html#ae6e5fa9178cd2a70a01ef78a571802f5',1,'fbgemm']]], + ['fbgemminstructionset_14',['fbgemmInstructionSet',['../namespacefbgemm.html#a2be92a96ebd3c0d9bc9f9c0d0c537969',1,'fbgemm']]], + ['fbgemmisintelxeond_15',['fbgemmIsIntelXeonD',['../namespacefbgemm.html#a84685dfa70eedf3c2befcb8d02cf9d27',1,'fbgemm']]], + ['fbgemmoptimizedgconv_16',['fbgemmOptimizedGConv',['../namespacefbgemm.html#a19ec32cc9a1932f774bd8b2e0b047afe',1,'fbgemm']]], + ['fbgemmpacked_17',['fbgemmPacked',['../namespacefbgemm.html#a1f01b8b3f8fea3e9c8ccc2aed30ba70a',1,'fbgemm']]], + ['fbgemmpartition1d_18',['fbgemmPartition1D',['../namespacefbgemm.html#abf9cb71c5c3a79935f7146f05510bb19',1,'fbgemm']]], + ['fbgemmpartition1dblocked_19',['fbgemmPartition1DBlocked',['../namespacefbgemm.html#ae6d6321b283eaa5a8ddaaa96ea22c62f',1,'fbgemm']]], + ['fbgemmsupportedcpu_20',['fbgemmSupportedCPU',['../namespacefbgemm.html#a713e97500428aba767f6fcaf39aac4b9',1,'fbgemm']]], + ['findminmax_21',['FindMinMax',['../group__fbgemm-quant-utils-avx2.html#ga38920438e5d25d4092a1b695f3420b8e',1,'fbgemm']]], + ['float16tofloat_5favx2_22',['Float16ToFloat_avx2',['../namespacefbgemm.html#ae0cba6562b792a67eb376841005a907b',1,'fbgemm']]], + ['float16tofloat_5favx512_23',['Float16ToFloat_avx512',['../namespacefbgemm.html#a5d28283194eed8d19ebc5634fd78913f',1,'fbgemm']]], + ['float16tofloat_5fref_24',['Float16ToFloat_ref',['../namespacefbgemm.html#afc22ec6e38a38c7f41484f844dbfbeac',1,'fbgemm']]], + ['float16tofloat_5fsimd_25',['Float16ToFloat_simd',['../namespacefbgemm.html#af066434e23720ecd4ddcc51d7a616aef',1,'fbgemm']]], + ['float8tofloat_5fref_26',['Float8ToFloat_ref',['../namespacefbgemm.html#ad699dd756e87f820cca1f1202cec2a11',1,'fbgemm']]], + ['floatorhalftofused8bitrowwisequantizedsbfloat_27',['FloatOrHalfToFused8BitRowwiseQuantizedSBFloat',['../namespacefbgemm.html#afafa94e239d016cf273ad0597152b86c',1,'fbgemm']]], + ['floatorhalftofused8bitrowwisequantizedsbfloatref_28',['FloatOrHalfToFused8BitRowwiseQuantizedSBFloatRef',['../namespacefbgemm.html#a7cba6dad217715349653862b3e691057',1,'fbgemm']]], + ['floatorhalftofusednbitrowwisequantizedsbhalf_29',['FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf',['../group__fbgemm-quant-utils-generic.html#ga6b77c8540e630305db9a5f30a84e7e5b',1,'fbgemm']]], + ['floatorhalftofusednbitrowwisequantizedsbhalfref_30',['FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfRef',['../namespacefbgemm.html#ad89426896d5c7b1f1b5db8ebaf201547',1,'fbgemm']]], + 
['floattobfloat16_5favx2_31',['FloatToBfloat16_avx2',['../namespacefbgemm.html#a444fa054549274d8c6f442f0b866aa98',1,'fbgemm']]], + ['floattobfloat16_5favx512_32',['FloatToBfloat16_avx512',['../namespacefbgemm.html#a0e2b50c7d828e56f78cc0b8368dee35a',1,'fbgemm']]], + ['floattobfloat16_5fref_33',['FloatToBfloat16_ref',['../namespacefbgemm.html#ab22f5d961c6a42aab1c37b17a3d93770',1,'fbgemm']]], + ['floattobfloat16_5fsimd_34',['FloatToBfloat16_simd',['../namespacefbgemm.html#aa423a42208a4fde5f23ab6a28cef24a3',1,'fbgemm']]], + ['floattofloat16_5favx2_35',['FloatToFloat16_avx2',['../namespacefbgemm.html#a61c95557adf5477f3078af22d6054a7d',1,'fbgemm']]], + ['floattofloat16_5favx512_36',['FloatToFloat16_avx512',['../namespacefbgemm.html#a5305ff58517ca3669ec41c7f1d4817a4',1,'fbgemm']]], + ['floattofloat16_5fref_37',['FloatToFloat16_ref',['../namespacefbgemm.html#af44c584c974f95f4866806cee3798742',1,'fbgemm']]], + ['floattofloat16_5fsimd_38',['FloatToFloat16_simd',['../namespacefbgemm.html#a53a50b113345c09b89b45834f31d52df',1,'fbgemm']]], + ['floattofloat8_5fref_39',['FloatToFloat8_ref',['../namespacefbgemm.html#a32a45639603e1584965b471846fd067f',1,'fbgemm']]], + ['fused8bitrowwisequantizedsbfloattofloatorhalf_40',['Fused8BitRowwiseQuantizedSBFloatToFloatOrHalf',['../namespacefbgemm.html#a42aefaaf238d065625a64a757f998eef',1,'fbgemm']]], + ['fused8bitrowwisequantizedsbfloattofloatorhalfref_41',['Fused8BitRowwiseQuantizedSBFloatToFloatOrHalfRef',['../namespacefbgemm.html#a19217d3a8551a7b251ecb1eba79669bc',1,'fbgemm']]], + ['fusednbitrowwisequantizedsbhalftofloatorhalf_42',['FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf',['../namespacefbgemm.html#a2b2ca55a7d2d5c2dbba337ee2e585f6a',1,'fbgemm']]], + ['fusednbitrowwisequantizedsbhalftofloatorhalfref_43',['FusedNBitRowwiseQuantizedSBHalfToFloatOrHalfRef',['../namespacefbgemm.html#adee65a9c68614b76ffdf2b3bc11b4618',1,'fbgemm']]], + ['fusedquantizedequantize_44',['FusedQuantizeDequantize',['../group__fbgemm-quant-utils-generic.html#gaa51ab0f363fbcdf8ffa0a561884225d9',1,'fbgemm']]] ]; diff --git a/search/all_7.js b/search/all_7.js index c8e744634..c7f761e7c 100644 --- a/search/all_7.js +++ b/search/all_7.js @@ -1,13 +1,28 @@ var searchData= [ ['g_0',['G',['../structfbgemm_1_1conv__param__t.html#ab8735735273b982cc3125e51fe46e2f4',1,'fbgemm::conv_param_t']]], - ['gencomputeblock_1',['genComputeBlock',['../classfbgemm_1_1_code_gen_base.html#a176924b076bd6485a83a0dd75c20cac6',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp B_pf, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#aa2af59748852e0ff4bfec95c946cbcc9',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp B_pf, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#a9727e9d8a35fccd581ad604006ea77fe',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#a8c35863faf15810abfe3fe1cd432a687',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#aa2af59748852e0ff4bfec95c946cbcc9',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp B_pf, int rowRegs, int colRegs, int 
lda)'],['../classfbgemm_1_1_code_gen_base.html#a8c35863faf15810abfe3fe1cd432a687',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp, int rowRegs, int colRegs, int lda)']]], - ['generic_2',['Quantization Utilities (Generic)',['../group__fbgemm-quant-utils-generic.html',1,'']]], - ['generic_5fhistogram_5fbinning_5fcalibration_5fby_5ffeature_5fcpu_3',['generic_histogram_binning_calibration_by_feature_cpu',['../group__sparse-data-cpu.html#gaef2a0a8c27e3b8b2d72be5c95ba7539e',1,'fbgemm_gpu']]], - ['get_5funique_5findices_5fcuda_4',['get_unique_indices_cuda',['../group__table-batched-embed-cuda.html#ga4887151424a90cfd0abef174a4e91f3f',1,'get_unique_indices_cuda(at::Tensor linear_indices, int64_t max_indices, bool compute_count): linearize_cache_indices.cu'],['../group__table-batched-embed-cuda.html#ga4887151424a90cfd0abef174a4e91f3f',1,'get_unique_indices_cuda(Tensor linear_indices, int64_t max_indices, bool compute_count): linearize_cache_indices.cu']]], - ['getbuf_5',['getBuf',['../classfbgemm_1_1_pack_matrix.html#ac34c29cb4d372b728c2b8460e142269b',1,'fbgemm::PackMatrix::getBuf()'],['../classfbgemm_1_1_pack_weight_matrix_for_g_conv.html#a46f1fd2c0a84f4b0b40f2e907c1908a2',1,'fbgemm::PackWeightMatrixForGConv::getBuf()']]], - ['getcodeloggingfile_6',['getCodeLoggingFile',['../classfbgemm_1_1_code_gen_base.html#af4e14d6e1be8b47db233b7226e4b8e2a',1,'fbgemm::CodeGenBase']]], - ['getorcreate_7',['getOrCreate',['../classfbgemm_1_1_code_gen_base.html#af00d94622211a89e6568e05d9b63850a',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a65dee023d21aee79a75508f08e51c403',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t)'],['../classfbgemm_1_1_code_gen_base.html#ae20473007cc942d5263eed6677cfddee',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a3ba99549f858b04f892c5edb2580aa07',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#ab13fc9ae80b3c7b6f7c95597b3eca012',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#af378fb7873c900315a548c788e5f3e75',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a4050603658b0e5cf634fd9a989cb84bc',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a4050603658b0e5cf634fd9a989cb84bc',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)']]], - ['getrowoffsetbuffer_8',['getRowOffsetBuffer',['../classfbgemm_1_1_pack_matrix.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackMatrix::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_matrix.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAMatrix::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAWithIm2Col::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAWithRowOffset::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAWithQuantRowOffset::getRowOffsetBuffer()']]], - ['group_9',['Example Method Group',['../group__example-method-group.html',1,'']]] 
+ ['gen_20ai_20attention_1',['Experimental-gen-ai-attention',['../group__experimental-gen-ai-attention.html',1,'']]], + ['gen16bitvectorone_2',['gen16BitVectorOne',['../namespacefbgemm.html#aa06c4dd5673e6b3df1dfe3617fdc919d',1,'fbgemm']]], + ['gen8bitvectorone_3',['gen8BitVectorOne',['../namespacefbgemm.html#a3c476c8ddbed58f3f6b4395dd55ed2a3',1,'fbgemm']]], + ['gencomputeblock_4',['genComputeBlock',['../classfbgemm_1_1_code_gen_base.html#a176924b076bd6485a83a0dd75c20cac6',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp B_pf, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#aa2af59748852e0ff4bfec95c946cbcc9',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp B_pf, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#a9727e9d8a35fccd581ad604006ea77fe',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#a8c35863faf15810abfe3fe1cd432a687',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#aa2af59748852e0ff4bfec95c946cbcc9',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp B_pf, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#a8c35863faf15810abfe3fe1cd432a687',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp, int rowRegs, int colRegs, int lda)']]], + ['generateembeddingspmdm_5',['GenerateEmbeddingSpMDM',['../namespacefbgemm.html#a77602a69076f938d21d336a0df00f9c4',1,'fbgemm']]], + ['generateembeddingspmdmfp8withstrides_6',['GenerateEmbeddingSpMDMFP8WithStrides',['../namespacefbgemm.html#a57534e5ede9766d50e536437b499894d',1,'fbgemm']]], + ['generateembeddingspmdmnbit_7',['GenerateEmbeddingSpMDMNBit',['../namespacefbgemm.html#aa4e2948ec5f9097f552de8a0458d49e1',1,'fbgemm']]], + ['generateembeddingspmdmnbitrowwisesparse_8',['GenerateEmbeddingSpMDMNBitRowWiseSparse',['../namespacefbgemm.html#aa7d2220ddfc7d5c9820fd03b73101d37',1,'fbgemm']]], + ['generateembeddingspmdmnbitwithstrides_9',['GenerateEmbeddingSpMDMNBitWithStrides',['../namespacefbgemm.html#abb137866f8726f5c6cbf150ccc7e5b08',1,'fbgemm']]], + ['generateembeddingspmdmrowwisesparse_10',['GenerateEmbeddingSpMDMRowWiseSparse',['../namespacefbgemm.html#a6d6fe14c3ad83011adb500625ecbff01',1,'fbgemm']]], + ['generateembeddingspmdmwithstrides_11',['GenerateEmbeddingSpMDMWithStrides',['../namespacefbgemm.html#a243ed0e4df7bb7b6c08a930ee71a996b',1,'fbgemm']]], + ['generaterowwisesparseadagradfused_12',['GenerateRowWiseSparseAdaGradFused',['../namespacefbgemm.html#a90ceef30c1643dd1a87b1a0753b52e87',1,'fbgemm']]], + ['generic_13',['Quantization Utilities (Generic)',['../group__fbgemm-quant-utils-generic.html',1,'']]], + ['generic_5fhistogram_5fbinning_5fcalibration_5fby_5ffeature_5fcpu_14',['generic_histogram_binning_calibration_by_feature_cpu',['../group__sparse-data-cpu.html#gaef2a0a8c27e3b8b2d72be5c95ba7539e',1,'fbgemm_gpu']]], + ['genu8i8s32fma_15',['genU8I8S32FMA',['../namespacefbgemm.html#a2a15434be774f5beeb39e4fe225d6fb9',1,'fbgemm']]], + ['genu8sum4_16',['genU8Sum4',['../namespacefbgemm.html#aa9ddfdb1cd3e41712844257212fcb050',1,'fbgemm']]], + 
['genu8sum8_17',['genU8Sum8',['../namespacefbgemm.html#a3535bf91ff758b3bd13929bf9f211c90',1,'fbgemm']]], + ['get_5funique_5findices_5fcuda_18',['get_unique_indices_cuda',['../group__table-batched-embed-cuda.html#ga4887151424a90cfd0abef174a4e91f3f',1,'get_unique_indices_cuda(at::Tensor linear_indices, int64_t max_indices, bool compute_count): linearize_cache_indices.cu'],['../group__table-batched-embed-cuda.html#ga4887151424a90cfd0abef174a4e91f3f',1,'get_unique_indices_cuda(Tensor linear_indices, int64_t max_indices, bool compute_count): linearize_cache_indices.cu']]], + ['getbuf_19',['getBuf',['../classfbgemm_1_1_pack_matrix.html#ac34c29cb4d372b728c2b8460e142269b',1,'fbgemm::PackMatrix::getBuf()'],['../classfbgemm_1_1_pack_weight_matrix_for_g_conv.html#a46f1fd2c0a84f4b0b40f2e907c1908a2',1,'fbgemm::PackWeightMatrixForGConv::getBuf()']]], + ['getcodeloggingfile_20',['getCodeLoggingFile',['../classfbgemm_1_1_code_gen_base.html#af4e14d6e1be8b47db233b7226e4b8e2a',1,'fbgemm::CodeGenBase']]], + ['getorcreate_21',['getOrCreate',['../classfbgemm_1_1_code_gen_base.html#af00d94622211a89e6568e05d9b63850a',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a65dee023d21aee79a75508f08e51c403',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t)'],['../classfbgemm_1_1_code_gen_base.html#ae20473007cc942d5263eed6677cfddee',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a3ba99549f858b04f892c5edb2580aa07',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#ab13fc9ae80b3c7b6f7c95597b3eca012',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#af378fb7873c900315a548c788e5f3e75',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a4050603658b0e5cf634fd9a989cb84bc',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a4050603658b0e5cf634fd9a989cb84bc',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)']]], + ['getrowoffsetbuffer_22',['getRowOffsetBuffer',['../classfbgemm_1_1_pack_matrix.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackMatrix::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_matrix.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAMatrix::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAWithIm2Col::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAWithRowOffset::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAWithQuantRowOffset::getRowOffsetBuffer()']]], + ['gqa_5fattn_5fsplitk_5fcuda_23',['gqa_attn_splitk_cuda',['../group__experimental-gen-ai-attention.html#ga72225d1459d48465e83289c29df7447a',1,'fbgemm_gpu::gen_ai::attention']]], + ['group_24',['Example Method Group',['../group__example-method-group.html',1,'']]] ]; diff --git a/search/all_9.js b/search/all_9.js index c29fede06..f29db09b4 100644 --- a/search/all_9.js +++ b/search/all_9.js @@ -2,16 +2,23 @@ var searchData= [ 
['ic_0',['IC',['../structfbgemm_1_1conv__param__t.html#aa1f2a9a2bf67f9d0cb26ec137fe48222',1,'fbgemm::conv_param_t']]], ['ics_1',['ICs',['../classfbgemm_1_1_compressed_sparse_column.html#a639afa1bb5bcdb507506ee50ef8f93e7',1,'fbgemm::CompressedSparseColumn']]], - ['in_5fdim_2',['IN_DIM',['../structfbgemm_1_1conv__param__t.html#a66c6bdf3479c1a76bf033361fe10d176',1,'fbgemm::conv_param_t']]], - ['input_20operators_3',['Combine Input Operators',['../group__input-combine.html',1,'']]], - ['int_5fnbit_5fsplit_5fembedding_5fuvm_5fcaching_5fcodegen_5flookup_5ffunction_4',['int_nbit_split_embedding_uvm_caching_codegen_lookup_function',['../group__embedding-cuda.html#gabbe880100f1036a979f3a8d8755447d0',1,'embedding_forward_quantized_host.cpp']]], - ['is_5f16or32bit_5',['is_16or32bit',['../structis__16or32bit.html',1,'']]], - ['is_5f8bit_6',['is_8bit',['../structfbgemm_1_1is__8bit.html',1,'fbgemm']]], - ['is_5fuvm_5ftensor_7',['is_uvm_tensor',['../group__cumem-utils.html#gacba28ed334d071e79c1ead1792391e9d',1,'fbgemm_gpu']]], - ['isa_8',['isA',['../classfbgemm_1_1_pack_matrix.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackMatrix::isA()'],['../classfbgemm_1_1_pack_a_matrix.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAMatrix::isA()'],['../classfbgemm_1_1_pack_b_matrix.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackBMatrix::isA()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAWithIm2Col::isA()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAWithRowOffset::isA()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAWithQuantRowOffset::isA()']]], - ['ishypersparse_9',['IsHyperSparse',['../classfbgemm_1_1_compressed_sparse_column.html#a5deac9b9ff0e1d7b22c7a887d40b4c8c',1,'fbgemm::CompressedSparseColumn']]], - ['ispackingcompliant_10',['isPackingCompliant',['../classfbgemm_1_1_pack_weights_for_conv.html#a5e78c80fc33d5b40be198d920a194193',1,'fbgemm::PackWeightsForConv']]], - ['isprepacked_11',['isPrePacked',['../classfbgemm_1_1_pack_matrix.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackMatrix::isPrePacked()'],['../classfbgemm_1_1_pack_a_matrix.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAMatrix::isPrePacked()'],['../classfbgemm_1_1_pack_b_matrix.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackBMatrix::isPrePacked()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAWithIm2Col::isPrePacked()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAWithRowOffset::isPrePacked()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAWithQuantRowOffset::isPrePacked()']]], - ['istherecolremainder_12',['isThereColRemainder',['../classfbgemm_1_1_pack_matrix.html#a0fea05b14052070fcc8f2f5a9a829d0f',1,'fbgemm::PackMatrix']]], - ['isthislastkblock_13',['isThisLastKBlock',['../classfbgemm_1_1_pack_matrix.html#af38b0669b7bdf219aa56a9a587f4dbaa',1,'fbgemm::PackMatrix::isThisLastKBlock()'],['../classfbgemm_1_1_pack_b_matrix.html#a231aae141b5263a766275bb3236d297d',1,'fbgemm::PackBMatrix::isThisLastKBlock()']]] + ['impl_5ftype_5ft_2',['impl_type_t',['../namespacefbgemm.html#a5356ce4b0771923d6eee8b3692afd2f3',1,'fbgemm']]], + ['in_5fdim_3',['IN_DIM',['../structfbgemm_1_1conv__param__t.html#a66c6bdf3479c1a76bf033361fe10d176',1,'fbgemm::conv_param_t']]], + 
['initcregs_4',['initCRegs',['../namespacefbgemm.html#a3a50b707287c0456d23e735846b144c0',1,'fbgemm']]], + ['input_20operators_5',['Combine Input Operators',['../group__input-combine.html',1,'']]], + ['inst_5fset_5ft_6',['inst_set_t',['../namespacefbgemm.html#a637f4382ad9fa1da0795e05672b74301',1,'fbgemm']]], + ['int_5fnbit_5fsplit_5fembedding_5fuvm_5fcaching_5fcodegen_5flookup_5ffunction_7',['int_nbit_split_embedding_uvm_caching_codegen_lookup_function',['../group__embedding-cuda.html#gabbe880100f1036a979f3a8d8755447d0',1,'embedding_forward_quantized_host.cpp']]], + ['is_5f16or32bit_8',['is_16or32bit',['../structis__16or32bit.html',1,'']]], + ['is_5f8bit_9',['is_8bit',['../structfbgemm_1_1is__8bit.html',1,'fbgemm']]], + ['is_5fautovec_5fdisabled_10',['is_autovec_disabled',['../namespacefbgemm.html#a4bd183ba7e59151ac6bff236729d4a41',1,'fbgemm']]], + ['is_5fradix_5fsort_5faccelerated_5fwith_5fopenmp_11',['is_radix_sort_accelerated_with_openmp',['../namespacefbgemm.html#af24ff1c82832652af861c3634486513a',1,'fbgemm']]], + ['is_5fuvm_5ftensor_12',['is_uvm_tensor',['../group__cumem-utils.html#gacba28ed334d071e79c1ead1792391e9d',1,'fbgemm_gpu']]], + ['isa_13',['isA',['../classfbgemm_1_1_pack_matrix.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackMatrix::isA()'],['../classfbgemm_1_1_pack_a_matrix.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAMatrix::isA()'],['../classfbgemm_1_1_pack_b_matrix.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackBMatrix::isA()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAWithIm2Col::isA()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAWithRowOffset::isA()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAWithQuantRowOffset::isA()']]], + ['ishypersparse_14',['IsHyperSparse',['../classfbgemm_1_1_compressed_sparse_column.html#a5deac9b9ff0e1d7b22c7a887d40b4c8c',1,'fbgemm::CompressedSparseColumn']]], + ['ispackingcompliant_15',['isPackingCompliant',['../classfbgemm_1_1_pack_weights_for_conv.html#a5e78c80fc33d5b40be198d920a194193',1,'fbgemm::PackWeightsForConv']]], + ['isprepacked_16',['isPrePacked',['../classfbgemm_1_1_pack_matrix.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackMatrix::isPrePacked()'],['../classfbgemm_1_1_pack_a_matrix.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAMatrix::isPrePacked()'],['../classfbgemm_1_1_pack_b_matrix.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackBMatrix::isPrePacked()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAWithIm2Col::isPrePacked()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAWithRowOffset::isPrePacked()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAWithQuantRowOffset::isPrePacked()']]], + ['istherecolremainder_17',['isThereColRemainder',['../classfbgemm_1_1_pack_matrix.html#a0fea05b14052070fcc8f2f5a9a829d0f',1,'fbgemm::PackMatrix']]], + ['isthislastkblock_18',['isThisLastKBlock',['../classfbgemm_1_1_pack_matrix.html#af38b0669b7bdf219aa56a9a587f4dbaa',1,'fbgemm::PackMatrix::isThisLastKBlock()'],['../classfbgemm_1_1_pack_b_matrix.html#a231aae141b5263a766275bb3236d297d',1,'fbgemm::PackBMatrix::isThisLastKBlock()']]], + ['isymm_19',['isYmm',['../namespacefbgemm.html#adac821292975979b386dc3ab1b234a37',1,'fbgemm']]], + 
['iszmm_20',['isZmm',['../namespacefbgemm.html#a26137f070019d80935a34fe466ac85c4',1,'fbgemm']]] ]; diff --git a/search/all_d.js b/search/all_d.js index 364ae2fca..69ca33cb0 100644 --- a/search/all_d.js +++ b/search/all_d.js @@ -1,13 +1,16 @@ var searchData= [ - ['mb_0',['MB',['../structfbgemm_1_1conv__param__t.html#ae28122c4c103b2e65c7c8b1b6d6c35ca',1,'fbgemm::conv_param_t']]], - ['mcb_1',['MCB',['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx2_00_01typename_01std_fe37d46c6e9c6ab5afbe4d3665c382fb.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, std::int32_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >::MCB'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx2_00_01typename_01std_858291a64a7808d94f01c15180f04f2c.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, std::int16_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >::MCB'],['../struct_packing_traits_3_01float_00_01float_00_01inst__set__t_1_1avx2_01_4.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< float, float, inst_set_t::avx2 >::MCB'],['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512_00_01typename_01st563fe14c40d9d54cf9fe6113c26e66c0.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, std::int32_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >::MCB'],['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512__ymm_00_01typename23c91419ea08f5673443445db549693f.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, std::int32_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >::MCB'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512_00_01typename_01sta3c205cd2e965b8e751c31d57cbb32f1.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, std::int16_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >::MCB'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512__ymm_00_01typenamea22ccba6542408684108d40af5374bf6.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, std::int16_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >::MCB'],['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni_00_01typename_01std_1_495ec4d719e603d9e79f7a55acd55e37.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, accT, inst_set_t::avx512_vnni, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >::MCB'],['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni__ymm_00_01typename_01s0dc87ea23078ad687de8b8ea67c6d3f3.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, accT, inst_set_t::avx512_vnni_ymm, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >::MCB'],['../struct_packing_traits_3_01int64__t_00_01int64__t_00_01inst__set__t_1_1avx512_01_4.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< int64_t, int64_t, inst_set_t::avx512 >::MCB']]], - ['memcopy_2',['memCopy',['../classfbgemm_1_1mem_copy.html',1,'fbgemm']]], - ['memory_20operators_3',['CUDA Memory Operators',['../group__cumem-utils.html',1,'']]], - ['merge_20operators_4',['Merge Operators',['../group__merge-pooled-emb.html',1,'']]], - ['metaequals_5',['metaEquals',['../classfbgemm_1_1_pack_b_matrix.html#aa03364175cb684a60f52bc80215e907b',1,'fbgemm::PackBMatrix']]], - 
['method_20group_6',['Example Method Group',['../group__example-method-group.html',1,'']]], - ['mismatchingparams_7',['mismatchingParams',['../classfbgemm_1_1_pack_weights_for_conv.html#ac8508d632e224b9a8ee2432c5b012393',1,'fbgemm::PackWeightsForConv']]], - ['mr_8',['MR',['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx2_00_01typename_01std_fe37d46c6e9c6ab5afbe4d3665c382fb.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, std::int32_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >::MR'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx2_00_01typename_01std_858291a64a7808d94f01c15180f04f2c.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, std::int16_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >::MR'],['../struct_packing_traits_3_01float_00_01float_00_01inst__set__t_1_1avx2_01_4.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< float, float, inst_set_t::avx2 >::MR'],['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512_00_01typename_01st563fe14c40d9d54cf9fe6113c26e66c0.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, std::int32_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >::MR'],['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512__ymm_00_01typename23c91419ea08f5673443445db549693f.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, std::int32_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >::MR'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512_00_01typename_01sta3c205cd2e965b8e751c31d57cbb32f1.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, std::int16_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >::MR'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512__ymm_00_01typenamea22ccba6542408684108d40af5374bf6.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, std::int16_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >::MR'],['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni_00_01typename_01std_1_495ec4d719e603d9e79f7a55acd55e37.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, accT, inst_set_t::avx512_vnni, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >::MR'],['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni__ymm_00_01typename_01s0dc87ea23078ad687de8b8ea67c6d3f3.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, accT, inst_set_t::avx512_vnni_ymm, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >::MR'],['../struct_packing_traits_3_01int64__t_00_01int64__t_00_01inst__set__t_1_1avx512_01_4.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< int64_t, int64_t, inst_set_t::avx512 >::MR']]], - ['multiplier_9',['multiplier',['../structfbgemm_1_1_requantization_params.html#a3c61c2609f8bc23b8df280bd531f6515',1,'fbgemm::RequantizationParams']]] + ['matmul_5fu8i8acc16_5fref_0',['matmul_u8i8acc16_ref',['../namespacefbgemm.html#a826abc2e81aabe4c0059dd34abe8de0d',1,'fbgemm']]], + ['matmul_5fu8i8acc32_5fref_1',['matmul_u8i8acc32_ref',['../namespacefbgemm.html#a28f838d0d9db9969debe82780183bd9a',1,'fbgemm']]], + ['matrix_5fop_5ft_2',['matrix_op_t',['../namespacefbgemm.html#ae28c9cc48e43e99cb778d60ba35c0fbf',1,'fbgemm']]], + 
['mb_3',['MB',['../structfbgemm_1_1conv__param__t.html#ae28122c4c103b2e65c7c8b1b6d6c35ca',1,'fbgemm::conv_param_t']]], + ['mcb_4',['MCB',['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx2_00_01typename_01std_fe37d46c6e9c6ab5afbe4d3665c382fb.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, std::int32_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >::MCB'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx2_00_01typename_01std_858291a64a7808d94f01c15180f04f2c.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, std::int16_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >::MCB'],['../struct_packing_traits_3_01float_00_01float_00_01inst__set__t_1_1avx2_01_4.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< float, float, inst_set_t::avx2 >::MCB'],['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512_00_01typename_01st563fe14c40d9d54cf9fe6113c26e66c0.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, std::int32_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >::MCB'],['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512__ymm_00_01typename23c91419ea08f5673443445db549693f.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, std::int32_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >::MCB'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512_00_01typename_01sta3c205cd2e965b8e751c31d57cbb32f1.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, std::int16_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >::MCB'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512__ymm_00_01typenamea22ccba6542408684108d40af5374bf6.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, std::int16_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >::MCB'],['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni_00_01typename_01std_1_495ec4d719e603d9e79f7a55acd55e37.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, accT, inst_set_t::avx512_vnni, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >::MCB'],['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni__ymm_00_01typename_01s0dc87ea23078ad687de8b8ea67c6d3f3.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< T, accT, inst_set_t::avx512_vnni_ymm, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >::MCB'],['../struct_packing_traits_3_01int64__t_00_01int64__t_00_01inst__set__t_1_1avx512_01_4.html#a7eea94d5b967637ff9e474a421ae14e2',1,'PackingTraits< int64_t, int64_t, inst_set_t::avx512 >::MCB']]], + ['memcopy_5',['memCopy',['../classfbgemm_1_1mem_copy.html',1,'fbgemm']]], + ['memory_20operators_6',['CUDA Memory Operators',['../group__cumem-utils.html',1,'']]], + ['merge_20operators_7',['Merge Operators',['../group__merge-pooled-emb.html',1,'']]], + ['metaequals_8',['metaEquals',['../classfbgemm_1_1_pack_b_matrix.html#aa03364175cb684a60f52bc80215e907b',1,'fbgemm::PackBMatrix']]], + ['method_20group_9',['Example Method Group',['../group__example-method-group.html',1,'']]], + ['mismatchingparams_10',['mismatchingParams',['../classfbgemm_1_1_pack_weights_for_conv.html#ac8508d632e224b9a8ee2432c5b012393',1,'fbgemm::PackWeightsForConv']]], + 
['mr_11',['MR',['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx2_00_01typename_01std_fe37d46c6e9c6ab5afbe4d3665c382fb.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, std::int32_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >::MR'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx2_00_01typename_01std_858291a64a7808d94f01c15180f04f2c.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, std::int16_t, inst_set_t::avx2, typename std::enable_if< is_8bit< T >::value >::type >::MR'],['../struct_packing_traits_3_01float_00_01float_00_01inst__set__t_1_1avx2_01_4.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< float, float, inst_set_t::avx2 >::MR'],['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512_00_01typename_01st563fe14c40d9d54cf9fe6113c26e66c0.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, std::int32_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >::MR'],['../struct_packing_traits_3_01_t_00_01std_1_1int32__t_00_01inst__set__t_1_1avx512__ymm_00_01typename23c91419ea08f5673443445db549693f.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, std::int32_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >::MR'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512_00_01typename_01sta3c205cd2e965b8e751c31d57cbb32f1.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, std::int16_t, inst_set_t::avx512, typename std::enable_if< is_8bit< T >::value >::type >::MR'],['../struct_packing_traits_3_01_t_00_01std_1_1int16__t_00_01inst__set__t_1_1avx512__ymm_00_01typenamea22ccba6542408684108d40af5374bf6.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, std::int16_t, inst_set_t::avx512_ymm, typename std::enable_if< is_8bit< T >::value >::type >::MR'],['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni_00_01typename_01std_1_495ec4d719e603d9e79f7a55acd55e37.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, accT, inst_set_t::avx512_vnni, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >::MR'],['../struct_packing_traits_3_01_t_00_01acc_t_00_01inst__set__t_1_1avx512__vnni__ymm_00_01typename_01s0dc87ea23078ad687de8b8ea67c6d3f3.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< T, accT, inst_set_t::avx512_vnni_ymm, typename std::enable_if< is_8bit< T >::value &&is_16or32bit< accT >::value >::type >::MR'],['../struct_packing_traits_3_01int64__t_00_01int64__t_00_01inst__set__t_1_1avx512_01_4.html#a79b17319ff1245f18d73191560b2b506',1,'PackingTraits< int64_t, int64_t, inst_set_t::avx512 >::MR']]], + ['multiplier_12',['multiplier',['../structfbgemm_1_1_requantization_params.html#a3c61c2609f8bc23b8df280bd531f6515',1,'fbgemm::RequantizationParams']]] ]; diff --git a/search/all_f.js b/search/all_f.js index cafbd2d51..f679c9d2c 100644 --- a/search/all_f.js +++ b/search/all_f.js @@ -3,5 +3,6 @@ var searchData= ['oc_0',['OC',['../structfbgemm_1_1conv__param__t.html#af005c0a5ba16f86ce90490d51d61e92e',1,'fbgemm::conv_param_t']]], ['operators_1',['Operators',['../group__input-combine.html',1,'Combine Input Operators'],['../group__cumem-utils.html',1,'CUDA Memory Operators'],['../group__table-batched-embed-cuda.html',1,'CUDA Operators'],['../group__embedding-cpu.html',1,'Embedding CPU Operators'],['../group__embedding-cuda.html',1,'Embedding CUDA 
Operators'],['../group__jagged-tensor-ops-cuda.html',1,'Jagged Tensor CUDA Operators'],['../group__jagged-tensor-ops-cpu.html',1,'Jagged Tensor Operators'],['../group__layout-transform-cpu.html',1,'Layout Transformation CPU Operators'],['../group__layout-transform-cuda.html',1,'Layout Transformation CUDA Operators'],['../group__merge-pooled-emb.html',1,'Merge Operators'],['../group__quantize-data-cpu.html',1,'Quantize Data CPU Operators'],['../group__sparse-data-cpu.html',1,'Sparse Data CPU Operators'],['../group__sparse-data-cuda.html',1,'Sparse Data CUDA Operators']]], ['operators_20cpu_2',['Permute Pooled Embeddings Operators (CPU)',['../group__permute-pooled-embs-cpu.html',1,'']]], - ['operators_20cuda_3',['Operators CUDA',['../group__permute-pooled-embs-gpu.html',1,'Permute Pooled Embeddings Operators (CUDA)'],['../group__quantize-ops-cuda.html',1,'Quantization Operators (CUDA)']]] + ['operators_20cuda_3',['Operators CUDA',['../group__permute-pooled-embs-gpu.html',1,'Permute Pooled Embeddings Operators (CUDA)'],['../group__quantize-ops-cuda.html',1,'Quantization Operators (CUDA)']]], + ['optimized_5fconv_5ft_4',['optimized_conv_t',['../namespacefbgemm.html#ae779e18e5742efa69f340bcb616acdb2',1,'fbgemm']]] ]; diff --git a/search/enums_0.js b/search/enums_0.js new file mode 100644 index 000000000..b7fcfc9f8 --- /dev/null +++ b/search/enums_0.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['impl_5ftype_5ft_0',['impl_type_t',['../namespacefbgemm.html#a5356ce4b0771923d6eee8b3692afd2f3',1,'fbgemm']]], + ['inst_5fset_5ft_1',['inst_set_t',['../namespacefbgemm.html#a637f4382ad9fa1da0795e05672b74301',1,'fbgemm']]] +]; diff --git a/search/enums_1.js b/search/enums_1.js new file mode 100644 index 000000000..c86f2f5a0 --- /dev/null +++ b/search/enums_1.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['matrix_5fop_5ft_0',['matrix_op_t',['../namespacefbgemm.html#ae28c9cc48e43e99cb778d60ba35c0fbf',1,'fbgemm']]] +]; diff --git a/search/enums_2.js b/search/enums_2.js new file mode 100644 index 000000000..40692ebc3 --- /dev/null +++ b/search/enums_2.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['optimized_5fconv_5ft_0',['optimized_conv_t',['../namespacefbgemm.html#ae779e18e5742efa69f340bcb616acdb2',1,'fbgemm']]] +]; diff --git a/search/functions_10.js b/search/functions_10.js index e671de328..fbf75f060 100644 --- a/search/functions_10.js +++ b/search/functions_10.js @@ -1,8 +1,15 @@ var searchData= [ - ['requantizeforfloat_0',['ReQuantizeForFloat',['../classfbgemm_1_1_re_quantize_for_float.html#a7ac7e62127705921ee912811a72697c6',1,'fbgemm::ReQuantizeForFloat']]], - ['requantizeoutput_1',['ReQuantizeOutput',['../classfbgemm_1_1_re_quantize_output.html#ab36806f951ba9ce3733448d78633de16',1,'fbgemm::ReQuantizeOutput']]], - ['requantizeoutputprocessingavx2_2',['requantizeOutputProcessingAvx2',['../group__fbgemm-quant-utils-avx2.html#ga92e2b96889b039f101e24855e163021b',1,'fbgemm']]], - ['requantizeoutputprocessinggconvavx512_3',['requantizeOutputProcessingGConvAvx512',['../group__fbgemm-quant-utils-avx512.html#gab1b2ed3537f97d130f8ed039bc9aa463',1,'fbgemm']]], - ['rowoffsetbuffersize_4',['rowOffsetBufferSize',['../classfbgemm_1_1_pack_a_with_im2_col.html#aa77e3ad795b908ab26006e954e19fa19',1,'fbgemm::PackAWithIm2Col::rowOffsetBufferSize()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#aa77e3ad795b908ab26006e954e19fa19',1,'fbgemm::PackAWithRowOffset::rowOffsetBufferSize()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#aa77e3ad795b908ab26006e954e19fa19',1,'fbgemm::PackAWithQuantRowOffset::rowOffsetBufferSize()']]] + 
['radix_5fsort_5fparallel_0',['radix_sort_parallel',['../namespacefbgemm.html#a3d34903420acd55fcebd0f8a19d8a84d',1,'fbgemm']]], + ['reduceavx2_1',['reduceAvx2',['../namespacefbgemm.html#ac2d8c325cbc2893ed9a32f71c6a3596b',1,'fbgemm']]], + ['requantize_5fu8acc32_5fref_2',['requantize_u8acc32_ref',['../namespacefbgemm.html#ab87b23be5587e267db6ab7d2b97c6915',1,'fbgemm::requantize_u8acc32_ref(int M, int N, int ld, const std::int32_t *inp, std::uint8_t *out, std::int32_t C_multiplier, std::int32_t C_right_shift, std::int32_t C_zero_point, std::int32_t A_zero_point, std::int32_t B_zero_point, const std::int32_t *row_offsets, const std::int32_t *col_offsets, const std::int32_t *bias, bool fuse_relu=false)'],['../namespacefbgemm.html#a798fea9136d48e1cd4c8a2926fb869ed',1,'fbgemm::requantize_u8acc32_ref(int M, int N, int ld, const std::int32_t *inp, std::uint8_t *out, const float *C_multiplier, std::int32_t C_zero_point, std::int32_t A_zero_point, const std::int32_t *B_zero_point, const std::int32_t *row_offsets, const std::int32_t *col_offsets, const std::int32_t *bias, int ncols_per_quant_group, bool fuse_relu=false)']]], + ['requantizeforfloat_3',['ReQuantizeForFloat',['../classfbgemm_1_1_re_quantize_for_float.html#a7ac7e62127705921ee912811a72697c6',1,'fbgemm::ReQuantizeForFloat']]], + ['requantizeoutput_4',['ReQuantizeOutput',['../classfbgemm_1_1_re_quantize_output.html#ab36806f951ba9ce3733448d78633de16',1,'fbgemm::ReQuantizeOutput']]], + ['requantizeoutputprocessingavx2_5',['requantizeOutputProcessingAvx2',['../group__fbgemm-quant-utils-avx2.html#ga92e2b96889b039f101e24855e163021b',1,'fbgemm']]], + ['requantizeoutputprocessinggconvavx512_6',['requantizeOutputProcessingGConvAvx512',['../group__fbgemm-quant-utils-avx512.html#gab1b2ed3537f97d130f8ed039bc9aa463',1,'fbgemm']]], + ['roundtofloat16_7',['RoundToFloat16',['../namespacefbgemm.html#a3bf47d3d99c8b3cb2af625d90c5494ab',1,'fbgemm']]], + ['row_5foffsets_5fu8acc32_5fref_8',['row_offsets_u8acc32_ref',['../namespacefbgemm.html#a0a160cf468a51c4634688b4f43851324',1,'fbgemm']]], + ['rowoffsetbuffersize_9',['rowOffsetBufferSize',['../classfbgemm_1_1_pack_a_with_im2_col.html#aa77e3ad795b908ab26006e954e19fa19',1,'fbgemm::PackAWithIm2Col::rowOffsetBufferSize()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#aa77e3ad795b908ab26006e954e19fa19',1,'fbgemm::PackAWithRowOffset::rowOffsetBufferSize()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#aa77e3ad795b908ab26006e954e19fa19',1,'fbgemm::PackAWithQuantRowOffset::rowOffsetBufferSize()']]], + ['rowoffsetbuffersizegconv_10',['rowOffsetBufferSizeGConv',['../namespacefbgemm.html#a8f972dca3254066120f58af5cf3b304c',1,'fbgemm']]], + ['rowwise_5fsparse_5fadagrad_5fref_11',['rowwise_sparse_adagrad_ref',['../namespacefbgemm.html#a3bee8daea3756d030209a6815db314d9',1,'fbgemm']]] ]; diff --git a/search/functions_11.js b/search/functions_11.js index 2555a4458..a442497ea 100644 --- a/search/functions_11.js +++ b/search/functions_11.js @@ -1,5 +1,9 @@ var searchData= [ - ['spmdm_0',['SpMDM',['../classfbgemm_1_1_compressed_sparse_column.html#a9f8530a8442a8fd99bfe3896d0fff5de',1,'fbgemm::CompressedSparseColumn']]], - ['storecregs_1',['storeCRegs',['../classfbgemm_1_1_code_gen_base.html#a01bcc02f063a515df6d7fda518ef1d12',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)'],['../classfbgemm_1_1_code_gen_base.html#a12a9f2428ed6fd0dd90c91fd4477e271',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, 
x86::Gp ldcReg, bool accum)'],['../classfbgemm_1_1_code_gen_base.html#a87e3f3cd0d070bf371466f4c7521266d',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)'],['../classfbgemm_1_1_code_gen_base.html#a87e3f3cd0d070bf371466f4c7521266d',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)']]] + ['sparse_5fadagrad_5fref_0',['sparse_adagrad_ref',['../namespacefbgemm.html#a3f04df11e31dd656955d1bd1f8a7893d',1,'fbgemm']]], + ['sparsedensemm_1',['SparseDenseMM',['../namespacefbgemm.html#a1671cc912f6aa4bab678a0d255c8a690',1,'fbgemm']]], + ['spmdm_2',['SpMDM',['../classfbgemm_1_1_compressed_sparse_column.html#a9f8530a8442a8fd99bfe3896d0fff5de',1,'fbgemm::CompressedSparseColumn']]], + ['spmdm_5fref_3',['spmdm_ref',['../namespacefbgemm.html#a4f19d1389f9e99cc0daded599b1f1fd4',1,'fbgemm']]], + ['spmdmkernelavx2_4',['spmdmKernelAvx2',['../namespacefbgemm.html#a8b547effff25521017d20a5c4ddb8fcc',1,'fbgemm']]], + ['storecregs_5',['storeCRegs',['../classfbgemm_1_1_code_gen_base.html#a01bcc02f063a515df6d7fda518ef1d12',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)'],['../classfbgemm_1_1_code_gen_base.html#a12a9f2428ed6fd0dd90c91fd4477e271',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)'],['../classfbgemm_1_1_code_gen_base.html#a87e3f3cd0d070bf371466f4c7521266d',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)'],['../classfbgemm_1_1_code_gen_base.html#a87e3f3cd0d070bf371466f4c7521266d',1,'fbgemm::CodeGenBase::storeCRegs(x86::Emitter *a, int rowRegs, int colRegs, x86::Gp C_Offset, x86::Gp ldcReg, bool accum)']]] ]; diff --git a/search/functions_12.js b/search/functions_12.js index 69f9a510d..d8488b989 100644 --- a/search/functions_12.js +++ b/search/functions_12.js @@ -1,4 +1,9 @@ var searchData= [ - ['tostring_0',['toString',['../structfbgemm_1_1conv__param__t.html#a1fe5121d6528fdea3f243321b3fa3a49',1,'fbgemm::conv_param_t']]] + ['takedepthwisefastpath_0',['takeDepthWiseFastPath',['../namespacefbgemm.html#a523727ffa987158ac9021cc0d9b97e0b',1,'fbgemm']]], + ['takepointwisefastpath_1',['takePointWiseFastPath',['../namespacefbgemm.html#affb3e7487c8a1c6c7d1549eb7090aee1',1,'fbgemm']]], + ['tostring_2',['toString',['../structfbgemm_1_1conv__param__t.html#a1fe5121d6528fdea3f243321b3fa3a49',1,'fbgemm::conv_param_t']]], + ['transpose_5f8rows_3',['transpose_8rows',['../namespacefbgemm.html#a9ee41553113b6cd89e0e336022acf250',1,'fbgemm']]], + ['transpose_5fref_4',['transpose_ref',['../namespacefbgemm.html#a18832d0507cb6d8bce78371b97f66479',1,'fbgemm']]], + ['transpose_5fsimd_5',['transpose_simd',['../namespacefbgemm.html#a75b4ad78ae16c6e6782f82e1ff4012a8',1,'fbgemm']]] ]; diff --git a/search/functions_2.js b/search/functions_2.js index 4d4f01545..13ed6cb52 100644 --- a/search/functions_2.js +++ b/search/functions_2.js @@ -1,7 +1,12 @@ var searchData= [ - ['blockcols_0',['blockCols',['../classfbgemm_1_1_pack_matrix.html#abb3166a23e502ffb0bc12243ec205fc0',1,'fbgemm::PackMatrix']]], - ['blockcolsize_1',['blockColSize',['../classfbgemm_1_1_pack_matrix.html#a70ac7f71f0b18449dc35c7ecc1162f84',1,'fbgemm::PackMatrix']]], - ['blockrows_2',['blockRows',['../classfbgemm_1_1_pack_matrix.html#abf7a4f4bb1702ee01325f06409038631',1,'fbgemm::PackMatrix']]], - 
['blockrowsize_3',['blockRowSize',['../classfbgemm_1_1_pack_matrix.html#a0f90dade3e2b75f0cbd459e24f94723d',1,'fbgemm::PackMatrix']]] + ['bfloat16tofloat_5favx2_0',['Bfloat16ToFloat_avx2',['../namespacefbgemm.html#ae121dec17e2e8a7648b3077f970f8c49',1,'fbgemm']]], + ['bfloat16tofloat_5favx512_1',['Bfloat16ToFloat_avx512',['../namespacefbgemm.html#a874e15e8f1c021008e76a24e8714024c',1,'fbgemm']]], + ['bfloat16tofloat_5fref_2',['Bfloat16ToFloat_ref',['../namespacefbgemm.html#af84f2b20490beb1dd0da4b03cf93afac',1,'fbgemm']]], + ['bfloat16tofloat_5fsimd_3',['Bfloat16ToFloat_simd',['../namespacefbgemm.html#afb00b526459a0db53a2c6ffe0276dd3e',1,'fbgemm']]], + ['blockcols_4',['blockCols',['../classfbgemm_1_1_pack_matrix.html#abb3166a23e502ffb0bc12243ec205fc0',1,'fbgemm::PackMatrix']]], + ['blockcolsize_5',['blockColSize',['../classfbgemm_1_1_pack_matrix.html#a70ac7f71f0b18449dc35c7ecc1162f84',1,'fbgemm::PackMatrix']]], + ['blockrows_6',['blockRows',['../classfbgemm_1_1_pack_matrix.html#abf7a4f4bb1702ee01325f06409038631',1,'fbgemm::PackMatrix']]], + ['blockrowsize_7',['blockRowSize',['../classfbgemm_1_1_pack_matrix.html#a0f90dade3e2b75f0cbd459e24f94723d',1,'fbgemm::PackMatrix']]], + ['broadcast8bit_8',['broadcast8Bit',['../namespacefbgemm.html#a4840e075e8c46a94cb7a489c3fa6aee4',1,'fbgemm']]] ]; diff --git a/search/functions_3.js b/search/functions_3.js index 7ae16a663..f4ffead28 100644 --- a/search/functions_3.js +++ b/search/functions_3.js @@ -1,5 +1,9 @@ var searchData= [ - ['codegenbase_0',['CodeGenBase',['../classfbgemm_1_1_code_gen_base.html#a843f4289cb9de379bac477ed0dcba1cf',1,'fbgemm::CodeGenBase']]], - ['conv_5fparam_5ft_1',['conv_param_t',['../structfbgemm_1_1conv__param__t.html#a926431139532f74306f6cd6dc08d171a',1,'fbgemm::conv_param_t']]] + ['cblas_5fsgemm_5fref_0',['cblas_sgemm_ref',['../namespacefbgemm.html#a88e97a715133ac27ca83ae5ab05010ed',1,'fbgemm']]], + ['codegenbase_1',['CodeGenBase',['../classfbgemm_1_1_code_gen_base.html#a843f4289cb9de379bac477ed0dcba1cf',1,'fbgemm::CodeGenBase']]], + ['col_5foffsets_5fwith_5fzero_5fpt_5fs8acc32_5fref_2',['col_offsets_with_zero_pt_s8acc32_ref',['../namespacefbgemm.html#a0f66af5e8e787dc1ff6893ac75ae161f',1,'fbgemm']]], + ['compare_5fbuffers_3',['compare_buffers',['../namespacefbgemm.html#a9d995b583abb4b09927c90f66e3b1463',1,'fbgemm']]], + ['conv_5fparam_5ft_4',['conv_param_t',['../structfbgemm_1_1conv__param__t.html#a926431139532f74306f6cd6dc08d171a',1,'fbgemm::conv_param_t']]], + ['convfastpath_5',['ConvFastPath',['../namespacefbgemm.html#ad5bda89769bca9a01ddf81591f20ef02',1,'fbgemm']]] ]; diff --git a/search/functions_4.js b/search/functions_4.js index 9d803af72..38dfba6fa 100644 --- a/search/functions_4.js +++ b/search/functions_4.js @@ -1,6 +1,8 @@ var searchData= [ ['density_0',['Density',['../classfbgemm_1_1_compressed_sparse_column.html#a6629bcd3b06c396540c2d5b7e4852164',1,'fbgemm::CompressedSparseColumn']]], - ['direct_5fmapped_5flru_5fcache_5fpopulate_5fbyte_5fcuda_1',['direct_mapped_lru_cache_populate_byte_cuda',['../group__table-batched-embed-cuda.html#gae019b6879bd9f89a146e0700d5a4bd8b',1,'split_embeddings_cache_cuda.cuh']]], - ['direct_5fmapped_5flxu_5fcache_5flookup_5fcuda_2',['direct_mapped_lxu_cache_lookup_cuda',['../group__table-batched-embed-cuda.html#gab305ebdd3822794c5ac462bf5df4bb49',1,'split_embeddings_cache_cuda.cuh']]] + ['depthwise_5f2d_5fsame_5fpad_1',['depthwise_2d_same_pad',['../namespacefbgemm.html#ac2e9634d4e2366ed6f2181ae7e7b17b2',1,'fbgemm']]], + 
['depthwise_5f3d_5fsame_5fpad_2',['depthwise_3d_same_pad',['../namespacefbgemm.html#a0bfa499fd1b485bc3e457842343bca57',1,'fbgemm']]], + ['direct_5fmapped_5flru_5fcache_5fpopulate_5fbyte_5fcuda_3',['direct_mapped_lru_cache_populate_byte_cuda',['../group__table-batched-embed-cuda.html#gae019b6879bd9f89a146e0700d5a4bd8b',1,'split_embeddings_cache_cuda.cuh']]], + ['direct_5fmapped_5flxu_5fcache_5flookup_5fcuda_4',['direct_mapped_lxu_cache_lookup_cuda',['../group__table-batched-embed-cuda.html#gab305ebdd3822794c5ac462bf5df4bb49',1,'split_embeddings_cache_cuda.cuh']]] ]; diff --git a/search/functions_5.js b/search/functions_5.js index 9592dd017..d6ab9b057 100644 --- a/search/functions_5.js +++ b/search/functions_5.js @@ -1,7 +1,9 @@ var searchData= [ - ['equals_0',['equals',['../classfbgemm_1_1_pack_b_matrix.html#a5299773354edb62a96e39dc55ab5d770',1,'fbgemm::PackBMatrix']]], - ['example_5fmethod_1',['example_method',['../group__example-method-group.html#ga56a504c1752577359ba5b75a9cd52737',1,'example_code.cpp']]], - ['executekernel_2',['ExecuteKernel',['../classfbgemm_1_1_execute_kernel_3_01packing_a_matrix_00_01_pack_b_matrix_3_01int8__t_00_01typenam9894617fba2431fcc8042b1a22b96270.html#ae4a4e6063c0cb62d64d6159d102a899b',1,'fbgemm::ExecuteKernel< packingAMatrix, PackBMatrix< int8_t, typename packingAMatrix::accType >, cT, processOutputType >']]], - ['expand_5finto_5fjagged_5fpermute_5fcuda_3',['expand_into_jagged_permute_cuda',['../group__sparse-data-cuda.html#gab7344d63216dd37673733b26954aaec4',1,'fbgemm_gpu']]] + ['emitextracthalfvector_0',['emitExtractHalfVector',['../namespacefbgemm.html#a602ac18c0e6c32448ff8d21818bded38',1,'fbgemm']]], + ['emitloaddword_1',['emitLoadDWord',['../namespacefbgemm.html#aa56904bd84399e10104d286e0e10a2d7',1,'fbgemm']]], + ['equals_2',['equals',['../classfbgemm_1_1_pack_b_matrix.html#a5299773354edb62a96e39dc55ab5d770',1,'fbgemm::PackBMatrix']]], + ['example_5fmethod_3',['example_method',['../group__example-method-group.html#ga56a504c1752577359ba5b75a9cd52737',1,'example_code.cpp']]], + ['executekernel_4',['ExecuteKernel',['../classfbgemm_1_1_execute_kernel_3_01packing_a_matrix_00_01_pack_b_matrix_3_01int8__t_00_01typenam9894617fba2431fcc8042b1a22b96270.html#ae4a4e6063c0cb62d64d6159d102a899b',1,'fbgemm::ExecuteKernel< packingAMatrix, PackBMatrix< int8_t, typename packingAMatrix::accType >, cT, processOutputType >']]], + ['expand_5finto_5fjagged_5fpermute_5fcuda_5',['expand_into_jagged_permute_cuda',['../group__sparse-data-cuda.html#gab7344d63216dd37673733b26954aaec4',1,'fbgemm_gpu']]] ]; diff --git a/search/functions_6.js b/search/functions_6.js index 31fa5b3e7..f936feadf 100644 --- a/search/functions_6.js +++ b/search/functions_6.js @@ -1,6 +1,47 @@ var searchData= [ - ['findminmax_0',['FindMinMax',['../group__fbgemm-quant-utils-avx2.html#ga38920438e5d25d4092a1b695f3420b8e',1,'fbgemm']]], - ['floatorhalftofusednbitrowwisequantizedsbhalf_1',['FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf',['../group__fbgemm-quant-utils-generic.html#ga6b77c8540e630305db9a5f30a84e7e5b',1,'fbgemm']]], - ['fusedquantizedequantize_2',['FusedQuantizeDequantize',['../group__fbgemm-quant-utils-generic.html#gaa51ab0f363fbcdf8ffa0a561884225d9',1,'fbgemm']]] + ['fbgemmalignedalloc_0',['fbgemmAlignedAlloc',['../namespacefbgemm.html#ab55953ea0d8867577ef9b096d68cfce4',1,'fbgemm']]], + ['fbgemmalignedfree_1',['fbgemmAlignedFree',['../namespacefbgemm.html#a539e708e033ffe98b075a6b6c5cb4b46',1,'fbgemm']]], + 
['fbgemmconv_2',['fbgemmConv',['../namespacefbgemm.html#adc9d2af7ea01634fd2e5bef0e6baa0ab',1,'fbgemm']]], + ['fbgemmenableavx512ymm_3',['fbgemmEnableAvx512Ymm',['../namespacefbgemm.html#a9d17a4894f822f7afd5c79407bbf91d5',1,'fbgemm']]], + ['fbgemmforceisa_4',['fbgemmForceIsa',['../namespacefbgemm.html#af150495d47d5ccb1670524166fd9b45d',1,'fbgemm']]], + ['fbgemmget2dpartition_5',['fbgemmGet2DPartition',['../namespacefbgemm.html#a26e1a7f0b1935835ad95a52aa4f56eb8',1,'fbgemm']]], + ['fbgemmgetthreadpartition_6',['fbgemmGetThreadPartition',['../namespacefbgemm.html#a38cca353e8e2984e37704be8bbc327fe',1,'fbgemm']]], + ['fbgemmgroupwiseconv_7',['fbgemmGroupwiseConv',['../namespacefbgemm.html#a5780ef4a16a1682740af29283360caa4',1,'fbgemm']]], + ['fbgemmhasarmneonsupport_8',['fbgemmHasArmNeonSupport',['../namespacefbgemm.html#af727aa5e29e172f994653d01e444973e',1,'fbgemm']]], + ['fbgemmhasarmsve2support_9',['fbgemmHasArmSve2Support',['../namespacefbgemm.html#aae9bb60bfb4acb2e62976adcd98ccaa0',1,'fbgemm']]], + ['fbgemmhasavx2support_10',['fbgemmHasAvx2Support',['../namespacefbgemm.html#a52e1a4ce201a6b89ad8b3dee69c59d40',1,'fbgemm']]], + ['fbgemmhasavx512support_11',['fbgemmHasAvx512Support',['../namespacefbgemm.html#ae0e6eeaba3d5c4265d9aee5e898fb329',1,'fbgemm']]], + ['fbgemmhasavx512vnnisupport_12',['fbgemmHasAvx512VnniSupport',['../namespacefbgemm.html#ae6e5fa9178cd2a70a01ef78a571802f5',1,'fbgemm']]], + ['fbgemminstructionset_13',['fbgemmInstructionSet',['../namespacefbgemm.html#a2be92a96ebd3c0d9bc9f9c0d0c537969',1,'fbgemm']]], + ['fbgemmisintelxeond_14',['fbgemmIsIntelXeonD',['../namespacefbgemm.html#a84685dfa70eedf3c2befcb8d02cf9d27',1,'fbgemm']]], + ['fbgemmoptimizedgconv_15',['fbgemmOptimizedGConv',['../namespacefbgemm.html#a19ec32cc9a1932f774bd8b2e0b047afe',1,'fbgemm']]], + ['fbgemmpacked_16',['fbgemmPacked',['../namespacefbgemm.html#a1f01b8b3f8fea3e9c8ccc2aed30ba70a',1,'fbgemm']]], + ['fbgemmpartition1d_17',['fbgemmPartition1D',['../namespacefbgemm.html#abf9cb71c5c3a79935f7146f05510bb19',1,'fbgemm']]], + ['fbgemmpartition1dblocked_18',['fbgemmPartition1DBlocked',['../namespacefbgemm.html#ae6d6321b283eaa5a8ddaaa96ea22c62f',1,'fbgemm']]], + ['fbgemmsupportedcpu_19',['fbgemmSupportedCPU',['../namespacefbgemm.html#a713e97500428aba767f6fcaf39aac4b9',1,'fbgemm']]], + ['findminmax_20',['FindMinMax',['../group__fbgemm-quant-utils-avx2.html#ga38920438e5d25d4092a1b695f3420b8e',1,'fbgemm']]], + ['float16tofloat_5favx2_21',['Float16ToFloat_avx2',['../namespacefbgemm.html#ae0cba6562b792a67eb376841005a907b',1,'fbgemm']]], + ['float16tofloat_5favx512_22',['Float16ToFloat_avx512',['../namespacefbgemm.html#a5d28283194eed8d19ebc5634fd78913f',1,'fbgemm']]], + ['float16tofloat_5fref_23',['Float16ToFloat_ref',['../namespacefbgemm.html#afc22ec6e38a38c7f41484f844dbfbeac',1,'fbgemm']]], + ['float16tofloat_5fsimd_24',['Float16ToFloat_simd',['../namespacefbgemm.html#af066434e23720ecd4ddcc51d7a616aef',1,'fbgemm']]], + ['float8tofloat_5fref_25',['Float8ToFloat_ref',['../namespacefbgemm.html#ad699dd756e87f820cca1f1202cec2a11',1,'fbgemm']]], + ['floatorhalftofused8bitrowwisequantizedsbfloat_26',['FloatOrHalfToFused8BitRowwiseQuantizedSBFloat',['../namespacefbgemm.html#afafa94e239d016cf273ad0597152b86c',1,'fbgemm']]], + ['floatorhalftofused8bitrowwisequantizedsbfloatref_27',['FloatOrHalfToFused8BitRowwiseQuantizedSBFloatRef',['../namespacefbgemm.html#a7cba6dad217715349653862b3e691057',1,'fbgemm']]], + 
['floatorhalftofusednbitrowwisequantizedsbhalf_28',['FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf',['../group__fbgemm-quant-utils-generic.html#ga6b77c8540e630305db9a5f30a84e7e5b',1,'fbgemm']]], + ['floatorhalftofusednbitrowwisequantizedsbhalfref_29',['FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfRef',['../namespacefbgemm.html#ad89426896d5c7b1f1b5db8ebaf201547',1,'fbgemm']]], + ['floattobfloat16_5favx2_30',['FloatToBfloat16_avx2',['../namespacefbgemm.html#a444fa054549274d8c6f442f0b866aa98',1,'fbgemm']]], + ['floattobfloat16_5favx512_31',['FloatToBfloat16_avx512',['../namespacefbgemm.html#a0e2b50c7d828e56f78cc0b8368dee35a',1,'fbgemm']]], + ['floattobfloat16_5fref_32',['FloatToBfloat16_ref',['../namespacefbgemm.html#ab22f5d961c6a42aab1c37b17a3d93770',1,'fbgemm']]], + ['floattobfloat16_5fsimd_33',['FloatToBfloat16_simd',['../namespacefbgemm.html#aa423a42208a4fde5f23ab6a28cef24a3',1,'fbgemm']]], + ['floattofloat16_5favx2_34',['FloatToFloat16_avx2',['../namespacefbgemm.html#a61c95557adf5477f3078af22d6054a7d',1,'fbgemm']]], + ['floattofloat16_5favx512_35',['FloatToFloat16_avx512',['../namespacefbgemm.html#a5305ff58517ca3669ec41c7f1d4817a4',1,'fbgemm']]], + ['floattofloat16_5fref_36',['FloatToFloat16_ref',['../namespacefbgemm.html#af44c584c974f95f4866806cee3798742',1,'fbgemm']]], + ['floattofloat16_5fsimd_37',['FloatToFloat16_simd',['../namespacefbgemm.html#a53a50b113345c09b89b45834f31d52df',1,'fbgemm']]], + ['floattofloat8_5fref_38',['FloatToFloat8_ref',['../namespacefbgemm.html#a32a45639603e1584965b471846fd067f',1,'fbgemm']]], + ['fused8bitrowwisequantizedsbfloattofloatorhalf_39',['Fused8BitRowwiseQuantizedSBFloatToFloatOrHalf',['../namespacefbgemm.html#a42aefaaf238d065625a64a757f998eef',1,'fbgemm']]], + ['fused8bitrowwisequantizedsbfloattofloatorhalfref_40',['Fused8BitRowwiseQuantizedSBFloatToFloatOrHalfRef',['../namespacefbgemm.html#a19217d3a8551a7b251ecb1eba79669bc',1,'fbgemm']]], + ['fusednbitrowwisequantizedsbhalftofloatorhalf_41',['FusedNBitRowwiseQuantizedSBHalfToFloatOrHalf',['../namespacefbgemm.html#a2b2ca55a7d2d5c2dbba337ee2e585f6a',1,'fbgemm']]], + ['fusednbitrowwisequantizedsbhalftofloatorhalfref_42',['FusedNBitRowwiseQuantizedSBHalfToFloatOrHalfRef',['../namespacefbgemm.html#adee65a9c68614b76ffdf2b3bc11b4618',1,'fbgemm']]], + ['fusedquantizedequantize_43',['FusedQuantizeDequantize',['../group__fbgemm-quant-utils-generic.html#gaa51ab0f363fbcdf8ffa0a561884225d9',1,'fbgemm']]] ]; diff --git a/search/functions_7.js b/search/functions_7.js index 56dd88c2e..6c5dcc88b 100644 --- a/search/functions_7.js +++ b/search/functions_7.js @@ -1,10 +1,24 @@ var searchData= [ - ['gencomputeblock_0',['genComputeBlock',['../classfbgemm_1_1_code_gen_base.html#a176924b076bd6485a83a0dd75c20cac6',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp B_pf, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#aa2af59748852e0ff4bfec95c946cbcc9',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp B_pf, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#a9727e9d8a35fccd581ad604006ea77fe',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#a8c35863faf15810abfe3fe1cd432a687',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp, int rowRegs, int colRegs, int 
lda)'],['../classfbgemm_1_1_code_gen_base.html#aa2af59748852e0ff4bfec95c946cbcc9',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp B_pf, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#a8c35863faf15810abfe3fe1cd432a687',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp, int rowRegs, int colRegs, int lda)']]], - ['generic_5fhistogram_5fbinning_5fcalibration_5fby_5ffeature_5fcpu_1',['generic_histogram_binning_calibration_by_feature_cpu',['../group__sparse-data-cpu.html#gaef2a0a8c27e3b8b2d72be5c95ba7539e',1,'fbgemm_gpu']]], - ['get_5funique_5findices_5fcuda_2',['get_unique_indices_cuda',['../group__table-batched-embed-cuda.html#ga4887151424a90cfd0abef174a4e91f3f',1,'get_unique_indices_cuda(at::Tensor linear_indices, int64_t max_indices, bool compute_count): linearize_cache_indices.cu'],['../group__table-batched-embed-cuda.html#ga4887151424a90cfd0abef174a4e91f3f',1,'get_unique_indices_cuda(Tensor linear_indices, int64_t max_indices, bool compute_count): linearize_cache_indices.cu']]], - ['getbuf_3',['getBuf',['../classfbgemm_1_1_pack_matrix.html#ac34c29cb4d372b728c2b8460e142269b',1,'fbgemm::PackMatrix::getBuf()'],['../classfbgemm_1_1_pack_weight_matrix_for_g_conv.html#a46f1fd2c0a84f4b0b40f2e907c1908a2',1,'fbgemm::PackWeightMatrixForGConv::getBuf()']]], - ['getcodeloggingfile_4',['getCodeLoggingFile',['../classfbgemm_1_1_code_gen_base.html#af4e14d6e1be8b47db233b7226e4b8e2a',1,'fbgemm::CodeGenBase']]], - ['getorcreate_5',['getOrCreate',['../classfbgemm_1_1_code_gen_base.html#af00d94622211a89e6568e05d9b63850a',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a65dee023d21aee79a75508f08e51c403',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t)'],['../classfbgemm_1_1_code_gen_base.html#ae20473007cc942d5263eed6677cfddee',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a3ba99549f858b04f892c5edb2580aa07',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#ab13fc9ae80b3c7b6f7c95597b3eca012',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#af378fb7873c900315a548c788e5f3e75',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a4050603658b0e5cf634fd9a989cb84bc',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a4050603658b0e5cf634fd9a989cb84bc',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)']]], - 
['getrowoffsetbuffer_6',['getRowOffsetBuffer',['../classfbgemm_1_1_pack_matrix.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackMatrix::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_matrix.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAMatrix::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAWithIm2Col::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAWithRowOffset::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAWithQuantRowOffset::getRowOffsetBuffer()']]] + ['gen16bitvectorone_0',['gen16BitVectorOne',['../namespacefbgemm.html#aa06c4dd5673e6b3df1dfe3617fdc919d',1,'fbgemm']]], + ['gen8bitvectorone_1',['gen8BitVectorOne',['../namespacefbgemm.html#a3c476c8ddbed58f3f6b4395dd55ed2a3',1,'fbgemm']]], + ['gencomputeblock_2',['genComputeBlock',['../classfbgemm_1_1_code_gen_base.html#a176924b076bd6485a83a0dd75c20cac6',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp B_pf, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#aa2af59748852e0ff4bfec95c946cbcc9',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp B_pf, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#a9727e9d8a35fccd581ad604006ea77fe',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#a8c35863faf15810abfe3fe1cd432a687',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#aa2af59748852e0ff4bfec95c946cbcc9',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp B_pf, int rowRegs, int colRegs, int lda)'],['../classfbgemm_1_1_code_gen_base.html#a8c35863faf15810abfe3fe1cd432a687',1,'fbgemm::CodeGenBase::genComputeBlock(x86::Emitter *a, x86::Gp buffer_A, x86::Gp buffer_B, x86::Gp, int rowRegs, int colRegs, int lda)']]], + ['generateembeddingspmdm_3',['GenerateEmbeddingSpMDM',['../namespacefbgemm.html#a77602a69076f938d21d336a0df00f9c4',1,'fbgemm']]], + ['generateembeddingspmdmfp8withstrides_4',['GenerateEmbeddingSpMDMFP8WithStrides',['../namespacefbgemm.html#a57534e5ede9766d50e536437b499894d',1,'fbgemm']]], + ['generateembeddingspmdmnbit_5',['GenerateEmbeddingSpMDMNBit',['../namespacefbgemm.html#aa4e2948ec5f9097f552de8a0458d49e1',1,'fbgemm']]], + ['generateembeddingspmdmnbitrowwisesparse_6',['GenerateEmbeddingSpMDMNBitRowWiseSparse',['../namespacefbgemm.html#aa7d2220ddfc7d5c9820fd03b73101d37',1,'fbgemm']]], + ['generateembeddingspmdmnbitwithstrides_7',['GenerateEmbeddingSpMDMNBitWithStrides',['../namespacefbgemm.html#abb137866f8726f5c6cbf150ccc7e5b08',1,'fbgemm']]], + ['generateembeddingspmdmrowwisesparse_8',['GenerateEmbeddingSpMDMRowWiseSparse',['../namespacefbgemm.html#a6d6fe14c3ad83011adb500625ecbff01',1,'fbgemm']]], + ['generateembeddingspmdmwithstrides_9',['GenerateEmbeddingSpMDMWithStrides',['../namespacefbgemm.html#a243ed0e4df7bb7b6c08a930ee71a996b',1,'fbgemm']]], + ['generaterowwisesparseadagradfused_10',['GenerateRowWiseSparseAdaGradFused',['../namespacefbgemm.html#a90ceef30c1643dd1a87b1a0753b52e87',1,'fbgemm']]], + 
['generic_5fhistogram_5fbinning_5fcalibration_5fby_5ffeature_5fcpu_11',['generic_histogram_binning_calibration_by_feature_cpu',['../group__sparse-data-cpu.html#gaef2a0a8c27e3b8b2d72be5c95ba7539e',1,'fbgemm_gpu']]], + ['genu8i8s32fma_12',['genU8I8S32FMA',['../namespacefbgemm.html#a2a15434be774f5beeb39e4fe225d6fb9',1,'fbgemm']]], + ['genu8sum4_13',['genU8Sum4',['../namespacefbgemm.html#aa9ddfdb1cd3e41712844257212fcb050',1,'fbgemm']]], + ['genu8sum8_14',['genU8Sum8',['../namespacefbgemm.html#a3535bf91ff758b3bd13929bf9f211c90',1,'fbgemm']]], + ['get_5funique_5findices_5fcuda_15',['get_unique_indices_cuda',['../group__table-batched-embed-cuda.html#ga4887151424a90cfd0abef174a4e91f3f',1,'get_unique_indices_cuda(at::Tensor linear_indices, int64_t max_indices, bool compute_count): linearize_cache_indices.cu'],['../group__table-batched-embed-cuda.html#ga4887151424a90cfd0abef174a4e91f3f',1,'get_unique_indices_cuda(Tensor linear_indices, int64_t max_indices, bool compute_count): linearize_cache_indices.cu']]], + ['getbuf_16',['getBuf',['../classfbgemm_1_1_pack_matrix.html#ac34c29cb4d372b728c2b8460e142269b',1,'fbgemm::PackMatrix::getBuf()'],['../classfbgemm_1_1_pack_weight_matrix_for_g_conv.html#a46f1fd2c0a84f4b0b40f2e907c1908a2',1,'fbgemm::PackWeightMatrixForGConv::getBuf()']]], + ['getcodeloggingfile_17',['getCodeLoggingFile',['../classfbgemm_1_1_code_gen_base.html#af4e14d6e1be8b47db233b7226e4b8e2a',1,'fbgemm::CodeGenBase']]], + ['getorcreate_18',['getOrCreate',['../classfbgemm_1_1_code_gen_base.html#af00d94622211a89e6568e05d9b63850a',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a65dee023d21aee79a75508f08e51c403',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t)'],['../classfbgemm_1_1_code_gen_base.html#ae20473007cc942d5263eed6677cfddee',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a3ba99549f858b04f892c5edb2580aa07',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#ab13fc9ae80b3c7b6f7c95597b3eca012',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#af378fb7873c900315a548c788e5f3e75',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a4050603658b0e5cf634fd9a989cb84bc',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)'],['../classfbgemm_1_1_code_gen_base.html#a4050603658b0e5cf634fd9a989cb84bc',1,'fbgemm::CodeGenBase::getOrCreate(bool accum, int32_t mc, int32_t nc, int32_t kc)']]], + ['getrowoffsetbuffer_19',['getRowOffsetBuffer',['../classfbgemm_1_1_pack_matrix.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackMatrix::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_matrix.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAMatrix::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAWithIm2Col::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAWithRowOffset::getRowOffsetBuffer()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a531d76d5d88825a31860959965c3af62',1,'fbgemm::PackAWithQuantRowOffset::getRowOffsetBuffer()']]], + 
['gqa_5fattn_5fsplitk_5fcuda_20',['gqa_attn_splitk_cuda',['../group__experimental-gen-ai-attention.html#ga72225d1459d48465e83289c29df7447a',1,'fbgemm_gpu::gen_ai::attention']]] ]; diff --git a/search/functions_9.js b/search/functions_9.js index a22263101..b460b19bf 100644 --- a/search/functions_9.js +++ b/search/functions_9.js @@ -1,12 +1,17 @@ var searchData= [ ['ics_0',['ICs',['../classfbgemm_1_1_compressed_sparse_column.html#a639afa1bb5bcdb507506ee50ef8f93e7',1,'fbgemm::CompressedSparseColumn']]], - ['int_5fnbit_5fsplit_5fembedding_5fuvm_5fcaching_5fcodegen_5flookup_5ffunction_1',['int_nbit_split_embedding_uvm_caching_codegen_lookup_function',['../group__embedding-cuda.html#gabbe880100f1036a979f3a8d8755447d0',1,'embedding_forward_quantized_host.cpp']]], - ['is_5fuvm_5ftensor_2',['is_uvm_tensor',['../group__cumem-utils.html#gacba28ed334d071e79c1ead1792391e9d',1,'fbgemm_gpu']]], - ['isa_3',['isA',['../classfbgemm_1_1_pack_matrix.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackMatrix::isA()'],['../classfbgemm_1_1_pack_a_matrix.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAMatrix::isA()'],['../classfbgemm_1_1_pack_b_matrix.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackBMatrix::isA()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAWithIm2Col::isA()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAWithRowOffset::isA()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAWithQuantRowOffset::isA()']]], - ['ishypersparse_4',['IsHyperSparse',['../classfbgemm_1_1_compressed_sparse_column.html#a5deac9b9ff0e1d7b22c7a887d40b4c8c',1,'fbgemm::CompressedSparseColumn']]], - ['ispackingcompliant_5',['isPackingCompliant',['../classfbgemm_1_1_pack_weights_for_conv.html#a5e78c80fc33d5b40be198d920a194193',1,'fbgemm::PackWeightsForConv']]], - ['isprepacked_6',['isPrePacked',['../classfbgemm_1_1_pack_matrix.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackMatrix::isPrePacked()'],['../classfbgemm_1_1_pack_a_matrix.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAMatrix::isPrePacked()'],['../classfbgemm_1_1_pack_b_matrix.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackBMatrix::isPrePacked()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAWithIm2Col::isPrePacked()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAWithRowOffset::isPrePacked()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAWithQuantRowOffset::isPrePacked()']]], - ['istherecolremainder_7',['isThereColRemainder',['../classfbgemm_1_1_pack_matrix.html#a0fea05b14052070fcc8f2f5a9a829d0f',1,'fbgemm::PackMatrix']]], - ['isthislastkblock_8',['isThisLastKBlock',['../classfbgemm_1_1_pack_matrix.html#af38b0669b7bdf219aa56a9a587f4dbaa',1,'fbgemm::PackMatrix::isThisLastKBlock()'],['../classfbgemm_1_1_pack_b_matrix.html#a231aae141b5263a766275bb3236d297d',1,'fbgemm::PackBMatrix::isThisLastKBlock()']]] + ['initcregs_1',['initCRegs',['../namespacefbgemm.html#a3a50b707287c0456d23e735846b144c0',1,'fbgemm']]], + ['int_5fnbit_5fsplit_5fembedding_5fuvm_5fcaching_5fcodegen_5flookup_5ffunction_2',['int_nbit_split_embedding_uvm_caching_codegen_lookup_function',['../group__embedding-cuda.html#gabbe880100f1036a979f3a8d8755447d0',1,'embedding_forward_quantized_host.cpp']]], + 
['is_5fautovec_5fdisabled_3',['is_autovec_disabled',['../namespacefbgemm.html#a4bd183ba7e59151ac6bff236729d4a41',1,'fbgemm']]], + ['is_5fradix_5fsort_5faccelerated_5fwith_5fopenmp_4',['is_radix_sort_accelerated_with_openmp',['../namespacefbgemm.html#af24ff1c82832652af861c3634486513a',1,'fbgemm']]], + ['is_5fuvm_5ftensor_5',['is_uvm_tensor',['../group__cumem-utils.html#gacba28ed334d071e79c1ead1792391e9d',1,'fbgemm_gpu']]], + ['isa_6',['isA',['../classfbgemm_1_1_pack_matrix.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackMatrix::isA()'],['../classfbgemm_1_1_pack_a_matrix.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAMatrix::isA()'],['../classfbgemm_1_1_pack_b_matrix.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackBMatrix::isA()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAWithIm2Col::isA()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAWithRowOffset::isA()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a16921c92359f31fbdc4968c875c90ae2',1,'fbgemm::PackAWithQuantRowOffset::isA()']]], + ['ishypersparse_7',['IsHyperSparse',['../classfbgemm_1_1_compressed_sparse_column.html#a5deac9b9ff0e1d7b22c7a887d40b4c8c',1,'fbgemm::CompressedSparseColumn']]], + ['ispackingcompliant_8',['isPackingCompliant',['../classfbgemm_1_1_pack_weights_for_conv.html#a5e78c80fc33d5b40be198d920a194193',1,'fbgemm::PackWeightsForConv']]], + ['isprepacked_9',['isPrePacked',['../classfbgemm_1_1_pack_matrix.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackMatrix::isPrePacked()'],['../classfbgemm_1_1_pack_a_matrix.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAMatrix::isPrePacked()'],['../classfbgemm_1_1_pack_b_matrix.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackBMatrix::isPrePacked()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAWithIm2Col::isPrePacked()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAWithRowOffset::isPrePacked()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a354db6b6efb4336767afa41583a96082',1,'fbgemm::PackAWithQuantRowOffset::isPrePacked()']]], + ['istherecolremainder_10',['isThereColRemainder',['../classfbgemm_1_1_pack_matrix.html#a0fea05b14052070fcc8f2f5a9a829d0f',1,'fbgemm::PackMatrix']]], + ['isthislastkblock_11',['isThisLastKBlock',['../classfbgemm_1_1_pack_matrix.html#af38b0669b7bdf219aa56a9a587f4dbaa',1,'fbgemm::PackMatrix::isThisLastKBlock()'],['../classfbgemm_1_1_pack_b_matrix.html#a231aae141b5263a766275bb3236d297d',1,'fbgemm::PackBMatrix::isThisLastKBlock()']]], + ['isymm_12',['isYmm',['../namespacefbgemm.html#adac821292975979b386dc3ab1b234a37',1,'fbgemm']]], + ['iszmm_13',['isZmm',['../namespacefbgemm.html#a26137f070019d80935a34fe466ac85c4',1,'fbgemm']]] ]; diff --git a/search/functions_c.js b/search/functions_c.js index 9ff4cdfc7..4466cf3cc 100644 --- a/search/functions_c.js +++ b/search/functions_c.js @@ -1,5 +1,7 @@ var searchData= [ - ['metaequals_0',['metaEquals',['../classfbgemm_1_1_pack_b_matrix.html#aa03364175cb684a60f52bc80215e907b',1,'fbgemm::PackBMatrix']]], - ['mismatchingparams_1',['mismatchingParams',['../classfbgemm_1_1_pack_weights_for_conv.html#ac8508d632e224b9a8ee2432c5b012393',1,'fbgemm::PackWeightsForConv']]] + ['matmul_5fu8i8acc16_5fref_0',['matmul_u8i8acc16_ref',['../namespacefbgemm.html#a826abc2e81aabe4c0059dd34abe8de0d',1,'fbgemm']]], + 
['matmul_5fu8i8acc32_5fref_1',['matmul_u8i8acc32_ref',['../namespacefbgemm.html#a28f838d0d9db9969debe82780183bd9a',1,'fbgemm']]], + ['metaequals_2',['metaEquals',['../classfbgemm_1_1_pack_b_matrix.html#aa03364175cb684a60f52bc80215e907b',1,'fbgemm::PackBMatrix']]], + ['mismatchingparams_3',['mismatchingParams',['../classfbgemm_1_1_pack_weights_for_conv.html#ac8508d632e224b9a8ee2432c5b012393',1,'fbgemm::PackWeightsForConv']]] ]; diff --git a/search/functions_e.js b/search/functions_e.js index 1fbcefb19..69b32a354 100644 --- a/search/functions_e.js +++ b/search/functions_e.js @@ -1,15 +1,17 @@ var searchData= [ ['pack_0',['pack',['../classfbgemm_1_1_pack_matrix.html#a0c1765c6a94482209b1fc0cd334ad44e',1,'fbgemm::PackMatrix::pack()'],['../classfbgemm_1_1_pack_a_matrix.html#a58a5c6b7f15d3c2a796942e383a97f36',1,'fbgemm::PackAMatrix::pack()'],['../classfbgemm_1_1_pack_b_matrix.html#ae2d8887226e140ed6ddc140cd338910d',1,'fbgemm::PackBMatrix::pack()'],['../classfbgemm_1_1_pack_weight_matrix_for_g_conv.html#a915ffc82b17862ab1d2a466a79d23a3f',1,'fbgemm::PackWeightMatrixForGConv::pack()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a58a5c6b7f15d3c2a796942e383a97f36',1,'fbgemm::PackAWithIm2Col::pack()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a58a5c6b7f15d3c2a796942e383a97f36',1,'fbgemm::PackAWithRowOffset::pack()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a58a5c6b7f15d3c2a796942e383a97f36',1,'fbgemm::PackAWithQuantRowOffset::pack()'],['../structfbgemm_1_1_b_c_s_r_matrix.html#ae1871cae73e37637f6a2d65a14f0512f',1,'fbgemm::BCSRMatrix::pack(const DTYPE *src, size_t ld)'],['../structfbgemm_1_1_b_c_s_r_matrix.html#ac86c58878f6bcd10610f66eefbe53a90',1,'fbgemm::BCSRMatrix::pack(const DTYPE *src)']]], - ['packawithim2col_1',['PackAWithIm2Col',['../classfbgemm_1_1_pack_a_with_im2_col.html#a37d96dcba66f792135549702d2f25e4a',1,'fbgemm::PackAWithIm2Col']]], - ['packawithquantrowoffset_2',['PackAWithQuantRowOffset',['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a44ca398424d2d534802de6b892bf3a6a',1,'fbgemm::PackAWithQuantRowOffset']]], - ['packawithrowoffset_3',['PackAWithRowOffset',['../classfbgemm_1_1_pack_a_with_row_offset.html#a8dceb15ed761dfbf804244ffd2fc7f66',1,'fbgemm::PackAWithRowOffset']]], - ['packbmatrix_4',['PackBMatrix',['../classfbgemm_1_1_pack_b_matrix.html#a1afee702206695dfcd20de0474408b07',1,'fbgemm::PackBMatrix']]], - ['packedblock_5',['packedBlock',['../classfbgemm_1_1_pack_matrix.html#a9c6a626fc1b0a20479c167862d7a91be',1,'fbgemm::PackMatrix']]], - ['packedbuffersize_6',['packedBufferSize',['../classfbgemm_1_1_pack_matrix.html#ab11bd74e390ac73323a514cf2d6e6b98',1,'fbgemm::PackMatrix']]], - ['packedcolstart_7',['packedColStart',['../classfbgemm_1_1_pack_matrix.html#aa981736a44501513eb4c0f8cb72a11c8',1,'fbgemm::PackMatrix']]], - ['packedrowstart_8',['packedRowStart',['../classfbgemm_1_1_pack_matrix.html#ae9e47d9b93f5049504203ff55472e075',1,'fbgemm::PackMatrix']]], - ['packmatrix_9',['PackMatrix',['../classfbgemm_1_1_pack_matrix.html#ac15276b97315df2567c4ab36d48b8da0',1,'fbgemm::PackMatrix']]], - ['packweightmatrixforgconv_10',['PackWeightMatrixForGConv',['../classfbgemm_1_1_pack_weight_matrix_for_g_conv.html#ac4aac545b455c64f161fc78ac724d3e3',1,'fbgemm::PackWeightMatrixForGConv']]], - 
['printpackedmatrix_11',['printPackedMatrix',['../classfbgemm_1_1_pack_matrix.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackMatrix::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_matrix.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAMatrix::printPackedMatrix()'],['../classfbgemm_1_1_pack_b_matrix.html#ab19db6d7505e9ed131b2a101f90d5093',1,'fbgemm::PackBMatrix::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAWithIm2Col::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAWithRowOffset::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAWithQuantRowOffset::printPackedMatrix()']]] + ['packa_1',['PackA',['../namespacefbgemm.html#a28c69d65ed666a9f46dc4763be70cdf6',1,'fbgemm']]], + ['packawithim2col_2',['PackAWithIm2Col',['../classfbgemm_1_1_pack_a_with_im2_col.html#a37d96dcba66f792135549702d2f25e4a',1,'fbgemm::PackAWithIm2Col']]], + ['packawithquantrowoffset_3',['PackAWithQuantRowOffset',['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a44ca398424d2d534802de6b892bf3a6a',1,'fbgemm::PackAWithQuantRowOffset']]], + ['packawithrowoffset_4',['PackAWithRowOffset',['../classfbgemm_1_1_pack_a_with_row_offset.html#a8dceb15ed761dfbf804244ffd2fc7f66',1,'fbgemm::PackAWithRowOffset']]], + ['packbmatrix_5',['PackBMatrix',['../classfbgemm_1_1_pack_b_matrix.html#a1afee702206695dfcd20de0474408b07',1,'fbgemm::PackBMatrix']]], + ['packedblock_6',['packedBlock',['../classfbgemm_1_1_pack_matrix.html#a9c6a626fc1b0a20479c167862d7a91be',1,'fbgemm::PackMatrix']]], + ['packedbuffersize_7',['packedBufferSize',['../classfbgemm_1_1_pack_matrix.html#ab11bd74e390ac73323a514cf2d6e6b98',1,'fbgemm::PackMatrix']]], + ['packedcolstart_8',['packedColStart',['../classfbgemm_1_1_pack_matrix.html#aa981736a44501513eb4c0f8cb72a11c8',1,'fbgemm::PackMatrix']]], + ['packedrowstart_9',['packedRowStart',['../classfbgemm_1_1_pack_matrix.html#ae9e47d9b93f5049504203ff55472e075',1,'fbgemm::PackMatrix']]], + ['packmatrix_10',['PackMatrix',['../classfbgemm_1_1_pack_matrix.html#ac15276b97315df2567c4ab36d48b8da0',1,'fbgemm::PackMatrix']]], + ['packweightmatrixforgconv_11',['PackWeightMatrixForGConv',['../classfbgemm_1_1_pack_weight_matrix_for_g_conv.html#ac4aac545b455c64f161fc78ac724d3e3',1,'fbgemm::PackWeightMatrixForGConv']]], + ['printmatrix_12',['printMatrix',['../namespacefbgemm.html#adfee356e154f8b2f88c725885b1dbc11',1,'fbgemm']]], + ['printpackedmatrix_13',['printPackedMatrix',['../classfbgemm_1_1_pack_matrix.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackMatrix::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_matrix.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAMatrix::printPackedMatrix()'],['../classfbgemm_1_1_pack_b_matrix.html#ab19db6d7505e9ed131b2a101f90d5093',1,'fbgemm::PackBMatrix::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_with_im2_col.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAWithIm2Col::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_with_row_offset.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAWithRowOffset::printPackedMatrix()'],['../classfbgemm_1_1_pack_a_with_quant_row_offset.html#a6efc512f013eacc053ed29b2dee722d5',1,'fbgemm::PackAWithQuantRowOffset::printPackedMatrix()']]] ]; diff --git a/search/functions_f.js b/search/functions_f.js index 709e3aae6..a6a8ee423 100644 --- a/search/functions_f.js +++ b/search/functions_f.js @@ -1,4 +1,5 
@@ var searchData= [ - ['quantizegroupwise_0',['QuantizeGroupwise',['../group__fbgemm-quant-utils-generic.html#ga7a5705b5705425abc8f72fe339c2ae91',1,'fbgemm']]] + ['quantize_0',['Quantize',['../namespacefbgemm.html#a3350c03dc2d62e8e434332d088f6a895',1,'fbgemm']]], + ['quantizegroupwise_1',['QuantizeGroupwise',['../group__fbgemm-quant-utils-generic.html#ga7a5705b5705425abc8f72fe339c2ae91',1,'fbgemm']]] ]; diff --git a/search/groups_0.js b/search/groups_0.js index 6d9180ff1..51f3b99c1 100644 --- a/search/groups_0.js +++ b/search/groups_0.js @@ -1,5 +1,7 @@ var searchData= [ - ['avx2_0',['Quantization Utilities (AVX2)',['../group__fbgemm-quant-utils-avx2.html',1,'']]], - ['avx512_1',['Quantization Utilities (AVX512)',['../group__fbgemm-quant-utils-avx512.html',1,'']]] + ['ai_20attention_0',['Experimental-gen-ai-attention',['../group__experimental-gen-ai-attention.html',1,'']]], + ['attention_1',['Experimental-gen-ai-attention',['../group__experimental-gen-ai-attention.html',1,'']]], + ['avx2_2',['Quantization Utilities (AVX2)',['../group__fbgemm-quant-utils-avx2.html',1,'']]], + ['avx512_3',['Quantization Utilities (AVX512)',['../group__fbgemm-quant-utils-avx512.html',1,'']]] ]; diff --git a/search/groups_3.js b/search/groups_3.js index 858143fb1..b7c85daf5 100644 --- a/search/groups_3.js +++ b/search/groups_3.js @@ -4,5 +4,6 @@ var searchData= ['embedding_20cuda_20operators_1',['Embedding CUDA Operators',['../group__embedding-cuda.html',1,'']]], ['embeddings_20operators_20cpu_2',['Permute Pooled Embeddings Operators (CPU)',['../group__permute-pooled-embs-cpu.html',1,'']]], ['embeddings_20operators_20cuda_3',['Permute Pooled Embeddings Operators (CUDA)',['../group__permute-pooled-embs-gpu.html',1,'']]], - ['example_20method_20group_4',['Example Method Group',['../group__example-method-group.html',1,'']]] + ['example_20method_20group_4',['Example Method Group',['../group__example-method-group.html',1,'']]], + ['experimental_20gen_20ai_20attention_5',['Experimental-gen-ai-attention',['../group__experimental-gen-ai-attention.html',1,'']]] ]; diff --git a/search/groups_4.js b/search/groups_4.js index 0989b6636..3d3d8935e 100644 --- a/search/groups_4.js +++ b/search/groups_4.js @@ -1,5 +1,6 @@ var searchData= [ - ['generic_0',['Quantization Utilities (Generic)',['../group__fbgemm-quant-utils-generic.html',1,'']]], - ['group_1',['Example Method Group',['../group__example-method-group.html',1,'']]] + ['gen_20ai_20attention_0',['Experimental-gen-ai-attention',['../group__experimental-gen-ai-attention.html',1,'']]], + ['generic_1',['Quantization Utilities (Generic)',['../group__fbgemm-quant-utils-generic.html',1,'']]], + ['group_2',['Example Method Group',['../group__example-method-group.html',1,'']]] ]; diff --git a/search/namespaces_0.js b/search/namespaces_0.js new file mode 100644 index 000000000..dd5bd2229 --- /dev/null +++ b/search/namespaces_0.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['fbgemm_0',['fbgemm',['../namespacefbgemm.html',1,'']]] +]; diff --git a/search/searchdata.js b/search/searchdata.js index 5801d1f78..51d806d81 100644 --- a/search/searchdata.js +++ b/search/searchdata.js @@ -2,26 +2,32 @@ var indexSectionsWithContent = { 0: "_abcdefghijklmnopqrstux", 1: "bcdeimprst", - 2: "_abcdefghijlmnpqrstux", - 3: "bgikmnor", - 4: "acdegijlmopqstu" + 2: "f", + 3: "_abcdefghijlmnpqrstux", + 4: "bgikmnor", + 5: "imo", + 6: "acdegijlmopqstu" }; var indexSectionNames = { 0: "all", 1: "classes", - 2: "functions", - 3: "variables", - 4: "groups" + 2: "namespaces", + 3: "functions", + 4: 
"variables", + 5: "enums", + 6: "groups" }; var indexSectionLabels = { 0: "All", 1: "Classes", - 2: "Functions", - 3: "Variables", - 4: "Modules" + 2: "Namespaces", + 3: "Functions", + 4: "Variables", + 5: "Enumerations", + 6: "Modules" }; diff --git a/searchindex.js b/searchindex.js index b48492544..c75a1e360 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["fbgemm-cpp-api/QuantUtils", "fbgemm-development/BuildInstructions", "fbgemm_gpu-cpp-api/embedding_ops", "fbgemm_gpu-cpp-api/input_combine", "fbgemm_gpu-cpp-api/jagged_tensor_ops", "fbgemm_gpu-cpp-api/layout_transform_ops", "fbgemm_gpu-cpp-api/memory_utils", "fbgemm_gpu-cpp-api/merge_pooled_embeddings", "fbgemm_gpu-cpp-api/quantize_ops", "fbgemm_gpu-cpp-api/sparse_ops", "fbgemm_gpu-cpp-api/split_table_batched_embeddings", "fbgemm_gpu-development/BuildInstructions", "fbgemm_gpu-development/InstallationInstructions", "fbgemm_gpu-development/TestInstructions", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps", "fbgemm_gpu-python-api/jagged_tensor_ops", "fbgemm_gpu-python-api/table_batched_embedding_ops", "general/ContactUs", "general/Contributing", "general/License", "general/documentation/Cpp", "general/documentation/Overview", "general/documentation/Python", "general/documentation/Sphinx", "index"], "filenames": ["fbgemm-cpp-api/QuantUtils.rst", "fbgemm-development/BuildInstructions.rst", "fbgemm_gpu-cpp-api/embedding_ops.rst", "fbgemm_gpu-cpp-api/input_combine.rst", "fbgemm_gpu-cpp-api/jagged_tensor_ops.rst", "fbgemm_gpu-cpp-api/layout_transform_ops.rst", "fbgemm_gpu-cpp-api/memory_utils.rst", "fbgemm_gpu-cpp-api/merge_pooled_embeddings.rst", "fbgemm_gpu-cpp-api/quantize_ops.rst", "fbgemm_gpu-cpp-api/sparse_ops.rst", "fbgemm_gpu-cpp-api/split_table_batched_embeddings.rst", "fbgemm_gpu-development/BuildInstructions.rst", "fbgemm_gpu-development/InstallationInstructions.rst", "fbgemm_gpu-development/TestInstructions.rst", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "general/ContactUs.rst", "general/Contributing.rst", "general/License.rst", "general/documentation/Cpp.rst", "general/documentation/Overview.rst", "general/documentation/Python.rst", "general/documentation/Sphinx.rst", "index.rst"], "titles": ["Quantization Utilities", "Build Instructions", "Embedding Operators", "Combine Input Operators", "Jagged Tensor Operators", "Layout Transformation Operators", "CUDA Memory Operators", "Pooled Embeddings Operators", "Quantization Operators", "Sparse Data Operators", "Table Batched Embedding Operators", "Build Instructions", "Installation Instructions", "Test Instructions", "Jagged Tensor Operators", "Jagged Tensor Operators", "Table Batched Embedding (TBE) Operators", "Contact Us", "Contributing", "License", "Adding Documentation to C++ Code", "Documentation", "Adding Documentation to Python Code", "Sphinx Documentation Pointers", "FBGEMM and FBGEMM_GPU Documentation Homepage"], "terms": {"templat": [0, 11, 20], "typenam": [0, 20], "t": [0, 1, 6, 9, 11, 16, 18, 20, 21], "layout_t": 0, "layout": [0, 24], "kcx": 0, "void": [0, 2, 6, 8, 10], "quantizegroupwis": 0, "const": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 22], "float": [0, 8, 15, 16, 20, 22], "src": 0, "int": [0, 8, 15, 16, 20, 22], "k": 0, "c": [0, 10, 12, 14, 19, 21, 22, 23], "x": [0, 4, 14, 20, 22], "g": [0, 1, 9, 11, 20, 22], "scale": 0, "std": [0, 3, 4, 5, 6, 7, 9, 10, 11, 20, 22], "int32_t": [0, 20, 22], "zero_point": 
0, "dst": 0, "point": [0, 8, 15, 20, 22], "data": [0, 6, 14, 16, 19, 24], "type": [0, 1, 8, 12, 14, 15, 16, 20], "paramet": [0, 6, 8, 9, 15, 16, 20, 21, 22], "output": [0, 4, 8, 9, 15, 16, 20, 22], "int8_t": 0, "uint8_t": [0, 8, 10], "ar": [0, 1, 4, 10, 11, 12, 14, 15, 16, 19, 20, 21, 22], "support": [0, 1, 11, 12, 14, 22, 24], "input": [0, 4, 6, 8, 9, 14, 15, 16, 20, 24], "tensor": [0, 2, 3, 5, 6, 7, 8, 9, 10, 16, 21, 22, 24], "kxc": 0, "correspond": [0, 9, 10, 14, 20, 22], "kcr": 0, "kctr": 0, "weight": [0, 2, 9, 10, 16], "time": [0, 1, 11, 12, 14], "dimens": [0, 4, 6, 9, 14, 15, 16, 22], "krsc": 0, "ktrsc": 0, "channel": [0, 11, 12, 17], "number": [0, 1, 9, 11, 14, 15, 16, 21], "r": [0, 13, 21], "": [0, 1, 6, 11, 13, 14, 18, 20, 21, 22], "group": [0, 14, 20], "function": [0, 1, 11, 20, 22], "perform": [0, 1, 8, 9, 14, 24], "channelwis": 0, "1": [0, 1, 9, 10, 11, 12, 13, 14, 15, 16, 21, 22, 23], "groupwis": 0, "per": [0, 14], "size": [0, 1, 6, 8, 9, 14, 15, 16], "should": [0, 9, 10, 11, 12, 14, 18, 20, 21, 22], "equal": [0, 14, 22], "zero": [0, 15, 22], "reprsent": 0, "fusedquantizedequant": 0, "int64_t": [0, 2, 3, 4, 5, 6, 8, 9, 10], "len": [0, 14], "tensorquantizationparam": 0, "qparam": 0, "thread_id": 0, "0": [0, 1, 8, 9, 10, 11, 12, 14, 15, 16, 22], "num_thread": 0, "noise_ratio": 0, "0f": 0, "fuse": [0, 8, 16], "integ": [0, 6, 8, 14], "dequant": 0, "kernel": [0, 1, 6, 13, 24], "acceler": 0, "awar": 0, "train": [0, 16, 24], "fp32": [0, 8, 16], "valu": [0, 4, 6, 8, 9, 10, 15, 16, 20, 21, 22], "u": [0, 11, 23, 24], "int8": [0, 16], "us": [0, 1, 6, 9, 11, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24], "provid": [0, 1, 11, 12, 13, 19, 20, 21, 22, 24], "back": [0, 6, 10, 11, 12], "inputtyp": 0, "floatorhalftofusednbitrowwisequantizedsbhalf": 0, "bit_rat": [0, 8], "size_t": [0, 8, 20], "input_row": 0, "input_column": 0, "convert": [0, 6, 8, 14, 15, 22], "fp16": [0, 8, 16], "rowwis": [0, 8, 16], "bitrat": 0, "specifi": [0, 1, 8, 9, 11, 15, 16], "bit": [0, 8], "bia": [0, 8], "each": [0, 9, 11, 14, 15, 16, 22], "row": [0, 4, 10, 14, 15, 16, 22], "store": [0, 9, 10], "itself": [0, 14, 21], "end": [0, 12, 14, 23], "can": [0, 1, 8, 9, 11, 12, 14, 20, 21, 22, 23], "4": [0, 11, 12, 14, 15, 16, 22], "8": [0, 8, 11, 14, 16], "uint32_t": 0, "xor128": 0, "random": 0, "gener": [0, 1, 9, 11, 12, 20, 23], "9": [0, 11, 14, 16], "base": [0, 1, 9, 10, 11, 14], "thi": [0, 1, 4, 6, 7, 9, 11, 12, 14, 17, 18, 19, 20, 22, 23, 24], "paper": 0, "findminmax": 0, "m": [0, 11, 12, 13], "min": 0, "max": [0, 16], "find": [0, 10, 11], "matrix": [0, 1, 15, 24], "bool": [0, 6, 7, 8, 10, 16], "a_symmetr": 0, "b_symmetr": 0, "quantizationgranular": 0, "q_gran": 0, "has_bia": 0, "fuse_relu": 0, "bias_typ": 0, "direct": [0, 10, 12, 19, 20, 22, 23], "fals": [0, 6, 16, 21], "requantizeoutputprocessingavx2": 0, "out": [0, 11, 17, 19, 21], "inp": 0, "block_type_t": 0, "block": [0, 20, 22, 23], "ld_out": 0, "ld_in": 0, "requantizationparams_t": 0, "requant": 0, "avx2": [0, 1], "i": [0, 1, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24], "c_per_g": 0, "requantizeoutputprocessinggconvavx512": 0, "avx512": 0, "note": [1, 10, 11, 12, 20, 21, 22, 23], "The": [1, 6, 8, 9, 12, 13, 14, 15, 16, 18, 20, 21, 22, 23], "most": [1, 11, 12, 14, 21], "date": [1, 11, 12, 21], "embed": [1, 11, 12, 21, 24], "script": [1, 11, 12, 21], "bundl": [1, 11, 12, 21], "repo": [1, 11, 12, 21, 22], "under": [1, 11, 12, 18, 19, 21, 22], "setup_env": [1, 11, 12, 21], "bash": [1, 11, 12, 21], "step": [1, 11, 12, 14, 21, 22], "fbgemm_gpu": [1, 
6, 14, 16, 17, 18, 19, 20, 22], "follow": [1, 9, 11, 12, 14, 19, 20, 21, 22], "toolchain": [1, 11, 12], "run": [1, 11, 12, 21], "cpu": [1, 6, 7, 13, 21], "higher": 1, "In": [1, 9, 11, 12, 14, 18, 20, 22], "doe": [1, 2, 12, 20, 21, 22], "have": [1, 9, 10, 11, 14, 21], "ani": [1, 9, 11, 15, 18, 19, 21, 22], "intel": 1, "mkl": 1, "howev": [1, 11, 14, 19], "comparison": 1, "some": [1, 11, 14, 21], "benchmark": 1, "If": [1, 11, 12, 16, 18, 20, 21, 22], "found": [1, 11, 12, 21], "path": [1, 11, 20, 23], "through": [1, 18, 20, 22], "intel_mkl_dir": 1, "variabl": 1, "built": [1, 11, 12, 21, 24], "report": [1, 12], "otherwis": [1, 6, 12, 19], "subset": 1, "all": [1, 9, 10, 11, 12, 14, 16, 19, 21], "three": [1, 14], "git": [1, 11], "submodul": [1, 11], "custom": [1, 23], "version": [1, 12], "desir": [1, 11, 14, 15, 20], "thei": [1, 11, 21, 23], "asmjit_src_dir": 1, "cpuinfo_src_dir": 1, "googletest_source_dir": 1, "With": 1, "inner": [1, 14], "take": [1, 11], "one": [1, 8, 9, 10, 15, 16, 20, 22], "doesn": 1, "fit": [1, 19], "approach": 1, "so": [1, 9, 11, 12, 14], "implement": [1, 11, 14], "dynam": 1, "effici": [1, 24], "shape": [1, 14, 16], "specif": [1, 9, 11, 16, 19], "vector": [1, 3, 4, 5, 6, 7, 15, 22], "code": [1, 11, 19, 21], "third": 1, "parti": 1, "call": [1, 6, 12], "detect": [1, 13], "runtim": [1, 11], "pytorch": [1, 14, 17, 21, 22, 24], "project": [1, 18], "dispatch": [1, 6], "optim": [1, 8, 16], "test": [1, 11, 12, 18, 24], "you": [1, 18, 20, 22], "don": [1, 9, 11, 21], "want": [1, 18], "togeth": [1, 20, 21], "default": [1, 9, 11, 12, 16], "turn": [1, 21], "off": [1, 12, 17], "simpli": [1, 11], "fbgemm_build_test": 1, "conda": [1, 13, 21], "For": [1, 13, 14, 17, 19, 20, 21, 22, 23], "platform": [1, 11, 19], "machin": [1, 11, 12, 13, 24], "microsoft": [1, 8], "visual": 1, "studio": 1, "2019": 1, "newer": [1, 11], "recommend": [1, 4, 11, 12, 14], "here": [1, 6, 11, 18, 20, 21, 22, 23], "necessari": [1, 11], "ninja": [1, 11], "etc": [1, 11, 16], "n": [1, 8, 11, 12, 23], "env_nam": [1, 11, 12], "y": [1, 4, 11, 12, 15, 21], "doxygen": [1, 20, 21], "make": [1, 10, 11, 18, 20, 21, 22], "openbla": 1, "packag": [1, 13, 21], "onli": [1, 9, 10, 13, 14, 18, 20, 21, 23], "clone": [1, 11], "along": [1, 11, 12], "its": [1, 6, 9, 11, 16, 19, 21, 23], "insid": [1, 11, 12, 13, 21, 23], "recurs": [1, 11], "http": [1, 11, 12, 18, 20, 21, 22], "github": [1, 11, 18], "com": [1, 11, 18], "cd": [1, 11, 13, 21], "assum": [1, 9], "process": [1, 4, 12, 14, 18, 22], "straightforward": 1, "creat": [1, 6, 11, 14, 18, 20, 22, 23], "directori": [1, 11, 13, 18, 20, 21], "mkdir": 1, "argument": [1, 9, 20, 21, 22], "build_arg": 1, "duse_sanit": 1, "address": [1, 11], "dfbgemm_library_typ": 1, "share": [1, 6], "dpython_execut": 1, "which": [1, 9, 11, 12, 14, 16, 21], "python3": [1, 12], "option": [1, 2, 4, 6, 10, 11, 15, 16], "document": [1, 6, 18, 19], "dfbgemm_build_doc": 1, "ON": [1, 19], "j": [1, 14], "verbos": 1, "As": [1, 9, 11, 12, 14], "write": [1, 11, 12, 21, 22], "fail": [1, 12, 13, 20], "due": [1, 11], "known": [1, 11, 16], "regress": 1, "To": [1, 11, 13, 23], "work": [1, 11, 12, 14, 18], "around": 1, "append": [1, 11, 20, 22], "export": [1, 11, 13], "prior": [1, 11, 12, 19], "cflag": 1, "wno": 1, "error": [1, 8, 12, 20, 21, 22], "mayb": 1, "uniniti": 1, "restrict": 1, "cxxflag": 1, "pleas": [1, 18, 20, 22], "see": [1, 6, 11, 12, 14, 20, 22, 23], "77939": 1, "1094": 1, "1666": 1, "more": [1, 6, 11, 16, 20, 22, 23], "detail": [1, 12], "exactli": 1, "same": [1, 6, 9, 11, 14, 15, 20, 21, 22], "extra": 1, "need": 
[1, 11, 12, 13, 14, 18, 20, 22, 23], "ad": [1, 18, 21], "invoc": [1, 11, 21], "llvm": [1, 11], "standard": [1, 11], "libc": 1, "openmp": [1, 11], "libomp": 1, "locat": [1, 6, 10, 11, 14], "cc_path": 1, "cxx_path": 1, "dcmake_c_compil": 1, "dcmake_cxx_compil": 1, "dcmake_c_flag": [1, 11], "fopenmp": 1, "stdlib": [1, 11], "conda_prefix": [1, 11], "includ": [1, 7, 11, 19, 20, 22], "dcmake_cxx_flag": [1, 11], "likewis": 1, "also": [1, 11, 16, 23], "veri": [1, 11, 20, 21, 22], "target": [1, 6, 8, 9, 11, 14, 20, 21, 22, 23], "architectur": [1, 11, 12], "bc": [1, 11], "x64": 1, "program": [1, 18], "file": [1, 11, 12, 17, 18, 20, 21, 22, 23], "x86": [1, 24], "enterpris": 1, "vc": 1, "auxiliari": 1, "vcvarsal": 1, "bat": 1, "build_dir": 1, "dfbgemm_build_benchmark": 1, "dcmake_build_typ": 1, "releas": [1, 12], "cl": 1, "ex": 1, "v": [1, 4, 13, 15], "int_nbit_split_embedding_codegen_lookup_funct": 2, "dev_weight": [2, 10], "uvm_weight": [2, 10], "weights_plac": [2, 10], "weights_offset": [2, 10], "weights_ti": [2, 10], "d_offset": [2, 8, 10], "total_d": [2, 10, 16], "max_int2_d": 2, "max_int4_d": 2, "max_int8_d": 2, "max_float16_d": 2, "max_float32_d": 2, "indic": [2, 10, 14, 16], "offset": [2, 4, 9, 10, 15, 16], "pooling_mod": [2, 16], "c10": [2, 4, 6, 8, 10], "indice_weight": 2, "output_dtyp": [2, 8, 16], "lxu_cache_weight": [2, 10], "lxu_cache_loc": [2, 10], "row_align": [2, 10], "max_float8_d": 2, "fp8_exponent_bit": 2, "fp8_exponent_bia": 2, "int_nbit_split_embedding_uvm_caching_codegen_lookup_funct": 2, "cache_hash_size_cumsum": [2, 10], "total_cache_hash_s": [2, 10], "cache_index_table_map": [2, 10], "lxu_cache_st": [2, 10], "lxu_stat": 2, "simlar": 2, "uvm_cach": 2, "lookup": [2, 10], "pruned_hashmap_lookup_cuda": 2, "hash_tabl": 2, "hash_table_offset": 2, "pruned_array_lookup_cuda": 2, "index_remap": 2, "index_remappings_offset": 2, "bounds_check_indices_cuda": 2, "rows_per_t": 2, "bounds_check_mod": [2, 16], "warn": [2, 16, 20], "b_ofset": 2, "max_b": [2, 10], "int_nbit_split_embedding_codegen_lookup_function_cpu": 2, "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu": 2, "pruned_hashmap_insert_unweighted_cpu": 2, "dense_indic": 2, "pruned_hashmap_lookup_unweighted_cpu": 2, "pruned_array_lookup_cpu": 2, "tupl": [3, 4, 9, 10, 16], "tbe_input_combine_cpu": 3, "indices_list": 3, "offsets_list": 3, "per_sample_weight": [3, 16], "include_last_offset": 3, "padding_fused_tbe_input_combine_cpu": 3, "batch_siz": 3, "solv": 4, "issu": [4, 6, 11, 12, 17], "when": [4, 9, 11, 13, 14, 16, 20, 21, 23], "differ": [4, 9, 14], "length": [4, 9, 15, 16, 22], "often": 4, "occur": [4, 20], "spars": [4, 14, 24], "featur": [4, 9, 14, 16, 17], "system": [4, 11, 12, 14], "well": [4, 9, 11, 20], "natur": [4, 14], "languag": [4, 14, 23], "batch": [4, 9, 14, 15, 24], "jagged_to_padded_dense_forward": 4, "symintarrayref": 4, "max_length": [4, 15], "doubl": [4, 8, 9], "padding_valu": [4, 15], "jagged_dense_elementwise_add_jagged_output_cuda": 4, "x_valu": [4, 15], "x_offset": [4, 15, 22], "where": [4, 6, 9, 14, 15, 16], "dens": [4, 15, 22], "jagged_to_padded_dens": [4, 15], "jagged_dense_elementwise_add": [4, 15], "jagged_dense_elementwise_mul": [4, 15], "batched_dense_vec_jagged_2d_mul": [4, 15], "a_valu": [4, 15], "a_offset": [4, 15], "dense_to_jag": [4, 15], "symint": 4, "total_l": [4, 15], "jagged_dense_elementwise_add_jagged_output": [4, 15], "jagged_1d_to_dens": [4, 15], "max_l": 4, "jagged_2d_to_dens": [4, 11, 12, 15, 21, 22], "max_sequence_length": [4, 15, 22], "recat_embedding_grad_output_cuda": 
5, "grad_output": 5, "num_features_per_rank": 5, "recat_embedding_grad_output_mixed_d_cuda": 5, "dim_sum_per_rank": 5, "recat_embedding_grad_output_mixed_d_batch_cuda": 5, "cumsum_dim_sum_per_rank": 5, "recat_embedding_grad_output_mixed_d_cpu": 5, "new_managed_tensor": 6, "self": 6, "alloc": [6, 20], "an": [6, 9, 12, 13, 14, 16, 20, 21, 22, 23], "unifi": 6, "manag": [6, 11, 12, 16], "uvm": [6, 13], "Then": 6, "set": [6, 10, 13, 14, 15, 16], "prefer": [6, 12], "storag": [6, 8, 10], "host": [6, 11], "establish": 6, "map": [6, 9, 10, 14, 16], "devic": [6, 7, 11, 13, 16], "return": [6, 8, 9, 15, 16, 20, 21, 22], "A": [6, 8, 12, 14, 15, 16, 19, 20, 21, 22], "new": [6, 8, 10, 20, 21, 22], "new_managed_tensor_meta": 6, "placehold": 6, "meta": [6, 19], "kei": 6, "empti": [6, 14, 15, 23], "new_host_mapped_tensor": 6, "new_unified_tensor": 6, "is_host_map": 6, "either": [6, 8, 9, 11, 12], "whether": [6, 11, 19], "depend": [6, 8, 11, 12, 14], "new_vanilla_managed_tensor": 6, "allow": [6, 11], "automat": [6, 9, 13, 21], "uvm_storag": 6, "check": [6, 16], "gpu": [6, 11, 12, 13, 24], "true": [6, 16], "is_uvm_tensor": 6, "BUT": [6, 19], "non": [6, 16], "uvm_to_cpu": 6, "effect": [6, 14], "move": 6, "from": [6, 8, 9, 10, 11, 12, 13, 14, 16, 18, 19, 20, 21, 22, 23], "uvm_to_devic": 6, "prototyp": 6, "whose": 6, "uvm_cuda_mem_advis": 6, "cuda_memory_advis": 6, "cudamemadvis": 6, "cudamemoryadvis": 6, "enum": [6, 8], "avail": [6, 11, 13, 21], "python": [6, 11, 13, 20, 21, 23], "side": [6, 20, 22, 24], "namespac": 6, "over": [6, 11], "valid": 6, "inform": [6, 14, 22, 23], "uvm_cuda_mem_prefetch_async": 6, "device_t": 6, "cudamemprefetchasync": 6, "prefetch": 6, "destin": 6, "uvm_mem_advice_dont_fork": 6, "madvis": 6, "madv_dontfork": 6, "workaround": 6, "driver": [6, 11], "un": 6, "page": [6, 18, 23, 24], "tabl": [6, 9, 14, 24], "fork": [6, 18], "caus": [6, 11, 12, 19, 21], "slowdown": 6, "next": [6, 14, 20, 22], "access": [6, 16], "uvm_to_cpu_clon": 6, "copi": 6, "contigu": [6, 9], "singl": [6, 8], "thread": 6, "memcpi": 6, "contain": [6, 11, 14, 15, 16, 22], "section": [7, 11, 22], "cuda": [7, 16, 24], "variou": 7, "all_to_one_devic": 7, "inputtensor": 7, "target_devic": 7, "permute_pooled_embs_split_gpu": 7, "pooled_emb": 7, "offset_dim_list": 7, "permute_list": 7, "inv_offset_dim_list": 7, "inv_permute_list": 7, "permute_pooled_embs_auto_grad_split_gpu": 7, "permute_pooled_embs_auto_grad_gpu": 7, "permute_pooled_embs_cpu_impl": 7, "allow_dupl": 7, "permute_pooled_embs_split_cpu": 7, "permute_pooled_embs_auto_grad_split_cpu": 7, "permute_pooled_embs_auto_grad": 7, "permute_pooled_embs_auto_grad_cpu": 7, "model": [8, 9], "techniqu": 8, "reduc": 8, "larg": [8, 11], "order": [8, 14, 18], "achiev": [8, 12], "better": [8, 20], "small": 8, "loss": [8, 19], "accuraci": 8, "_float_to_bfloat16_gpu": 8, "brain": 8, "bfloat16": 8, "_bfloat16_to_float_gpu": 8, "_float_to_fp8rowwise_gpu": 8, "forward": 8, "fp8": 8, "dtype": [8, 16], "sparsetyp": [8, 16], "bf16": 8, "throw": [8, 20], "_fp8rowwise_to_float_gpu": 8, "represent": [8, 14], "_float_to_fused8bitrowwise_gpu": 8, "_half_to_fused8bitrowwise_gpu": 8, "half": 8, "_single_or_half_precision_to_fused8bitrowwise_gpu": 8, "_fused8bitrowwise_to_float_gpu": 8, "_fused8bitrowwise_to_half_gpu": 8, "_fused8bitrowwise_to_single_or_half_precision_gpu": 8, "scale_bias_last": 8, "quant_padding_float_typ": 8, "_fused8bitrowwise_to_float_mixed_dim_gpu": 8, "kfloat": 8, "khalf": 8, "_float_to_fusednbitrowwise_gpu": 8, "_half_to_fusednbitrowwise_gpu": 8, 
"_single_or_half_precision_to_fusednbitrowwise_gpu": 8, "_fusednbitrowwise_to_float_gpu": 8, "_fusednbitrowwise_to_half_gpu": 8, "_fusednbitrowwise_to_single_or_half_precision_gpu": 8, "_float_to_hfp8_gpu": 8, "ebit": 8, "exponent_bia": 8, "max_po": 8, "hybrid": 8, "hfp8": 8, "_hfp8_to_float_gpu": 8, "_float_to_msfp_gpu": 8, "bounding_box_s": 8, "mbit": 8, "min_po": 8, "msfp": 8, "_msfp_to_float_gpu": 8, "_float_to_paddedfp8rowwise_gpu": 8, "row_dim": 8, "pad": [8, 14, 15, 22], "_paddedfp8rowwise_to_float_gpu": 8, "output_last_dim": 8, "_fused8bitrowwise_to_float_cpu_out": 8, "_float_to_fused8bitrowwise_cpu_out": 8, "float_to_fused8bitrowwise_cpu": 8, "half_to_fused8bitrowwise_cpu": 8, "float_or_half_to_fused8bitrowwise_cpu": 8, "fused8bitrowwise_to_float_cpu": 8, "fused8bitrowwise_to_half_cpu": 8, "fused8bitrowwise_to_float_or_half_cpu": 8, "float_to_fp8rowwise_cpu": 8, "fp8rowwise_to_float_cpu": 8, "fusednbitrowwise_to_float_cpu": 8, "fusednbitrowwise_to_half_cpu": 8, "fusednbitrowwise_to_float_or_half_cpu": 8, "floattofp8quantized_ref": 8, "nrow": 8, "ncol": 8, "fp8quantizedtofloat_ref": 8, "expand_into_jagged_permute_cuda": 9, "permut": 9, "input_offset": 9, "output_offset": 9, "output_s": 9, "expand_into_jagged_permut": 9, "expand": 9, "index": [9, 10, 11, 12, 14, 20, 22], "case": [9, 11, 12, 14, 18], "ha": [9, 12, 14, 18, 20, 21], "across": [9, 11], "rank": [9, 14], "level": 9, "exclus": 9, "op": [9, 12, 15, 22], "bag": [9, 16, 24], "posit": [9, 16], "sit": 9, "after": [9, 11, 12, 13, 14, 16, 21, 22, 23], "we": [9, 11, 14, 18], "deriv": [9, 14, 19], "arrai": [9, 15, 22], "comput": [9, 11, 12, 16], "formula": 9, "output_permut": 9, "table_offset": 9, "bag_offset": 9, "histogram_binning_calibration_cpu": 9, "logit": 9, "bin_num_exampl": 9, "bin_num_posit": 9, "positive_weight": 9, "lower_bound": 9, "upper_bound": 9, "bin_ctr_in_use_aft": 9, "bin_ctr_weight_valu": 9, "divid": [9, 14], "predict": 9, "rang": [9, 14], "e": [9, 11, 14, 20, 22, 23], "b": [9, 11, 14, 15, 16, 20, 21, 22, 23], "bin": [9, 11], "two": [9, 14, 15, 16, 21], "exampl": [9, 11, 12, 13, 15, 16, 20, 21, 22, 23], "fall": [9, 11, 12], "bucket": [9, 11], "basic": [9, 22], "histogram": 9, "result": [9, 11, 15], "statist": 9, "real": 9, "ctr": 9, "num_po": 9, "num_exampl": 9, "final": 9, "calibr": 9, "pre": [9, 11, 12], "cali": 9, "wai": [9, 19], "within": 9, "suffici": [9, 18, 21], "That": 9, "fine": 9, "grain": 9, "modul": [9, 12, 16, 22], "theoret": 9, "layer": 9, "fix": [9, 11], "uncalibr": 9, "befor": [9, 11, 16, 23], "appli": [9, 11, 14, 16], "sigmoid": 9, "calibart": 9, "pass": [9, 16, 18, 21], "lower": 9, "bound": [9, 14], "calibration_target": 9, "observ": 9, "sum": [9, 15, 16], "statisct": 9, "final_calibrated_predict": 9, "bin_ctr_weight": 9, "bin_ctr": 9, "calibrated_predict": 9, "bin_id": 9, "generic_histogram_binning_calibration_by_feature_cpu": 9, "segment_valu": 9, "segment_length": 9, "num_seg": 9, "bin_boundari": 9, "extens": [9, 20, 21], "ectr": 9, "abov": [9, 12, 14, 19, 20, 22, 23], "accept": [9, 18], "sort": [9, 10, 11], "keyjaggedtensor": 9, "num_bin": 9, "longer": [9, 17, 20], "still": [9, 11], "parambin_ctr_weight_valu": 9, "get_unique_indices_cuda": 10, "linear_indic": 10, "max_indic": 10, "compute_count": 10, "dedupl": 10, "pair": [10, 23], "lru_cache_find_uncached_cuda": 10, "unique_indic": 10, "unique_indices_length": 10, "time_stamp": 10, "lru_stat": 10, "gather_cache_stat": 10, "uvm_cache_stat": 10, "lock_cache_lin": 10, "lxu_cache_locking_count": 10, "lru": [10, 16], "cach": [10, 11, 16], 
"uncach": 10, "them": 10, "host_lxu_cache_slot": 10, "h_in": 10, "cache_set": [10, 16], "linearize_cache_indices_cuda": 10, "b_offset": 10, "linear": 10, "uniqu": [10, 23], "linearize_cache_indices_from_row_idx_cuda": 10, "update_table_indic": 10, "update_row_indic": 10, "format": [10, 21, 22], "inplac": 10, "updat": [10, 11, 12, 16, 18], "lru_cache_populate_cuda": 10, "hash_size_cumsum": 10, "linear_cache_indic": 10, "stochastic_round": [10, 16], "fetch": 10, "insert": [10, 23], "timestep": 10, "lru_cache_populate_byte_cuda": 10, "byte": 10, "element": [10, 14], "direct_mapped_lru_cache_populate_byte_cuda": 10, "lxu_cache_miss_timestamp": 10, "assoc": 10, "variant": [10, 11, 12, 21], "lfu_cache_populate_cuda": 10, "lfu_stat": 10, "lfu": [10, 16], "lfu_cache_populate_byte_cuda": 10, "lxu_cache_lookup_cuda": 10, "invalid_index": 10, "num_uniq_cache_indic": 10, "lxu_cache_locations_output": 10, "look": [10, 16], "up": [10, 16], "slot": 10, "sentinel": 10, "miss": [10, 11], "direct_mapped_lxu_cache_lookup_cuda": 10, "lxu_cache_flush_cuda": 10, "flush": 10, "reset_weight_momentum_cuda": 10, "momentum1_dev": 10, "momentum1_uvm": 10, "momentum1_plac": 10, "momentum1_offset": 10, "pruned_indic": 10, "pruned_indices_offset": 10, "logical_table_id": 10, "buffer_id": 10, "lxu_cache_locking_counter_decrement_cuda": 10, "decrement": 10, "counter": 10, "lxu_cache_locations_update_cuda": 10, "lxu_cache_locations_new": 10, "fbgemm": [11, 12, 15, 17, 18, 19, 21, 22], "reproduc": [11, 12, 18, 19], "platform_nam": 11, "unam": 11, "prefix": [11, 23], "miniconda_prefix": 11, "home": 11, "download": [11, 12], "wget": 11, "q": 11, "anaconda": 11, "miniconda3": 11, "latest": 11, "sh": 11, "o": [11, 12], "p": 11, "load": [11, 14, 22], "shortcut": 11, "bashrc": 11, "command": [11, 12, 20, 21], "against": [11, 13], "env": [11, 12], "name": [11, 12, 19, 20, 22], "python_vers": 11, "3": [11, 14, 15, 16, 19, 22], "12": [11, 14, 16], "upgrad": 11, "pyopenssl": 11, "22": [11, 14], "requir": [11, 12, 13, 14, 16, 21, 22], "recent": [11, 12], "nvcc": 11, "capabl": [11, 13], "5": [11, 14, 16], "done": [11, 12], "bare": 11, "metal": 11, "neither": [11, 19], "nor": [11, 19], "nvidia": 11, "present": [11, 22], "sinc": [11, 14], "setup": [11, 12], "pull": [11, 12, 21], "linux": [11, 12], "distribut": [11, 19], "ubuntu": 11, "04": 11, "11": [11, 12, 14], "entrypoint": 11, "devel": 11, "ubuntu22": 11, "rest": [11, 12], "mai": [11, 12, 14, 19], "construct": [11, 12, 14], "mechan": 11, "full": [11, 12, 23], "nvml": 11, "org": [11, 12, 22], "cuda_vers": 11, "label": 11, "verifi": [11, 12, 20, 22], "cuda_runtim": 11, "h": [11, 15, 20], "libnvidia": [11, 12], "ml": [11, 12], "printenv": 11, "extract": 11, "given": [11, 14, 15], "url": [11, 12], "builder": 11, "blob": 11, "main": [11, 18], "common": [11, 12, 14, 22], "install_cuda": 11, "cudnn_url": 11, "redist": 11, "x86_64": 11, "2": [11, 12, 14, 15, 16, 20, 22, 23], "26_cuda12": 11, "archiv": 11, "tar": 11, "xz": 11, "unpack": 11, "xvf": 11, "amd": [11, 12], "minim": 11, "6": [11, 12, 14], "termin": 11, "while": [11, 21], "come": 11, "reason": [11, 12, 21], "oper": [11, 12, 24], "guid": [11, 22], "disabl": 11, "apt": 11, "prompt": 11, "debian_frontend": 11, "noninteract": 11, "db": 11, "radeon": 11, "amdgpu": 11, "focal": 11, "install_5": 11, "50601": 11, "1_all": 11, "deb": 11, "usecas": 11, "hiplibsdk": 11, "dkm": 11, "hipifi": 11, "hip": 11, "dev": 11, "20": 11, "sysroot": 11, "avoid": 11, "glibcxx": 11, "fbgemm_cpu": 11, "10": [11, 12, 14], "keep": 11, "older": [11, 12], 
"gcc_version": 11, "15": 11, "7": [11, 12, 14, 15, 16], "forg": [11, 21], "gxx_linux": 11, "64": [11, 14], "sysroot_linux": 11, "17": 11, "binari": [11, 19], "cento": 11, "stream": 11, "becaus": [11, 14], "librari": [11, 21, 24], "refer": [11, 14, 21, 22], "libstdc": 11, "what": [11, 21], "libcxx_path": 11, "print": [11, 12, 16, 22], "objdump": 11, "tc": 11, "grep": 11, "glibc_": 11, "sed": 11, "vu": 11, "cat": 11, "glibcxx_": 11, "It": [11, 12, 14], "possibl": [11, 14, 18, 19], "just": 11, "do": [11, 12, 18], "llvm_version": 11, "libcxx": 11, "outdat": 11, "aarch64": [11, 12], "cannot": 11, "explicitli": 11, "clangxx": 11, "rt": 11, "lib": [11, 12], "ld_library_path": [11, 12], "config": 11, "var": 11, "nvcc_prepend_flag": 11, "correctli": [11, 12, 13, 20, 21], "xcompil": 11, "ccbin": 11, "clangxx_path": 11, "unsupport": 11, "even": [11, 19], "though": [11, 12], "libstd": 11, "being": [11, 21], "mean": [11, 14, 16], "regardless": 11, "scenario": 11, "first": [11, 20, 22, 23], "binpath": 11, "overrid": 11, "exist": [11, 20, 22], "ln": 11, "sf": 11, "path_to_either_gcc_or_clang": 11, "cc": 11, "These": 11, "later": 11, "configur": [11, 20], "stage": [11, 14], "cmake": 11, "click": 11, "hypothesi": [11, 12], "jinja2": 11, "ncurs": 11, "numpi": [11, 12], "scikit": [11, 12], "offici": 11, "homepag": 11, "authorit": [11, 12, 21], "how": [11, 12, 13, 22], "nightli": [11, 12], "rc": 11, "without": [11, 19], "alwai": 11, "reliabl": 11, "arriv": 11, "hour": 11, "than": [11, 14], "window": 11, "silent": 11, "both": [11, 17, 19, 21], "place": [11, 16], "artifact": 11, "select": 11, "dure": [11, 14, 16, 22], "thu": [11, 16], "import": [11, 12, 16, 22, 23], "much": [11, 20], "determinist": 11, "torch": [11, 12, 15, 16, 21, 22], "whl": [11, 12], "cu121": [11, 12], "rocm5": [11, 12], "ensur": [11, 12, 18], "properli": 11, "__version__": 11, "minimum": [11, 20, 21, 22], "cuda_cmake_macro": 11, "txt": [11, 13, 21, 23], "tag": [11, 20, 23], "fbgemm_vers": 11, "v0": 11, "fbgemm_": 11, "addit": [11, 13, 14, 15], "flow": 11, "state": 11, "becom": 11, "stale": 11, "problem": 11, "re": [11, 12], "attempt": 11, "failur": [11, 12], "clear": [11, 18], "py": [11, 12, 13, 21, 22], "clean": [11, 21], "must": [11, 12, 13, 14, 16, 19, 23], "package_nam": 11, "fbgemm_gpu_": 11, "convent": 11, "major": 11, "minor": 11, "py312": 11, "python_tag": 11, "determin": [11, 14], "processor": 11, "arch": 11, "python_plat_nam": 11, "manylinux2014_": 11, "maco": 11, "macosx_10_9_": 11, "arm64": 11, "macosx_11_0_": 11, "win_": 11, "cpu_onli": 11, "flag": [11, 21], "bdist_wheel": 11, "package_vari": 11, "plat": 11, "instead": [11, 21], "cxxprefix": 11, "presum": 11, "made": [11, 21], "presenc": 11, "similar": [11, 14, 16], "enabl": [11, 13], "been": [11, 20], "unabl": 11, "cudacxx": 11, "cuda_bin_path": 11, "cub": 11, "applic": [11, 16, 20, 22], "cub_dir": 11, "header": [11, 20, 23], "cudnn_include_dir": 11, "cudnn_librari": 11, "nvml_lib_path": 11, "sm70": [11, 12], "80": 11, "v100": [11, 12], "a100": [11, 12], "current": [11, 12, 14, 16], "cuda_arch_list": 11, "unset": 11, "torch_cuda_arch_list": 11, "preced": 11, "dtorch_cuda_arch_list": 11, "rocm_path": 11, "pytorch_rocm_arch": 11, "gfx906": 11, "gfx908": 11, "gfx90a": 11, "wiki": 11, "gentoo": 11, "list": [11, 14, 15, 16, 19, 20, 22], "rocminfo": 11, "gfx": 11, "dhip_root_dir": 11, "dtorch_use_hip_dsa": 11, "complet": [11, 18, 21], "actual": 11, "correct": 11, "lot": 11, "jinja": 11, "instanti": 11, "sure": [11, 18, 20, 22], "accident": 11, "cours": 11, "fbgemm_gpu_lib_path": 11, 
"fbgemm_gpu_pi": [11, 12], "defin": [11, 14, 20], "nm": 11, "gdcu": 11, "referenc": 11, "certain": 11, "gdc": 11, "merge_pooled_embed": [11, 12], "isol": [12, 21], "build": [12, 13, 20, 22, 24], "sm80": 12, "respect": 12, "other": [12, 14, 19, 20, 21, 22], "scratch": 12, "guarante": 12, "especi": 12, "displai": [12, 23], "smi": 12, "515": 12, "76": 12, "persist": 12, "bu": [12, 23], "id": 12, "disp": 12, "volatil": 12, "uncorr": 12, "ecc": 12, "fan": 12, "temp": 12, "perf": 12, "pwr": 12, "usag": [12, 21, 22], "cap": 12, "memori": [12, 16, 24], "util": [12, 24], "mig": 12, "a10g": 12, "00000000": 12, "00": 12, "1e": 12, "31c": 12, "p0": 12, "59w": 12, "300w": 12, "0mib": 12, "23028mib": 12, "gi": 12, "ci": 12, "pid": 12, "No": 12, "expos": 12, "onc": [12, 18], "imag": 12, "launch": 12, "alreadi": [12, 18, 20, 22], "toolkit": 12, "interfac": 12, "concis": 12, "info": [12, 20, 22], "dieedg": 12, "avgpwr": 12, "sclk": 12, "mclk": 12, "pwrcap": 12, "vram": 12, "33": 12, "0c": 12, "37": 12, "0w": 12, "300mhz": 12, "1200mhz": 12, "auto": [12, 21], "290": 12, "32": 12, "39": 12, "log": 12, "difficult": 12, "relev": [12, 20], "link": [12, 21], "encount": 12, "signatur": [12, 21], "traceback": 12, "last": 12, "root": [12, 18], "miniconda": 12, "mycondaenv": 12, "site": 12, "_op": [12, 21], "line": [12, 22, 23], "565": 12, "__getattr__": 12, "overload_nam": 12, "_c": 12, "_jit_get_oper": 12, "qualified_op_nam": 12, "runtimeerror": 12, "except": [12, 20, 22], "wa": 12, "string": [12, 23], "post47": 12, "py3": 12, "egg": 12, "__init__": [12, 22], "21": 12, "_fbgemm_gpu_doc": 12, "noqa": 12, "f401": 12, "e402": 12, "18": 12, "569": 12, "rais": [12, 22], "attributeerror": [12, 22], "_opnamespac": 12, "object": [12, 14], "attribut": [12, 22], "cli": 12, "main_run": 12, "execut": [12, 13], "47": 12, "_zn6fbgemm48floatorhalftofusednbitrowwisequantizedsbhalfavx2itli2eeevpkt_miph": 12, "appear": 12, "libtorch": 12, "visibl": 12, "incorrectli": [12, 21], "declar": [12, 20], "were": [12, 15], "pr": [12, 20, 21, 22], "1618": 12, "former": 12, "resolv": 12, "manual": [12, 20], "latter": 12, "seriou": 12, "tha": 12, "develop": [12, 21], "bench": 13, "good": [13, 19], "instal": [13, 21, 24], "pip": [13, 21], "pytest": 13, "rsx": 13, "w": 13, "ignor": [13, 16, 21], "pytestcollectionwarn": 13, "split_table_batched_embeddings_test": 13, "quantize_ops_test": 13, "sparse_ops_test": 13, "split_embedding_inference_converter_test": 13, "mode": [13, 16], "cuda_visible_devic": 13, "debug": 13, "cuda_launch_block": 13, "fbgemm_test_with_rocm": 13, "hip_launch_block": 13, "split_table_batched_embeddings_benchmark": 13, "purpos": [14, 15, 16, 19], "handl": 14, "consecut": 14, "nestedtensor": 14, "raggedtensor": 14, "tensorflow": 14, "notabl": 14, "token": 14, "sentenc": 14, "repres": 14, "maxlength": 14, "2d": [14, 15, 16, 22], "numel": 14, "greatest": 14, "divisor": 14, "smallest": 14, "sub": 14, "exclud": 14, "partit": 14, "impli": [14, 19], "denot": [14, 20, 22], "offest": 14, "outer": 14, "would": 14, "begin": 14, "maximum": [14, 15, 22], "between": [14, 20, 21, 23], "normal": 14, "densor": 14, "form": [14, 19], "figur": 14, "below": 14, "show": [14, 21], "accomod": 14, "logic": [14, 20], "At": [14, 20, 21, 22], "wise": [14, 16], "multipl": [14, 15, 16, 22, 24], "hadamard": 14, "product": [14, 19], "involv": 14, "bmatrix": 14, "rightarrow": 14, "16": 14, "25": 14, "36": 14, "49": 14, "81": 14, "50": 14, "operand": 14, "word": 14, "ax": 14, "properti": 14, "hold": 14, "elementwis": [14, 15], "equival": 14, "d": [14, 15, 
23], "start": [14, 15, 22, 23], "dim": 14, "onto": 14, "part": 14, "everi": 14, "those": [14, 15, 18, 22], "converson": 14, "could": 14, "lead": 14, "read": [14, 16], "relat": 14, "smaller": 14, "expect": 14, "happen": 14, "give": 14, "situat": 14, "like": 14, "dense_tensor": 14, "jagged_tensor": 14, "break": 14, "exact": 14, "usual": 14, "1d": [15, 16, 22], "area": 15, "outsid": 15, "coverag": 15, "total": [15, 16], "identit": 15, "add": [15, 18, 20, 21, 22], "structur": 15, "jagged_dense_dense_elementwise_add_jagged_output": 15, "y_0": 15, "y_1": 15, "multipli": [15, 16], "max_n": 15, "matmul": 15, "stacked_jagged_1d_to_dens": 15, "arg": [15, 22], "kwarg": 15, "stacked_jagged_2d_to_dens": 15, "split_table_batched_embeddings_op": 16, "splittablebatchedembeddingbagscodegen": 16, "embedding_spec": 16, "feature_table_map": 16, "none": 16, "cache_algorithm": 16, "cachealgorithm": 16, "cache_load_factor": 16, "cache_reserved_memori": 16, "cache_precis": 16, "weights_precis": 16, "enforce_hbm": 16, "optimtyp": 16, "exact_sgd": 16, "record_cache_metr": 16, "gradient_clip": 16, "max_gradi": 16, "learning_r": 16, "01": 16, "ep": 16, "0e": 16, "momentum": 16, "weight_decai": 16, "weight_decay_mod": 16, "weightdecaymod": 16, "eta": 16, "001": 16, "beta1": 16, "beta2": 16, "999": 16, "poolingmod": 16, "boundscheckmod": 16, "sourc": [16, 18, 19, 20, 21, 22], "backward": 16, "embeddingloc": 16, "computedevic": 16, "spec": 16, "placement": 16, "lxu": 16, "algorithm": 16, "capac": 16, "amount": 16, "reserv": [16, 19], "hbm": 16, "adam": 16, "exact_adagrad": 16, "exact_rowwise_adagrad": 16, "lamb": 16, "lars_sgd": 16, "partial_rowwise_adam": 16, "partial_rowwise_lamb": 16, "sgd": 16, "recordcachemetr": 16, "record": 16, "hit": 16, "request": [16, 17, 21], "record_cache_miss_count": 16, "metric": 16, "record_tablewise_cache_miss": 16, "stochast": 16, "round": 16, "gradient": 16, "clip": 16, "learn": 16, "rate": 16, "epsilon": 16, "adagrad": 16, "lar": 16, "decai": 16, "l2": 16, "decoupl": 16, "pool": [16, 24], "boundari": 16, "fatal": 16, "conatin": 16, "column": 16, "feature_requires_grad": 16, "split_table_batched_embeddings_ops_common": 16, "split_table_batched_embeddings_ops_train": 16, "init_embedding_weights_uniform": 16, "split_embedding_weight": 16, "9426": 16, "7046": 16, "4214": 16, "0419": 16, "1331": 16, "7856": 16, "8124": 16, "2021": 16, "5771": 16, "5911": 16, "7792": 16, "1068": 16, "6203": 16, "4813": 16, "1677": 16, "4790": 16, "5587": 16, "0941": 16, "5754": 16, "3475": 16, "8952": 16, "1964": 16, "0810": 16, "4174": 16, "2513": 16, "4039": 16, "3775": 16, "3273": 16, "5399": 16, "0229": 16, "1455": 16, "8770": 16, "9520": 16, "4593": 16, "7169": 16, "6307": 16, "1765": 16, "8757": 16, "8614": 16, "2051": 16, "0603": 16, "9980": 16, "7958": 16, "5826": 16, "long": 16, "13": 16, "5197": 16, "2957": 16, "3578": 16, "1487": 16, "4873": 16, "3044": 16, "9801": 16, "2769": 16, "7164": 16, "8528": 16, "7159": 16, "6719": 16, "0784": 16, "2016": 16, "2176": 16, "1988": 16, "3825": 16, "5008": 16, "8991": 16, "1405": 16, "2637": 16, "9427": 16, "8902": 16, "3754": 16, "5013": 16, "6105": 16, "9968": 16, "3057": 16, "7621": 16, "9821": 16, "7314": 16, "6195": 16, "grad_fn": 16, "cppnode": 16, "splitlookupfunction_sgd_op": 16, "question": 17, "concern": 17, "discuss": 17, "kick": 17, "regard": 17, "feel": 17, "free": 17, "reach": 17, "easi": 18, "transpar": 18, "describ": 18, "activ": 18, "welcom": [18, 24], "your": [18, 21, 22], "repositori": 18, "branch": 18, "ve": 18, "chang": [18, 20, 22], 
"api": [18, 20, 21, 22], "suit": 18, "lint": 18, "haven": 18, "submit": [18, 20, 22], "facebook": [18, 19, 24], "open": 18, "track": 18, "public": [18, 21], "bug": 18, "descript": [18, 20, 21, 22, 23], "instruct": [18, 20, 21, 22, 24], "abl": 18, "bounti": 18, "safe": 18, "disclosur": 18, "secur": 18, "go": 18, "outlin": 18, "By": 18, "agre": 18, "tree": 18, "claus": 19, "bsd": 19, "softwar": 19, "copyright": 19, "inc": 19, "affili": 19, "right": [19, 23], "redistribut": 19, "modif": 19, "permit": 19, "condit": 19, "met": 19, "retain": 19, "notic": 19, "disclaim": 19, "materi": 19, "contributor": 19, "endors": 19, "promot": 19, "written": 19, "permiss": 19, "BY": 19, "THE": 19, "holder": 19, "AND": 19, "AS": 19, "express": [19, 23], "OR": 19, "warranti": 19, "NOT": 19, "limit": [19, 21], "TO": 19, "OF": 19, "merchant": 19, "FOR": 19, "particular": 19, "IN": 19, "NO": 19, "event": 19, "shall": 19, "BE": 19, "liabl": 19, "indirect": 19, "incident": 19, "special": 19, "exemplari": 19, "consequenti": 19, "damag": 19, "procur": 19, "substitut": 19, "servic": 19, "profit": 19, "busi": 19, "interrupt": 19, "theori": 19, "liabil": 19, "contract": 19, "strict": 19, "tort": 19, "neglig": 19, "aris": 19, "IF": 19, "advis": 19, "SUCH": 19, "javadoc": 20, "style": [20, 22], "comment": [20, 21, 23], "sphinx": [20, 21, 22], "breath": 20, "kept": 20, "cpp": [20, 22, 23], "cu": 20, "cuh": 20, "everyth": 20, "ifndef": 20, "doxygen_this_will_be_skip": 20, "endif": 20, "hidden": 20, "html": [20, 21, 22], "descriptionss": 20, "publish": [20, 22], "docstr": [20, 21, 22], "method": [20, 21, 22], "organ": 20, "yet": 20, "top": [20, 24], "defgroup": 20, "directli": [20, 22], "behavior": [20, 22], "tparam": 20, "param": [20, 22], "thrown": [20, 22], "ingroup": 20, "brief": 20, "short": 20, "example_method": [20, 22], "def": [20, 22], "foo": [20, 22], "lst": [20, 22], "And": [20, 22], "verbatim": [20, 22], "text": [20, 22, 23], "diagram": [20, 22], "unpars": 20, "second": [20, 22], "prev": [20, 22], "usabl": [20, 22], "space": [20, 21, 22], "endcod": 20, "align": [20, 22], "param1": [20, 22], "param2": 20, "bad_alloc": 20, "logic_error": 20, "href": 20, "www": [20, 22], "nl": 20, "cmdlink": 20, "On": [20, 22], "doxygengroup": 20, "rst": [20, 22, 23], "content": [20, 23, 24], "toctre": [20, 22], "ini": 20, "taken": 20, "care": 20, "doc": [20, 21, 22, 23], "local": [20, 22], "netlifi": [20, 21, 22], "preview": [20, 22], "serv": 21, "accompani": 21, "put": 21, "yourself": 21, "shoe": 21, "who": 21, "understand": 21, "live": 21, "easier": 21, "leav": 21, "separ": 21, "task": 21, "pointer": 21, "tool": 21, "graphviz": [21, 23], "assembl": 21, "view": 21, "prepend": 21, "sphinx_lint": 21, "technic": 21, "why": 21, "invok": 21, "occasion": 21, "unresolv": 21, "might": 21, "opt": 21, "pycapsul": 21, "class": [21, 22], "neg": 21, "silenc": 21, "nitpick": 21, "conf": 21, "domain": 21, "deploi": 21, "app": 21, "googl": 22, "c_size_t": 22, "about": 22, "ret": 22, "emplace_back": 22, "item": 22, "valueerror": 22, "14": 22, "restructuredtext": 22, "en": 22, "master": 22, "__": 22, "pep": 22, "0287": 22, "42": 22, "autofunct": 22, "c_ulong": 22, "mani": 22, "attach": 22, "fact": 22, "helper": 22, "codebas": 22, "add_doc": 22, "jag": [22, 24], "forc": 22, "hoc": 22, "the_new_doc_modul": 22, "remain": 22, "render": [22, 23], "anchor": 23, "_doc": 23, "underscor": 23, "_": 23, "There": 23, "elsewher": 23, "ref": 23, "anoth": 23, "literalinclud": 23, "rel": 23, "enclos": 23, "bracket": 23, "skiplin": 23, "suppli": 23, "math": 23, 
"inlin": 23, "k_": 23, "k_n": 23, "expressino": 23, "int_a": 23, "frac": 23, "2v": 23, "dx": 23, "left": 23, "dv": 23, "_a": 23, "du": 23, "digraph": 23, "altern": 23, "extern": 23, "dot": 23, "examplegraph": 23, "low": 24, "precis": 24, "high": 24, "convolut": 24, "server": 24, "infer": 24, "backend": 24, "caffe2": 24, "quantiz": 24, "collect": 24, "transform": 24, "contribut": 24, "contact": 24, "licens": 24, "combin": 24, "tbe": 24}, "objects": {"": [[8, 0, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ebits"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::exponent_bias"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::input"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ncols"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::nrows"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::output"], [8, 0, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu"], [8, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::forward"], [8, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::input"], [8, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::output_dtype"], [0, 0, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::len"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::m"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::max"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::min"], [0, 0, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf"], [0, 2, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::InputType"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::bit_rate"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_columns"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_rows"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::output"], [8, 0, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ebits"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::exponent_bias"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", 
"FloatToFP8Quantized_ref::input"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::max_pos"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ncols"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::nrows"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::output"], [0, 0, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize"], [0, 2, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::T"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::dst"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::len"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::noise_ratio"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::num_threads"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::qparams"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::src"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::thread_id"], [0, 0, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::C"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::G"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::K"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::LAYOUT"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::T"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::X"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::dst"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::scales"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::src"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::zero_points"], [0, 0, 1, "_CPPv46Xor128v", "Xor128"], [8, 0, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu"], [8, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::forward"], [8, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::input"], [8, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::output_dtype"], [8, 0, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu"], [8, 1, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu::input"], [8, 0, 1, 
"_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu"], [8, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::forward"], [8, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::input"], [8, 0, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu"], [8, 1, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu::input"], [8, 0, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out"], [8, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::input"], [8, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::output"], [8, 0, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu"], [8, 1, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu::input"], [8, 0, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu"], [8, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::bit_rate"], [8, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::input"], [8, 0, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu"], [8, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::ebits"], [8, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::exponent_bias"], [8, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::input"], [8, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::max_pos"], [8, 0, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bias"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bounding_box_size"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::ebits"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::input"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::max_pos"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::mbits"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::min_pos"], [8, 0, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu"], [8, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::forward"], [8, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::input"], [8, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::row_dim"], [8, 0, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out"], [8, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::input"], [8, 1, 1, 
"_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::output"], [8, 0, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu"], [8, 1, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu::input"], [8, 0, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu"], [8, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::D_offsets"], [8, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::input"], [8, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::output_dtype"], [8, 0, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu"], [8, 1, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu::input"], [8, 0, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu"], [8, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::input"], [8, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::output_dtype"], [8, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::quant_padding_float_type"], [8, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::scale_bias_last"], [8, 0, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu"], [8, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::bit_rate"], [8, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::input"], [8, 0, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu"], [8, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::bit_rate"], [8, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::input"], [8, 0, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu"], [8, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::bit_rate"], [8, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::input"], [8, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::output_dtype"], [8, 0, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu"], [8, 1, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu::input"], [8, 0, 1, 
"_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu"], [8, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::bit_rate"], [8, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::input"], [8, 0, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu"], [8, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::ebits"], [8, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::exponent_bias"], [8, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::input"], [8, 0, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu"], [8, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::bias"], [8, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::ebits"], [8, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::input"], [8, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::mbits"], [8, 0, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::forward"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::input"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_dtype"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_last_dim"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::row_dim"], [8, 0, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu"], [8, 1, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu::input"], [8, 0, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu"], [8, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::bit_rate"], [8, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::input"], [7, 0, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device"], [7, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::inputTensors"], [7, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::target_device"], [4, 0, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul"], [4, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_offsets"], [4, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_values"], [4, 1, 1, 
"_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::v"], [2, 0, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::B_ofsets"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::bounds_check_mode"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::indices"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::max_B"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::offsets"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::rows_per_table"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::warning"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::weights"], [4, 0, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged"], [4, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::dense"], [4, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::offsets"], [4, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::total_L"], [10, 0, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::D_offsets"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::gather_cache_stats"], [10, 1, 1, 
"_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::hash_size_cumsum"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lru_state"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_miss_timestamp"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::row_alignment"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::time_stamp"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::uvm_cache_stats"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights"], [10, 1, 1, 
"_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_offsets"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_tys"], [10, 0, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::invalid_index"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::uvm_cache_stats"], [20, 0, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method"], [20, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::Alignment"], [20, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::T"], [20, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param1"], [20, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param2"], [9, 0, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda"], [9, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::input_offsets"], [9, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_offsets"], [9, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_size"], [9, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::permute"], [8, 0, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu"], [8, 1, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu::input"], [8, 0, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu"], [8, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::forward"], [8, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::input"], [8, 0, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu"], [8, 1, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu::input"], [8, 0, 1, 
"_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu"], [8, 1, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu::input"], [8, 0, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu"], [8, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::input"], [8, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::output_dtype"], [8, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::quant_padding_float_type"], [8, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::scale_bias_last"], [8, 0, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu"], [8, 1, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu::input"], [8, 0, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu"], [8, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::bit_rate"], [8, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::input"], [8, 0, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu"], [8, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::bit_rate"], [8, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::input"], [8, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::output_dtype"], [8, 0, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu"], [8, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::bit_rate"], [8, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::input"], [9, 0, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_boundaries"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_in_use_after"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_weight_value"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_examples"], [9, 1, 1, 
"_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_positives"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::logit"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::num_segments"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::positive_weight"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_lengths"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_value"], [10, 0, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda"], [10, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::compute_count"], [10, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::linear_indices"], [10, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::max_indices"], [8, 0, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu"], [8, 1, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu::input"], [9, 0, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_in_use_after"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_weight_value"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_examples"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_positives"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::logit"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::lower_bound"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::positive_weight"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::upper_bound"], [10, 0, 
1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot"], [10, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::C"], [10, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::h_in"], [2, 0, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::D_offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::dev_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bias"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bits"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indice_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indices"], [2, 1, 1, 
"_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_locations"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float16_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float32_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float8_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int2_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int4_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int8_D"], [2, 1, 1, 
"_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::output_dtype"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::pooling_mode"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::row_alignment"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::total_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::uvm_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_placements"], [2, 1, 1, 
"_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_tys"], [2, 0, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::D_offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::dev_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bias"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bits"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indice_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indices"], [2, 1, 1, 
"_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_locations"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float16_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float32_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float8_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int2_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int4_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int8_D"], [2, 1, 1, 
"_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::output_dtype"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::pooling_mode"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::row_alignment"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::total_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::uvm_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_placements"], [2, 1, 1, 
"_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_tys"], [2, 0, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::D_offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_hash_size_cumsum"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_index_table_map"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::dev_weights"], [2, 1, 1, 
"_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bias"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bits"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indice_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indices"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_locations"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_state"], [2, 1, 1, 
"_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_state"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float16_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float32_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float8_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int2_D"], [2, 1, 1, 
"_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int4_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int8_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::output_dtype"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::pooling_mode"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::row_alignment"], [2, 1, 1, 
"_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_cache_hash_size"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::uvm_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_placements"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_tys"], [2, 0, 1, 
"_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::D_offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_hash_size_cumsum"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_index_table_map"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::dev_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bias"], [2, 1, 1, 
"_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bits"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indice_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indices"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_locations"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_state"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_weights"], [2, 1, 1, 
"_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_state"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float16_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float32_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float8_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int2_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int4_D"], [2, 1, 1, 
"_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int8_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::output_dtype"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::pooling_mode"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::row_alignment"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_D"], [2, 1, 1, 
"_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_cache_hash_size"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::uvm_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_placements"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_tys"], [6, 0, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor"], [6, 1, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor::self"], [4, 0, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense"], [4, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::max_L"], [4, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::offsets"], [4, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::padding_value"], [4, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", 
"jagged_1d_to_dense::values"], [4, 0, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense"], [4, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::max_sequence_length"], [4, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::offsets"], [4, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::values"], [4, 0, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_offsets"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_values"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::y"], [4, 0, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output"], [4, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_offsets"], [4, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_values"], [4, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::y"], [4, 0, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda"], [4, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_offsets"], [4, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_values"], [4, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::y"], [4, 0, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_offsets"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_values"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::y"], [4, 0, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense"], [4, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::max_lengths"], [4, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::offsets"], [4, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::padding_value"], [4, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::values"], [4, 0, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", 
"jagged_to_padded_dense_forward"], [4, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::max_lengths"], [4, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::offsets"], [4, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::padding_value"], [4, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::values"], [10, 0, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::D_offsets"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lfu_state"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::row_alignment"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_offsets"], [10, 1, 1, 
"_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_tys"], [10, 0, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::D_offsets"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lfu_state"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::stochastic_rounding"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights_offsets"], [10, 0, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda"], [10, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::B_offsets"], [10, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::indices"], [10, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", 
"linearize_cache_indices_cuda::max_B"], [10, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::offsets"], [10, 0, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda"], [10, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_row_indices"], [10, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_table_indices"], [10, 0, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lock_cache_line"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lru_state"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lxu_cache_locking_counter"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::max_indices"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::time_stamp"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::unique_indices"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::unique_indices_length"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::uvm_cache_stats"], [10, 0, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::D_offsets"], [10, 1, 1, 
"_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::hash_size_cumsum"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lru_state"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::row_alignment"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::time_stamp"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::uvm_cache_stats"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", 
"lru_cache_populate_byte_cuda::weights_offsets"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_tys"], [10, 0, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::D_offsets"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::hash_size_cumsum"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lock_cache_line"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lru_state"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_locking_counter"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_weights"], [10, 1, 1, 
"_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::stochastic_rounding"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::time_stamp"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::uvm_cache_stats"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights_offsets"], [10, 0, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::D_offsets"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::stochastic_rounding"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::total_D"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::uvm_weights"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::weights_offsets"], [10, 0, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda"], [10, 
1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations"], [10, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations_new"], [10, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::num_uniq_cache_indices"], [10, 0, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda"], [10, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locations"], [10, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locking_counter"], [10, 0, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::invalid_index"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_locations_output"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::num_uniq_cache_indices"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::uvm_cache_stats"], [6, 0, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor"], [6, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::self"], [6, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::sizes"], [6, 0, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor"], [6, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::self"], [6, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::sizes"], [6, 0, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta"], [6, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::self"], [6, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", 
"new_managed_tensor_meta::sizes"], [6, 0, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor"], [6, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::is_host_mapped"], [6, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::self"], [6, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::sizes"], [6, 0, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor"], [6, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::self"], [6, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::sizes"], [3, 0, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::batch_size"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::include_last_offsets"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::indices_list"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::offsets_list"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::per_sample_weights"], [7, 0, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::pooled_embs"], [7, 0, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_permute_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", 
"permute_pooled_embs_auto_grad_cpu::offset_dim_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::permute_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::pooled_embs"], [7, 0, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_permute_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::offset_dim_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::permute_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::pooled_embs"], [7, 0, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_permute_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::offset_dim_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::permute_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::pooled_embs"], [7, 0, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_permute_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::offset_dim_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::permute_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::pooled_embs"], [7, 0, 1, 
"_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::allow_duplicates"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_offset_dim_list"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_permute_list"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::offset_dim_list"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::permute_list"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::pooled_embs"], [7, 0, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::pooled_embs"], [7, 0, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::pooled_embs"], [2, 0, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", 
"pruned_array_lookup_cpu::index_remappings_offsets"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::indices"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::offsets"], [2, 0, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings_offsets"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::indices"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::offsets"], [2, 0, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::dense_indices"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table_offsets"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::indices"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::offsets"], [2, 0, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table_offsets"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::indices"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::offsets"], [2, 0, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table_offsets"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::indices"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::offsets"], [5, 0, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda"], [5, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::grad_output"], [5, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::num_features_per_rank"], [5, 0, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda"], [5, 1, 1, 
"_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::cumsum_dim_sum_per_rank"], [5, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::dim_sum_per_rank"], [5, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::grad_output"], [5, 0, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu"], [5, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::dim_sum_per_rank"], [5, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::grad_output"], [5, 0, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda"], [5, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::dim_sum_per_rank"], [5, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::grad_output"], [0, 0, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::DIRECT"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::Q_GRAN"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::block"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", 
"requantizeOutputProcessingAvx2::inp"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_in"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::r"], [0, 0, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::C_PER_G"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::Q_GRAN"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::block"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::inp"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_in"], [0, 1, 1, 
"_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::r"], [10, 0, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::D_offsets"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::buffer_ids"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::dev_weights"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::logical_table_ids"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_dev"], [10, 1, 1, 
"_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_offsets"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_placements"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_uvm"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices_offsets"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::uvm_weights"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_offsets"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_placements"], [3, 0, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu"], [3, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::include_last_offsets"], [3, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::indices_list"], [3, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::offsets_list"], [3, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", 
"tbe_input_combine_cpu::per_sample_weights"], [6, 0, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise"], [6, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::cuda_memory_advise"], [6, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::self"], [6, 0, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async"], [6, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::device_t"], [6, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::self"], [6, 0, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork"], [6, 1, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork::self"], [6, 0, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage"], [6, 1, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage::self"], [6, 0, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu"], [6, 1, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu::self"], [6, 0, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone"], [6, 1, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone::self"], [6, 0, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device"], [6, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::prototype"], [6, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::self"], [16, 3, 0, "-", "fbgemm_gpu"]], "fbgemm_gpu.docs.examples": [[22, 4, 1, "", "example_method"]], "fbgemm_gpu.split_table_batched_embeddings_ops": [[16, 4, 1, "", "SplitTableBatchedEmbeddingBagsCodegen"]], "torch.ops.fbgemm": [[15, 4, 1, "", "batched_dense_vec_jagged_2d_mul"], [15, 4, 1, "", "dense_to_jagged"], [15, 4, 1, "", "jagged_1d_to_dense"], [15, 4, 1, "", "jagged_2d_to_dense"], [15, 4, 1, "", "jagged_dense_dense_elementwise_add_jagged_output"], [15, 4, 1, "", "jagged_dense_elementwise_add"], [15, 4, 1, "", "jagged_dense_elementwise_add_jagged_output"], [15, 4, 1, "", "jagged_dense_elementwise_mul"], [15, 4, 1, "", "jagged_to_padded_dense"], [15, 4, 1, "", "stacked_jagged_1d_to_dense"], [15, 4, 1, "", "stacked_jagged_2d_to_dense"]]}, "objtypes": {"0": "cpp:function", "1": "cpp:functionParam", "2": "cpp:templateParam", "3": "py:module", "4": "py:function"}, "objnames": {"0": ["cpp", "function", "C++ function"], "1": ["cpp", "functionParam", "C++ function parameter"], "2": ["cpp", "templateParam", "C++ template parameter"], "3": ["py", "module", "Python module"], "4": ["py", "function", "Python function"]}, "titleterms": {"quantiz": [0, 8], "util": 0, "refer": [0, 23], "implement": 0, "method": 0, "avx": 0, "2": 0, "512": 0, "build": [1, 11, 21], "instruct": [1, 11, 12, 13], "fbgemm": [1, 24], "requir": 1, "hardwar": 1, "softwar": 1, "depend": 1, "asmjit": 1, "cpuinfo": 1, "googletest": 1, "set": [1, 11, 12, 21], "up": [1, 11, 12, 21], "an": [1, 11], "isol": [1, 11], "environ": [1, 11, 12, 13, 21], "instal": [1, 11, 12], "tool": [1, 11], "c": [1, 11, 20, 24], "compil": [1, 11], "other": [1, 11, 23], "librari": [1, 12], "prepar": [1, 11], "linux": 1, "maco": 1, "cmake": 1, "gcc": [1, 11], "issu": [1, 18], "12": 1, "clang": [1, 11], "bazel": 1, "window": 1, "embed": [2, 7, 10, 16], "oper": [2, 3, 4, 5, 6, 7, 8, 9, 10, 14, 15, 16], "cuda": [2, 4, 5, 6, 8, 9, 11, 12, 13], "cpu": [2, 4, 5, 8, 9, 11, 12], "combin": [3, 14], "input": 3, "jag": [4, 14, 15], "tensor": [4, 14, 15], "layout": 5, "transform": 5, "memori": 6, "pool": 7, "merg": 7, "permut": 7, 
"spars": 9, "data": 9, "tabl": [10, 16], "batch": [10, 16], "miniconda": 11, "conda": [11, 12], "onli": [11, 12], "docker": [11, 12], "imag": 11, "cudnn": 11, "rocm": [11, 12, 13], "miopen": 11, "symlink": 11, "pytorch": [11, 12], "through": [11, 12], "pip": [11, 12], "post": [11, 12], "check": [11, 12], "fbgemm_gpu": [11, 12, 13, 21, 24], "packag": [11, 12], "The": 11, "process": 11, "wheel": 11, "variabl": 11, "For": 11, "develop": [11, 24], "undefin": [11, 12], "symbol": [11, 12], "glibc": 11, "version": 11, "compat": 11, "nvidia": 12, "driver": 12, "contain": 12, "runtim": 12, "amdgpu": 12, "python": [12, 22, 24], "public": 12, "pypi": 12, "test": 13, "setup": 13, "run": 13, "variant": 13, "benchmark": 13, "high": 14, "level": 14, "overview": [14, 24], "format": 14, "valu": 14, "offset": 14, "max": 14, "length": 14, "exampl": 14, "arithmet": 14, "convers": 14, "dens": 14, "tbe": 16, "contact": 17, "u": 17, "github": 17, "slack": 17, "contribut": 18, "code": [18, 20, 22, 23], "conduct": 18, "pull": 18, "request": 18, "contributor": 18, "licens": [18, 19], "agreement": 18, "cla": 18, "ad": [20, 22, 23], "document": [20, 21, 22, 23, 24], "gener": [21, 22, 24], "guidelin": 21, "specif": 21, "guid": 21, "toolchain": 21, "lint": 21, "deploy": 21, "preview": 21, "todo": 22, "auto": 22, "sphinx": 23, "pointer": 23, "section": 23, "referenc": 23, "sourc": 23, "latex": 23, "graph": 23, "homepag": 24, "info": 24, "api": 24}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Installation Instructions": [[12, "installation-instructions"]], "Set Up CPU-Only Environment": [[12, "set-up-cpu-only-environment"]], "Set Up CUDA Environment": [[12, "set-up-cuda-environment"]], "Install NVIDIA Drivers": [[12, "install-nvidia-drivers"]], "Set Up the CUDA Docker Container and Conda Environment": [[12, "set-up-the-cuda-docker-container-and-conda-environment"]], "Install the CUDA Runtime": [[12, "install-the-cuda-runtime"]], "Set Up ROCm Environment": [[12, "set-up-rocm-environment"]], "Install AMDGPU Drivers": [[12, "install-amdgpu-drivers"]], "Set Up the ROCm Docker Container and Conda Environment": [[12, "set-up-the-rocm-docker-container-and-conda-environment"]], "Install Python Libraries": [[12, "install-python-libraries"]], "Install PyTorch": [[12, "install-pytorch"], [11, "install-pytorch"]], "Install the FBGEMM_GPU Package": [[12, "install-the-fbgemm-gpu-package"]], "Install through PyTorch PIP": [[12, "install-through-pytorch-pip"]], "Install through Public PyPI": [[12, "install-through-public-pypi"]], "Post-Installation Checks": [[12, "post-installation-checks"]], "Undefined Symbols": [[12, "undefined-symbols"]], "Test Instructions": [[13, "test-instructions"]], "Setup the FBGEMM_GPU Test Environment": [[13, "setup-the-fbgemm-gpu-test-environment"]], "Running FBGEMM_GPU Tests": [[13, "running-fbgemm-gpu-tests"]], "Testing with the CUDA Variant": [[13, "testing-with-the-cuda-variant"]], "Testing with the ROCm Variant": [[13, "testing-with-the-rocm-variant"]], "Running FBGEMM_GPU Benchmarks": [[13, "running-fbgemm-gpu-benchmarks"]], "Jagged Tensor Operators": [[14, "jagged-tensor-operators"], [15, "jagged-tensor-operators"], [4, "jagged-tensor-operators"]], "High 
Level Overview": [[14, "high-level-overview"]], "Jagged Tensor Format": [[14, "jagged-tensor-format"]], "Values": [[14, "values"]], "Offsets": [[14, "offsets"]], "Max Lengths": [[14, "max-lengths"]], "Jagged Tensor Example": [[14, "jagged-tensor-example"]], "Jagged Tensor Operations": [[14, "jagged-tensor-operations"]], "Arithmetic Operations": [[14, "arithmetic-operations"]], "Conversion Operations": [[14, "conversion-operations"]], "Jagged to Dense": [[14, "jagged-to-dense"]], "Dense to Jagged": [[14, "dense-to-jagged"]], "Combined Arithmetic + Conversion Operations": [[14, "combined-arithmetic-conversion-operations"]], "Sphinx Documentation Pointers": [[23, "sphinx-documentation-pointers"]], "References Other Sections of the Documentation": [[23, "references-other-sections-of-the-documentation"]], "Referencing the Source Code": [[23, "referencing-the-source-code"]], "Adding LaTeX": [[23, "adding-latex"]], "Adding Graphs": [[23, "adding-graphs"]], "Adding Documentation to Python Code": [[22, "adding-documentation-to-python-code"]], "Todo": [[22, "id1"]], "Adding Documentation to Auto-Generated Python Code": [[22, "adding-documentation-to-auto-generated-python-code"]], "Documentation": [[21, "documentation"]], "General Documentation Guidelines": [[21, "general-documentation-guidelines"]], "Specific Documentation Guides": [[21, "specific-documentation-guides"]], "Building the Documentation": [[21, "building-the-documentation"]], "Set Up Build Environment": [[21, "set-up-build-environment"]], "Build FBGEMM_GPU": [[21, "build-fbgemm-gpu"]], "Set Up the Documentation Toolchain": [[21, "set-up-the-documentation-toolchain"]], "Build the Documentation": [[21, "build-the-documentation"]], "Linting the Documentation": [[21, "linting-the-documentation"]], "Deployment Preview": [[21, "deployment-preview"]], "Table Batched Embedding (TBE) Operators": [[16, "module-fbgemm_gpu"]], "Contact Us": [[17, "contact-us"]], "GitHub": [[17, "github"]], "Slack": [[17, "slack"]], "FBGEMM and FBGEMM_GPU Documentation Homepage": [[24, "fbgemm-and-fbgemm-gpu-documentation-homepage"]], "General Info": [[24, null]], "FBGEMM Development": [[24, null]], "FBGEMM_GPU Development": [[24, null]], "FBGEMM_GPU Overview": [[24, null]], "FBGEMM C++ API": [[24, null]], "FBGEMM_GPU C++ API": [[24, null]], "FBGEMM_GPU Python API": [[24, null]], "Layout Transformation Operators": [[5, "layout-transformation-operators"]], "CUDA Operators": [[5, "cuda-operators"], [4, "cuda-operators"], [9, "cuda-operators"], [8, "cuda-operators"], [2, "cuda-operators"]], "CPU Operators": [[5, "cpu-operators"], [4, "cpu-operators"], [9, "cpu-operators"], [8, "cpu-operators"], [2, "cpu-operators"]], "Combine Input Operators": [[3, "combine-input-operators"]], "Sparse Data Operators": [[9, "sparse-data-operators"]], "Table Batched Embedding Operators": [[10, "table-batched-embedding-operators"]], "Build Instructions": [[11, "build-instructions"], [1, "build-instructions"]], "Set Up an Isolated Build Environment": [[11, "set-up-an-isolated-build-environment"], [1, "set-up-an-isolated-build-environment"]], "Install Miniconda": [[11, "install-miniconda"]], "Set Up the Conda Environment": [[11, "set-up-the-conda-environment"]], "Set Up for CPU-Only Build": [[11, "set-up-for-cpu-only-build"]], "Set Up for CUDA Build": [[11, "set-up-for-cuda-build"]], "CUDA Docker Image": [[11, "cuda-docker-image"]], "Install CUDA": [[11, "install-cuda"]], "Install cuDNN": [[11, "install-cudnn"]], "Set Up for ROCm Build": [[11, "set-up-for-rocm-build"]], "ROCm Docker Image": 
[[11, "rocm-docker-image"]], "Install ROCm": [[11, "install-rocm"]], "Install MIOpen": [[11, "install-miopen"]], "Install the Build Tools": [[11, "install-the-build-tools"], [1, "install-the-build-tools"]], "C/C++ Compiler (GCC)": [[11, "c-c-compiler-gcc"]], "C/C++ Compiler (Clang)": [[11, "c-c-compiler-clang"]], "Compiler Symlinks": [[11, "compiler-symlinks"]], "Other Build Tools": [[11, "other-build-tools"], [1, "other-build-tools"]], "Installation Through Conda": [[11, "installation-through-conda"]], "Installation Through PyTorch PIP": [[11, "installation-through-pytorch-pip"]], "Post-Install Checks": [[11, "post-install-checks"]], "Build the FBGEMM_GPU Package": [[11, "build-the-fbgemm-gpu-package"]], "Preparing the Build": [[11, "preparing-the-build"], [1, "preparing-the-build"]], "The Build Process": [[11, "the-build-process"]], "Set Wheel Build Variables": [[11, "set-wheel-build-variables"]], "CPU-Only Build": [[11, "cpu-only-build"]], "CUDA Build": [[11, "cuda-build"]], "ROCm Build": [[11, "rocm-build"]], "Post-Build Checks (For Developers)": [[11, "post-build-checks-for-developers"]], "Undefined Symbols Check": [[11, "undefined-symbols-check"]], "GLIBC Version Compatibility Check": [[11, "glibc-version-compatibility-check"]], "Quantization Operators": [[8, "quantization-operators"]], "CUDA Memory Operators": [[6, "cuda-memory-operators"]], "Pooled Embeddings Operators": [[7, "pooled-embeddings-operators"]], "Merge Operators": [[7, "merge-operators"]], "Permutation Operators": [[7, "permutation-operators"]], "Adding Documentation to C++ Code": [[20, "adding-documentation-to-c-code"]], "License": [[19, "license"], [18, "license"]], "Contributing": [[18, "contributing"]], "Code of Conduct": [[18, "code-of-conduct"]], "Pull Requests": [[18, "pull-requests"]], "Contributor License Agreement (\u201cCLA\u201d)": [[18, "contributor-license-agreement-cla"]], "Issues": [[18, "issues"]], "FBGEMM Requirements": [[1, "fbgemm-requirements"]], "Hardware Requirements": [[1, "hardware-requirements"]], "Software Dependencies": [[1, "software-dependencies"]], "asmjit": [[1, "asmjit"]], "cpuinfo": [[1, "cpuinfo"]], "GoogleTest": [[1, "googletest"]], "C/C++ Compiler": [[1, "c-c-compiler"]], "Build the FBGEMM Library": [[1, "build-the-fbgemm-library"]], "Building on Linux and macOS (CMake + GCC)": [[1, "building-on-linux-and-macos-cmake-gcc"]], "Build Issues with GCC 12+": [[1, "build-issues-with-gcc-12"]], "Building on Linux and macOS (CMake + Clang)": [[1, "building-on-linux-and-macos-cmake-clang"]], "Building on Linux (Bazel)": [[1, "building-on-linux-bazel"]], "Building on Windows": [[1, "building-on-windows"]], "Embedding Operators": [[2, "embedding-operators"]], "Quantization Utilities": [[0, "quantization-utilities"]], "Reference Implementation Methods": [[0, "reference-implementation-methods"]], "AVX-2 Implementation Methods": [[0, "avx-2-implementation-methods"]], "AVX-512 Implementation Methods": [[0, "avx-512-implementation-methods"]]}, "indexentries": {"findminmax (c++ function)": [[0, "_CPPv410FindMinMaxPKfPfPf7int64_t"]], "floatorhalftofusednbitrowwisequantizedsbhalf (c++ function)": [[0, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE"]], "fusedquantizedequantize (c++ function)": [[0, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif"]], "quantizegroupwise (c++ function)": [[0, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T"]], "xor128 (c++ function)": [[0, "_CPPv46Xor128v"]], 
"requantizeoutputprocessingavx2 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "requantizeoutputprocessinggconvavx512 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "bounds_check_indices_cuda (c++ function)": [[2, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t"]], "int_nbit_split_embedding_codegen_lookup_function (c++ function)": [[2, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE"]], "int_nbit_split_embedding_codegen_lookup_function_cpu (c++ function)": [[2, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function (c++ function)": [[2, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu (c++ function)": [[2, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE"]], "pruned_array_lookup_cpu (c++ function)": [[2, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor"]], "pruned_array_lookup_cuda (c++ function)": [[2, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_insert_unweighted_cpu (c++ function)": [[2, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_cuda (c++ function)": [[2, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_unweighted_cpu (c++ function)": [[2, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor"]], "padding_fused_tbe_input_combine_cpu (c++ function)": [[3, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t"]], "tbe_input_combine_cpu (c++ function)": [[3, 
"_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE"]], "batched_dense_vec_jagged_2d_mul (c++ function)": [[4, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor"]], "dense_to_jagged (c++ function)": [[4, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE"]], "jagged_1d_to_dense (c++ function)": [[4, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t"]], "jagged_2d_to_dense (c++ function)": [[4, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE"]], "jagged_dense_elementwise_add (c++ function)": [[4, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output (c++ function)": [[4, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output_cuda (c++ function)": [[4, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_mul (c++ function)": [[4, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_to_padded_dense (c++ function)": [[4, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd"]], "jagged_to_padded_dense_forward (c++ function)": [[4, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd"]], "recat_embedding_grad_output_cuda (c++ function)": [[5, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_batch_cuda (c++ function)": [[5, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor"]], "recat_embedding_grad_output_mixed_d_cpu (c++ function)": [[5, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_cuda (c++ function)": [[5, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE"]], "is_uvm_tensor (c++ function)": [[6, "_CPPv413is_uvm_tensorRK6Tensor"]], "new_host_mapped_tensor (c++ function)": [[6, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor (c++ function)": [[6, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor_meta (c++ function)": [[6, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_unified_tensor (c++ function)": [[6, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb"]], "new_vanilla_managed_tensor (c++ function)": [[6, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "uvm_cuda_mem_advise (c++ function)": [[6, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t"]], "uvm_cuda_mem_prefetch_async (c++ function)": [[6, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE"]], "uvm_mem_advice_dont_fork (c++ function)": [[6, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor"]], "uvm_storage (c++ function)": [[6, "_CPPv411uvm_storageRK6Tensor"]], "uvm_to_cpu (c++ function)": [[6, "_CPPv410uvm_to_cpuRK6Tensor"]], "uvm_to_cpu_clone (c++ function)": [[6, "_CPPv416uvm_to_cpu_cloneRK6Tensor"]], "uvm_to_device (c++ function)": [[6, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor"]], "all_to_one_device (c++ function)": [[7, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE"]], "permute_pooled_embs_auto_grad (c++ function)": [[7, 
"_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_cpu (c++ function)": [[7, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_gpu (c++ function)": [[7, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_split_cpu (c++ function)": [[7, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_auto_grad_split_gpu (c++ function)": [[7, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_cpu_impl (c++ function)": [[7, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb"]], "permute_pooled_embs_split_cpu (c++ function)": [[7, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_split_gpu (c++ function)": [[7, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "fp8quantizedtofloat_ref (c++ function)": [[8, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi"]], "fp8rowwise_to_float_cpu (c++ function)": [[8, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t"]], "floattofp8quantized_ref (c++ function)": [[8, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd"]], "_fp8rowwise_to_float_gpu (c++ function)": [[8, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t"]], "_bfloat16_to_float_gpu (c++ function)": [[8, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE"]], "_float_to_fp8rowwise_gpu (c++ function)": [[8, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb"]], "_float_to_bfloat16_gpu (c++ function)": [[8, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE"]], "_float_to_fused8bitrowwise_cpu_out (c++ function)": [[8, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor"]], "_float_to_fused8bitrowwise_gpu (c++ function)": [[8, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor"]], "_float_to_fusednbitrowwise_gpu (c++ function)": [[8, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "_float_to_hfp8_gpu (c++ function)": [[8, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd"]], "_float_to_msfp_gpu (c++ function)": [[8, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd"]], "_float_to_paddedfp8rowwise_gpu (c++ function)": [[8, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t"]], "_fused8bitrowwise_to_float_cpu_out (c++ function)": [[8, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor"]], "_fused8bitrowwise_to_float_gpu (c++ function)": [[8, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_float_mixed_dim_gpu (c++ function)": [[8, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t"]], "_fused8bitrowwise_to_half_gpu (c++ function)": [[8, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_single_or_half_precision_gpu (c++ function)": [[8, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb"]], "_fusednbitrowwise_to_float_gpu (c++ function)": [[8, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_half_gpu (c++ function)": [[8, 
"_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_single_or_half_precision_gpu (c++ function)": [[8, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_half_to_fused8bitrowwise_gpu (c++ function)": [[8, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor"]], "_half_to_fusednbitrowwise_gpu (c++ function)": [[8, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t"]], "_hfp8_to_float_gpu (c++ function)": [[8, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_msfp_to_float_gpu (c++ function)": [[8, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t"]], "_paddedfp8rowwise_to_float_gpu (c++ function)": [[8, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t"]], "_single_or_half_precision_to_fused8bitrowwise_gpu (c++ function)": [[8, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor"]], "_single_or_half_precision_to_fusednbitrowwise_gpu (c++ function)": [[8, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "float_or_half_to_fused8bitrowwise_cpu (c++ function)": [[8, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor"]], "float_to_fp8rowwise_cpu (c++ function)": [[8, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb"]], "float_to_fused8bitrowwise_cpu (c++ function)": [[8, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor"]], "fused8bitrowwise_to_float_cpu (c++ function)": [[8, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor"]], "fused8bitrowwise_to_float_or_half_cpu (c++ function)": [[8, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb"]], "fused8bitrowwise_to_half_cpu (c++ function)": [[8, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor"]], "fusednbitrowwise_to_float_cpu (c++ function)": [[8, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t"]], "fusednbitrowwise_to_float_or_half_cpu (c++ function)": [[8, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t"]], "fusednbitrowwise_to_half_cpu (c++ function)": [[8, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t"]], "half_to_fused8bitrowwise_cpu (c++ function)": [[8, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor"]], "expand_into_jagged_permute_cuda (c++ function)": [[9, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t"]], "generic_histogram_binning_calibration_by_feature_cpu (c++ function)": [[9, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td"]], "histogram_binning_calibration_cpu (c++ function)": [[9, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td"]], "direct_mapped_lru_cache_populate_byte_cuda (c++ function)": [[10, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "direct_mapped_lxu_cache_lookup_cuda (c++ function)": [[10, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "get_unique_indices_cuda (c++ function)": [[10, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb"]], "host_lxu_cache_slot (c++ function)": [[10, "_CPPv419host_lxu_cache_slot7int64_t7int64_t"]], "lfu_cache_populate_byte_cuda (c++ function)": [[10, 
"_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "lfu_cache_populate_cuda (c++ function)": [[10, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb"]], "linearize_cache_indices_cuda (c++ function)": [[10, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t"]], "linearize_cache_indices_from_row_idx_cuda (c++ function)": [[10, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE"]], "lru_cache_find_uncached_cuda (c++ function)": [[10, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE"]], "lru_cache_populate_byte_cuda (c++ function)": [[10, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "lru_cache_populate_cuda (c++ function)": [[10, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE"]], "lxu_cache_flush_cuda (c++ function)": [[10, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb"]], "lxu_cache_locations_update_cuda (c++ function)": [[10, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE"]], "lxu_cache_locking_counter_decrement_cuda (c++ function)": [[10, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE"]], "lxu_cache_lookup_cuda (c++ function)": [[10, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE"]], "reset_weight_momentum_cuda (c++ function)": [[10, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "batched_dense_vec_jagged_2d_mul() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.batched_dense_vec_jagged_2d_mul"]], "dense_to_jagged() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.dense_to_jagged"]], "jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_1d_to_dense"]], "jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_2d_to_dense"]], "jagged_dense_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_dense_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_add() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_dense_elementwise_add"]], "jagged_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_mul() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_dense_elementwise_mul"]], "jagged_to_padded_dense() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_to_padded_dense"]], "stacked_jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[15, 
"torch.ops.fbgemm.stacked_jagged_1d_to_dense"]], "stacked_jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.stacked_jagged_2d_to_dense"]], "splittablebatchedembeddingbagscodegen() (in module fbgemm_gpu.split_table_batched_embeddings_ops)": [[16, "fbgemm_gpu.split_table_batched_embeddings_ops.SplitTableBatchedEmbeddingBagsCodegen"]], "fbgemm_gpu": [[16, "module-fbgemm_gpu"]], "module": [[16, "module-fbgemm_gpu"]], "example_method (c++ function)": [[20, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf"]], "example_method() (in module fbgemm_gpu.docs.examples)": [[22, "fbgemm_gpu.docs.examples.example_method"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["fbgemm-cpp-api/QuantUtils", "fbgemm-development/BuildInstructions", "fbgemm_gpu-cpp-api/embedding_ops", "fbgemm_gpu-cpp-api/experimental_ops", "fbgemm_gpu-cpp-api/input_combine", "fbgemm_gpu-cpp-api/jagged_tensor_ops", "fbgemm_gpu-cpp-api/layout_transform_ops", "fbgemm_gpu-cpp-api/memory_utils", "fbgemm_gpu-cpp-api/merge_pooled_embeddings", "fbgemm_gpu-cpp-api/quantize_ops", "fbgemm_gpu-cpp-api/sparse_ops", "fbgemm_gpu-cpp-api/split_table_batched_embeddings", "fbgemm_gpu-development/BuildInstructions", "fbgemm_gpu-development/InstallationInstructions", "fbgemm_gpu-development/TestInstructions", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps", "fbgemm_gpu-python-api/jagged_tensor_ops", "fbgemm_gpu-python-api/table_batched_embedding_ops", "general/ContactUs", "general/Contributing", "general/License", "general/documentation/Cpp", "general/documentation/Overview", "general/documentation/Python", "general/documentation/Sphinx", "index"], "filenames": ["fbgemm-cpp-api/QuantUtils.rst", "fbgemm-development/BuildInstructions.rst", "fbgemm_gpu-cpp-api/embedding_ops.rst", "fbgemm_gpu-cpp-api/experimental_ops.rst", "fbgemm_gpu-cpp-api/input_combine.rst", "fbgemm_gpu-cpp-api/jagged_tensor_ops.rst", "fbgemm_gpu-cpp-api/layout_transform_ops.rst", "fbgemm_gpu-cpp-api/memory_utils.rst", "fbgemm_gpu-cpp-api/merge_pooled_embeddings.rst", "fbgemm_gpu-cpp-api/quantize_ops.rst", "fbgemm_gpu-cpp-api/sparse_ops.rst", "fbgemm_gpu-cpp-api/split_table_batched_embeddings.rst", "fbgemm_gpu-development/BuildInstructions.rst", "fbgemm_gpu-development/InstallationInstructions.rst", "fbgemm_gpu-development/TestInstructions.rst", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "general/ContactUs.rst", "general/Contributing.rst", "general/License.rst", "general/documentation/Cpp.rst", "general/documentation/Overview.rst", "general/documentation/Python.rst", "general/documentation/Sphinx.rst", "index.rst"], "titles": ["Quantization Utilities", "Build Instructions", "Embedding Operators", "Experimental Operators", "Combine Input Operators", "Jagged Tensor Operators", "Layout Transformation Operators", "CUDA Memory Operators", "Pooled Embeddings Operators", "Quantization Operators", "Sparse Data Operators", "Table Batched Embedding Operators", "Build Instructions", "Installation Instructions", "Test Instructions", "Jagged Tensor Operators", "Jagged Tensor Operators", "Table Batched Embedding (TBE) Operators", "Contact Us", "Contributing", "License", "Adding Documentation to C++ Code", "Documentation", "Adding Documentation to Python Code", "Sphinx Documentation Pointers", "FBGEMM and FBGEMM_GPU Documentation Homepage"], "terms": {"templat": [0, 12, 21], "typenam": [0, 21], "t": [0, 1, 3, 7, 10, 12, 17, 19, 21, 
22], "layout_t": 0, "layout": [0, 25], "kcx": 0, "void": [0, 2, 7, 9, 11], "quantizegroupwis": 0, "const": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 23], "float": [0, 9, 16, 17, 21, 23], "src": 0, "int": [0, 9, 16, 17, 21, 23], "k": [0, 3], "c": [0, 11, 13, 15, 20, 22, 23, 24], "x": [0, 5, 15, 21, 23], "g": [0, 1, 10, 12, 21, 23], "scale": [0, 3], "std": [0, 3, 4, 5, 6, 7, 8, 10, 11, 12, 21, 23], "int32_t": [0, 21, 23], "zero_point": 0, "dst": 0, "point": [0, 9, 16, 21, 23], "data": [0, 7, 15, 17, 20, 25], "type": [0, 1, 9, 13, 15, 16, 17, 21], "paramet": [0, 3, 7, 9, 10, 16, 17, 21, 22, 23], "output": [0, 3, 5, 9, 10, 16, 17, 21, 23], "int8_t": 0, "uint8_t": [0, 9, 11], "ar": [0, 1, 5, 11, 12, 13, 15, 16, 17, 20, 21, 22, 23], "support": [0, 1, 3, 12, 13, 15, 23, 25], "input": [0, 3, 5, 7, 9, 10, 15, 16, 17, 21, 25], "tensor": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 17, 22, 23, 25], "kxc": 0, "correspond": [0, 10, 11, 15, 21, 23], "kcr": 0, "kctr": 0, "weight": [0, 2, 10, 11, 17], "time": [0, 1, 12, 13, 15], "dimens": [0, 3, 5, 7, 10, 15, 16, 17, 23], "krsc": 0, "ktrsc": 0, "channel": [0, 12, 13, 18], "number": [0, 1, 3, 10, 12, 15, 16, 17, 22], "r": [0, 14, 22], "": [0, 1, 7, 12, 14, 15, 19, 21, 22, 23], "group": [0, 3, 15, 21], "function": [0, 1, 12, 21, 23], "perform": [0, 1, 9, 10, 15, 25], "channelwis": 0, "1": [0, 1, 3, 10, 11, 12, 13, 14, 15, 16, 17, 22, 23, 24], "groupwis": 0, "per": [0, 15], "size": [0, 1, 3, 7, 9, 10, 15, 16, 17], "should": [0, 10, 11, 12, 13, 15, 19, 21, 22, 23], "equal": [0, 15, 23], "zero": [0, 16, 23], "reprsent": 0, "fusedquantizedequant": 0, "int64_t": [0, 2, 3, 4, 5, 6, 7, 9, 10, 11], "len": [0, 15], "tensorquantizationparam": 0, "qparam": 0, "thread_id": 0, "0": [0, 1, 9, 10, 11, 12, 13, 15, 16, 17, 23], "num_thread": 0, "noise_ratio": 0, "0f": 0, "fuse": [0, 9, 17], "integ": [0, 7, 9, 15], "dequant": 0, "kernel": [0, 1, 7, 14, 25], "acceler": 0, "awar": 0, "train": [0, 17, 25], "fp32": [0, 9, 17], "valu": [0, 5, 7, 9, 10, 11, 16, 17, 21, 22, 23], "u": [0, 12, 24, 25], "int8": [0, 17], "us": [0, 1, 3, 7, 10, 12, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25], "provid": [0, 1, 12, 13, 14, 20, 21, 22, 23, 25], "back": [0, 7, 11, 12, 13], "inputtyp": 0, "floatorhalftofusednbitrowwisequantizedsbhalf": 0, "bit_rat": [0, 9], "size_t": [0, 9, 21], "input_row": 0, "input_column": 0, "convert": [0, 7, 9, 15, 16, 23], "fp16": [0, 9, 17], "rowwis": [0, 9, 17], "bitrat": 0, "specifi": [0, 1, 9, 10, 12, 16, 17], "bit": [0, 9], "bia": [0, 3, 9], "each": [0, 3, 10, 12, 15, 16, 17, 23], "row": [0, 5, 11, 15, 16, 17, 23], "store": [0, 10, 11], "itself": [0, 15, 22], "end": [0, 13, 15, 24], "can": [0, 1, 9, 10, 12, 13, 15, 21, 22, 23, 24], "4": [0, 12, 13, 15, 16, 17, 23], "8": [0, 9, 12, 15, 17], "uint32_t": 0, "xor128": 0, "random": 0, "gener": [0, 1, 10, 12, 13, 21, 24], "9": [0, 12, 15, 17], "base": [0, 1, 10, 11, 12, 15], "thi": [0, 1, 5, 7, 8, 10, 12, 13, 15, 18, 19, 20, 21, 23, 24, 25], "paper": 0, "findminmax": 0, "m": [0, 12, 13, 14], "min": 0, "max": [0, 3, 17], "find": [0, 11, 12], "matrix": [0, 1, 16, 25], "bool": [0, 7, 8, 9, 11, 17], "a_symmetr": 0, "b_symmetr": 0, "quantizationgranular": 0, "q_gran": 0, "has_bia": 0, "fuse_relu": 0, "bias_typ": 0, "direct": [0, 11, 13, 20, 21, 23, 24], "fals": [0, 7, 17, 22], "requantizeoutputprocessingavx2": 0, "out": [0, 12, 18, 20, 22], "inp": 0, "block_type_t": 0, "block": [0, 21, 23, 24], "ld_out": 0, "ld_in": 0, "requantizationparams_t": 0, "requant": 0, "avx2": [0, 1], "i": [0, 1, 3, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 
21, 22, 23, 24, 25], "c_per_g": 0, "requantizeoutputprocessinggconvavx512": 0, "avx512": 0, "note": [1, 11, 12, 13, 21, 22, 23, 24], "The": [1, 3, 7, 9, 10, 13, 14, 15, 16, 17, 19, 21, 22, 23, 24], "most": [1, 12, 13, 15, 22], "date": [1, 12, 13, 22], "embed": [1, 12, 13, 22, 25], "script": [1, 12, 13, 22], "bundl": [1, 12, 13, 22], "repo": [1, 12, 13, 22, 23], "under": [1, 12, 13, 19, 20, 22, 23], "setup_env": [1, 12, 13, 22], "bash": [1, 12, 13, 22], "step": [1, 12, 13, 15, 22, 23], "fbgemm_gpu": [1, 7, 15, 17, 18, 19, 20, 21, 23], "follow": [1, 10, 12, 13, 15, 20, 21, 22, 23], "toolchain": [1, 12, 13], "run": [1, 12, 13, 22], "cpu": [1, 7, 8, 14, 22], "higher": 1, "In": [1, 10, 12, 13, 15, 19, 21, 23], "doe": [1, 2, 13, 21, 22, 23], "have": [1, 10, 11, 12, 15, 22], "ani": [1, 10, 12, 16, 19, 20, 22, 23], "intel": 1, "mkl": 1, "howev": [1, 12, 15, 20], "comparison": 1, "some": [1, 12, 15, 22], "benchmark": 1, "If": [1, 12, 13, 17, 19, 21, 22, 23], "found": [1, 12, 13, 22], "path": [1, 12, 21, 24], "through": [1, 19, 21, 23], "intel_mkl_dir": 1, "variabl": 1, "built": [1, 12, 13, 22, 25], "report": [1, 13], "otherwis": [1, 7, 13, 20], "subset": 1, "all": [1, 10, 11, 12, 13, 15, 17, 20, 22], "three": [1, 15], "git": [1, 12], "submodul": [1, 12], "custom": [1, 24], "version": [1, 13], "desir": [1, 12, 15, 16, 21], "thei": [1, 12, 22, 24], "asmjit_src_dir": 1, "cpuinfo_src_dir": 1, "googletest_source_dir": 1, "With": 1, "inner": [1, 15], "take": [1, 12], "one": [1, 3, 9, 10, 11, 16, 17, 21, 23], "doesn": 1, "fit": [1, 20], "approach": 1, "so": [1, 10, 12, 13, 15], "implement": [1, 3, 12, 15], "dynam": 1, "effici": [1, 25], "shape": [1, 3, 15, 17], "specif": [1, 10, 12, 17, 20], "vector": [1, 4, 5, 6, 7, 8, 16, 23], "code": [1, 12, 20, 22], "third": 1, "parti": 1, "call": [1, 7, 13], "detect": [1, 14], "runtim": [1, 12], "pytorch": [1, 15, 18, 22, 23, 25], "project": [1, 19], "dispatch": [1, 7], "optim": [1, 9, 17], "test": [1, 12, 13, 19, 25], "you": [1, 19, 21, 23], "don": [1, 10, 12, 22], "want": [1, 19], "togeth": [1, 21, 22], "default": [1, 10, 12, 13, 17], "turn": [1, 22], "off": [1, 13, 18], "simpli": [1, 12], "fbgemm_build_test": 1, "conda": [1, 14, 22], "For": [1, 14, 15, 18, 20, 21, 22, 23, 24], "platform": [1, 12, 20], "machin": [1, 12, 13, 14, 25], "microsoft": [1, 9], "visual": 1, "studio": 1, "2019": 1, "newer": [1, 12], "recommend": [1, 5, 12, 13, 15], "here": [1, 7, 12, 19, 21, 22, 23, 24], "necessari": [1, 12], "ninja": [1, 12], "etc": [1, 12, 17], "n": [1, 9, 12, 13, 24], "env_nam": [1, 12, 13], "y": [1, 5, 12, 13, 16, 22], "doxygen": [1, 21, 22], "make": [1, 11, 12, 19, 21, 22, 23], "openbla": 1, "packag": [1, 14, 22], "onli": [1, 3, 10, 11, 14, 15, 19, 21, 22, 24], "clone": [1, 12], "along": [1, 12, 13], "its": [1, 7, 10, 12, 17, 20, 22, 24], "insid": [1, 12, 13, 14, 22, 24], "recurs": [1, 12], "http": [1, 12, 13, 19, 21, 22, 23], "github": [1, 12, 19], "com": [1, 12, 19], "cd": [1, 12, 14, 22], "assum": [1, 10], "process": [1, 5, 13, 15, 19, 23], "straightforward": 1, "creat": [1, 7, 12, 15, 19, 21, 23, 24], "directori": [1, 12, 14, 19, 21, 22], "mkdir": 1, "argument": [1, 10, 21, 22, 23], "build_arg": 1, "duse_sanit": 1, "address": [1, 12], "dfbgemm_library_typ": 1, "share": [1, 7], "dpython_execut": 1, "which": [1, 10, 12, 13, 15, 17, 22], "python3": [1, 13], "option": [1, 2, 5, 7, 11, 12, 16, 17], "document": [1, 7, 19, 20], "dfbgemm_build_doc": 1, "ON": [1, 20], "j": [1, 15], "verbos": 1, "As": [1, 10, 12, 13, 15], "write": [1, 12, 13, 22, 23], "fail": [1, 13, 14, 
21], "due": [1, 12], "known": [1, 12, 17], "regress": 1, "To": [1, 12, 14, 24], "work": [1, 12, 13, 15, 19], "around": 1, "append": [1, 12, 21, 23], "export": [1, 12, 14], "prior": [1, 12, 13, 20], "cflag": 1, "wno": 1, "error": [1, 9, 13, 21, 22, 23], "mayb": 1, "uniniti": 1, "restrict": 1, "cxxflag": 1, "pleas": [1, 19, 21, 23], "see": [1, 7, 12, 13, 15, 21, 23, 24], "77939": 1, "1094": 1, "1666": 1, "more": [1, 7, 12, 17, 21, 23, 24], "detail": [1, 13], "exactli": 1, "same": [1, 3, 7, 10, 12, 15, 16, 21, 22, 23], "extra": 1, "need": [1, 12, 13, 14, 15, 19, 21, 23, 24], "ad": [1, 19, 22], "invoc": [1, 12, 22], "llvm": [1, 12], "standard": [1, 12], "libc": 1, "openmp": [1, 12], "libomp": 1, "locat": [1, 7, 11, 12, 15], "cc_path": 1, "cxx_path": 1, "dcmake_c_compil": 1, "dcmake_cxx_compil": 1, "dcmake_c_flag": [1, 12], "fopenmp": 1, "stdlib": [1, 12], "conda_prefix": [1, 12], "includ": [1, 8, 12, 20, 21, 23], "dcmake_cxx_flag": [1, 12], "likewis": 1, "also": [1, 12, 17, 24], "veri": [1, 12, 21, 22, 23], "target": [1, 7, 9, 10, 12, 15, 21, 22, 23, 24], "architectur": [1, 12, 13], "bc": [1, 12], "x64": 1, "program": [1, 19], "file": [1, 12, 13, 18, 19, 21, 22, 23, 24], "x86": [1, 25], "enterpris": 1, "vc": 1, "auxiliari": 1, "vcvarsal": 1, "bat": 1, "build_dir": 1, "dfbgemm_build_benchmark": 1, "dcmake_build_typ": 1, "releas": [1, 13], "cl": 1, "ex": 1, "v": [1, 3, 5, 14, 16], "int_nbit_split_embedding_codegen_lookup_funct": 2, "dev_weight": [2, 11], "uvm_weight": [2, 11], "weights_plac": [2, 11], "weights_offset": [2, 11], "weights_ti": [2, 11], "d_offset": [2, 9, 11], "total_d": [2, 11, 17], "max_int2_d": 2, "max_int4_d": 2, "max_int8_d": 2, "max_float16_d": 2, "max_float32_d": 2, "indic": [2, 11, 15, 17], "offset": [2, 5, 10, 11, 16, 17], "pooling_mod": [2, 17], "c10": [2, 5, 7, 9, 11], "indice_weight": 2, "output_dtyp": [2, 9, 17], "lxu_cache_weight": [2, 11], "lxu_cache_loc": [2, 11], "row_align": [2, 11], "max_float8_d": 2, "fp8_exponent_bit": 2, "fp8_exponent_bia": 2, "int_nbit_split_embedding_uvm_caching_codegen_lookup_funct": 2, "cache_hash_size_cumsum": [2, 11], "total_cache_hash_s": [2, 11], "cache_index_table_map": [2, 11], "lxu_cache_st": [2, 11], "lxu_stat": 2, "simlar": 2, "uvm_cach": 2, "lookup": [2, 11], "pruned_hashmap_lookup_cuda": 2, "hash_tabl": 2, "hash_table_offset": 2, "pruned_array_lookup_cuda": 2, "index_remap": 2, "index_remappings_offset": 2, "bounds_check_indices_cuda": 2, "rows_per_t": 2, "bounds_check_mod": [2, 17], "warn": [2, 17, 21], "b_ofset": 2, "max_b": [2, 11], "int_nbit_split_embedding_codegen_lookup_function_cpu": 2, "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu": 2, "pruned_hashmap_insert_unweighted_cpu": 2, "dense_indic": 2, "pruned_hashmap_lookup_unweighted_cpu": 2, "pruned_array_lookup_cpu": 2, "tupl": [3, 4, 5, 10, 11, 17], "gqa_attn_splitk_cuda": 3, "xq": 3, "cache_k": 3, "cache_v": 3, "seq_posit": 3, "doubl": [3, 5, 9, 10], "qk_scale": 3, "num_split_k": 3, "num_int4_kv_group": 3, "decod": 3, "queri": 3, "split": 3, "w": [3, 14], "bf16": [3, 9], "int4": 3, "kv": 3, "cuda": [3, 8, 17, 25], "gqa": 3, "cach": [3, 11, 12, 17], "It": [3, 12, 13, 15], "current": [3, 12, 13, 15, 17], "context": 3, "length": [3, 5, 10, 16, 17, 23], "16384": 3, "fix": [3, 10, 12], "head": 3, "128": 3, "an": [3, 7, 10, 13, 14, 15, 17, 21, 22, 23, 24], "arbitrari": 3, "b": [3, 10, 12, 15, 16, 17, 21, 22, 23, 24], "h_q": 3, "d": [3, 15, 16, 24], "where": [3, 5, 7, 10, 15, 16, 17], "batch": [3, 5, 10, 15, 16, 25], "num": 3, "max_t": 3, "h_kv": 3, 
"sequenc": 3, "posit": [3, 10, 17], "contain": [3, 7, 12, 15, 16, 17, 23], "actual": [3, 12], "token": [3, 15], "appli": [3, 10, 12, 15, 17], "after": [3, 10, 12, 13, 14, 15, 17, 22, 23, 24], "qk": 3, "control": 3, "amount": [3, 17], "parallel": 3, "wise": [3, 15, 17], "quantiz": [3, 25], "return": [3, 7, 9, 10, 16, 17, 21, 22, 23], "A": [3, 7, 9, 13, 15, 16, 17, 20, 21, 22, 23], "combin": [3, 25], "non": [3, 7, 17], "metadata": 3, "softmax": 3, "sum": [3, 10, 16, 17], "tbe_input_combine_cpu": 4, "indices_list": 4, "offsets_list": 4, "per_sample_weight": [4, 17], "include_last_offset": 4, "padding_fused_tbe_input_combine_cpu": 4, "batch_siz": 4, "solv": 5, "issu": [5, 7, 12, 13, 18], "when": [5, 10, 12, 14, 15, 17, 21, 22, 24], "differ": [5, 10, 15], "often": 5, "occur": [5, 21], "spars": [5, 15, 25], "featur": [5, 10, 15, 17, 18], "system": [5, 12, 13, 15], "well": [5, 10, 12, 21], "natur": [5, 15], "languag": [5, 15, 24], "jagged_to_padded_dense_forward": 5, "symintarrayref": 5, "max_length": [5, 16], "padding_valu": [5, 16], "jagged_dense_elementwise_add_jagged_output_cuda": 5, "x_valu": [5, 16], "x_offset": [5, 16, 23], "dens": [5, 16, 23], "jagged_to_padded_dens": [5, 16], "jagged_dense_elementwise_add": [5, 16], "jagged_dense_elementwise_mul": [5, 16], "batched_dense_vec_jagged_2d_mul": [5, 16], "a_valu": [5, 16], "a_offset": [5, 16], "dense_to_jag": [5, 16], "symint": 5, "total_l": [5, 16], "jagged_dense_elementwise_add_jagged_output": [5, 16], "jagged_1d_to_dens": [5, 16], "max_l": 5, "jagged_2d_to_dens": [5, 12, 13, 16, 22, 23], "max_sequence_length": [5, 16, 23], "recat_embedding_grad_output_cuda": 6, "grad_output": 6, "num_features_per_rank": 6, "recat_embedding_grad_output_mixed_d_cuda": 6, "dim_sum_per_rank": 6, "recat_embedding_grad_output_mixed_d_batch_cuda": 6, "cumsum_dim_sum_per_rank": 6, "recat_embedding_grad_output_mixed_d_cpu": 6, "new_managed_tensor": 7, "self": 7, "alloc": [7, 21], "unifi": 7, "manag": [7, 12, 13, 17], "uvm": [7, 14], "Then": 7, "set": [7, 11, 14, 15, 16, 17], "prefer": [7, 13], "storag": [7, 9, 11], "host": [7, 12], "establish": 7, "map": [7, 10, 11, 15, 17], "devic": [7, 8, 12, 14, 17], "new": [7, 9, 11, 21, 22, 23], "new_managed_tensor_meta": 7, "placehold": 7, "meta": [7, 20], "kei": 7, "empti": [7, 15, 16, 24], "new_host_mapped_tensor": 7, "new_unified_tensor": 7, "is_host_map": 7, "either": [7, 9, 10, 12, 13], "whether": [7, 12, 20], "depend": [7, 9, 12, 13, 15], "new_vanilla_managed_tensor": 7, "allow": [7, 12], "automat": [7, 10, 14, 22], "uvm_storag": 7, "check": [7, 17], "gpu": [7, 12, 13, 14, 25], "true": [7, 17], "is_uvm_tensor": 7, "BUT": [7, 20], "uvm_to_cpu": 7, "effect": [7, 15], "move": 7, "from": [7, 9, 10, 11, 12, 13, 14, 15, 17, 19, 20, 21, 22, 23, 24], "uvm_to_devic": 7, "prototyp": 7, "whose": 7, "uvm_cuda_mem_advis": 7, "cuda_memory_advis": 7, "cudamemadvis": 7, "cudamemoryadvis": 7, "enum": [7, 9], "avail": [7, 12, 14, 22], "python": [7, 12, 14, 21, 22, 24], "side": [7, 21, 23, 25], "namespac": 7, "over": [7, 12], "valid": 7, "inform": [7, 15, 23, 24], "uvm_cuda_mem_prefetch_async": 7, "device_t": 7, "cudamemprefetchasync": 7, "prefetch": 7, "destin": 7, "uvm_mem_advice_dont_fork": 7, "madvis": 7, "madv_dontfork": 7, "workaround": 7, "driver": [7, 12], "un": 7, "page": [7, 19, 24, 25], "tabl": [7, 10, 15, 25], "fork": [7, 19], "caus": [7, 12, 13, 20, 22], "slowdown": 7, "next": [7, 15, 21, 23], "access": [7, 17], "uvm_to_cpu_clon": 7, "copi": 7, "contigu": [7, 10], "singl": [7, 9], "thread": 7, "memcpi": 7, "section": [8, 12, 
23], "variou": 8, "all_to_one_devic": 8, "inputtensor": 8, "target_devic": 8, "permute_pooled_embs_split_gpu": 8, "pooled_emb": 8, "offset_dim_list": 8, "permute_list": 8, "inv_offset_dim_list": 8, "inv_permute_list": 8, "permute_pooled_embs_auto_grad_split_gpu": 8, "permute_pooled_embs_auto_grad_gpu": 8, "permute_pooled_embs_cpu_impl": 8, "allow_dupl": 8, "permute_pooled_embs_split_cpu": 8, "permute_pooled_embs_auto_grad_split_cpu": 8, "permute_pooled_embs_auto_grad": 8, "permute_pooled_embs_auto_grad_cpu": 8, "model": [9, 10], "techniqu": 9, "reduc": 9, "larg": [9, 12], "order": [9, 15, 19], "achiev": [9, 13], "better": [9, 21], "small": 9, "loss": [9, 20], "accuraci": 9, "_float_to_bfloat16_gpu": 9, "brain": 9, "bfloat16": 9, "_bfloat16_to_float_gpu": 9, "_float_to_fp8rowwise_gpu": 9, "forward": 9, "fp8": 9, "dtype": [9, 17], "sparsetyp": [9, 17], "throw": [9, 21], "_fp8rowwise_to_float_gpu": 9, "represent": [9, 15], "_float_to_fused8bitrowwise_gpu": 9, "_half_to_fused8bitrowwise_gpu": 9, "half": 9, "_single_or_half_precision_to_fused8bitrowwise_gpu": 9, "_fused8bitrowwise_to_float_gpu": 9, "_fused8bitrowwise_to_half_gpu": 9, "_fused8bitrowwise_to_single_or_half_precision_gpu": 9, "scale_bias_last": 9, "quant_padding_float_typ": 9, "_fused8bitrowwise_to_float_mixed_dim_gpu": 9, "kfloat": 9, "khalf": 9, "_float_to_fusednbitrowwise_gpu": 9, "_half_to_fusednbitrowwise_gpu": 9, "_single_or_half_precision_to_fusednbitrowwise_gpu": 9, "_fusednbitrowwise_to_float_gpu": 9, "_fusednbitrowwise_to_half_gpu": 9, "_fusednbitrowwise_to_single_or_half_precision_gpu": 9, "_float_to_hfp8_gpu": 9, "ebit": 9, "exponent_bia": 9, "max_po": 9, "hybrid": 9, "hfp8": 9, "_hfp8_to_float_gpu": 9, "_float_to_msfp_gpu": 9, "bounding_box_s": 9, "mbit": 9, "min_po": 9, "msfp": 9, "_msfp_to_float_gpu": 9, "_float_to_paddedfp8rowwise_gpu": 9, "row_dim": 9, "pad": [9, 15, 16, 23], "_paddedfp8rowwise_to_float_gpu": 9, "output_last_dim": 9, "_fused8bitrowwise_to_float_cpu_out": 9, "_float_to_fused8bitrowwise_cpu_out": 9, "float_to_fused8bitrowwise_cpu": 9, "half_to_fused8bitrowwise_cpu": 9, "float_or_half_to_fused8bitrowwise_cpu": 9, "fused8bitrowwise_to_float_cpu": 9, "fused8bitrowwise_to_half_cpu": 9, "fused8bitrowwise_to_float_or_half_cpu": 9, "float_to_fp8rowwise_cpu": 9, "fp8rowwise_to_float_cpu": 9, "fusednbitrowwise_to_float_cpu": 9, "fusednbitrowwise_to_half_cpu": 9, "fusednbitrowwise_to_float_or_half_cpu": 9, "floattofp8quantized_ref": 9, "nrow": 9, "ncol": 9, "fp8quantizedtofloat_ref": 9, "expand_into_jagged_permute_cuda": 10, "permut": 10, "input_offset": 10, "output_offset": 10, "output_s": 10, "expand_into_jagged_permut": 10, "expand": 10, "index": [10, 11, 12, 13, 15, 21, 23], "case": [10, 12, 13, 15, 19], "ha": [10, 13, 15, 19, 21, 22], "across": [10, 12], "rank": [10, 15], "level": 10, "exclus": 10, "op": [10, 13, 16, 23], "bag": [10, 17, 25], "sit": 10, "we": [10, 12, 15, 19], "deriv": [10, 15, 20], "arrai": [10, 16, 23], "comput": [10, 12, 13, 17], "formula": 10, "output_permut": 10, "table_offset": 10, "bag_offset": 10, "histogram_binning_calibration_cpu": 10, "logit": 10, "bin_num_exampl": 10, "bin_num_posit": 10, "positive_weight": 10, "lower_bound": 10, "upper_bound": 10, "bin_ctr_in_use_aft": 10, "bin_ctr_weight_valu": 10, "divid": [10, 15], "predict": 10, "rang": [10, 15], "e": [10, 12, 15, 21, 23, 24], "bin": [10, 12], "two": [10, 15, 16, 17, 22], "exampl": [10, 12, 13, 14, 16, 17, 21, 22, 23, 24], "fall": [10, 12, 13], "bucket": [10, 12], "basic": [10, 23], "histogram": 10, "result": [10, 12, 
16], "statist": 10, "real": 10, "ctr": 10, "num_po": 10, "num_exampl": 10, "final": 10, "calibr": 10, "pre": [10, 12, 13], "cali": 10, "wai": [10, 20], "within": 10, "suffici": [10, 19, 22], "That": 10, "fine": 10, "grain": 10, "modul": [10, 13, 17, 23], "theoret": 10, "layer": 10, "uncalibr": 10, "befor": [10, 12, 17, 24], "sigmoid": 10, "calibart": 10, "pass": [10, 17, 19, 22], "lower": 10, "bound": [10, 15], "calibration_target": 10, "observ": 10, "statisct": 10, "final_calibrated_predict": 10, "bin_ctr_weight": 10, "bin_ctr": 10, "calibrated_predict": 10, "bin_id": 10, "generic_histogram_binning_calibration_by_feature_cpu": 10, "segment_valu": 10, "segment_length": 10, "num_seg": 10, "bin_boundari": 10, "extens": [10, 21, 22], "ectr": 10, "abov": [10, 13, 15, 20, 21, 23, 24], "accept": [10, 19], "sort": [10, 11, 12], "keyjaggedtensor": 10, "num_bin": 10, "longer": [10, 18, 21], "still": [10, 12], "parambin_ctr_weight_valu": 10, "get_unique_indices_cuda": 11, "linear_indic": 11, "max_indic": 11, "compute_count": 11, "dedupl": 11, "pair": [11, 24], "lru_cache_find_uncached_cuda": 11, "unique_indic": 11, "unique_indices_length": 11, "time_stamp": 11, "lru_stat": 11, "gather_cache_stat": 11, "uvm_cache_stat": 11, "lock_cache_lin": 11, "lxu_cache_locking_count": 11, "lru": [11, 17], "uncach": 11, "them": 11, "host_lxu_cache_slot": 11, "h_in": 11, "cache_set": [11, 17], "linearize_cache_indices_cuda": 11, "b_offset": 11, "linear": 11, "uniqu": [11, 24], "linearize_cache_indices_from_row_idx_cuda": 11, "update_table_indic": 11, "update_row_indic": 11, "format": [11, 22, 23], "inplac": 11, "updat": [11, 12, 13, 17, 19], "lru_cache_populate_cuda": 11, "hash_size_cumsum": 11, "linear_cache_indic": 11, "stochastic_round": [11, 17], "fetch": 11, "insert": [11, 24], "timestep": 11, "lru_cache_populate_byte_cuda": 11, "byte": 11, "element": [11, 15], "direct_mapped_lru_cache_populate_byte_cuda": 11, "lxu_cache_miss_timestamp": 11, "assoc": 11, "variant": [11, 12, 13, 22], "lfu_cache_populate_cuda": 11, "lfu_stat": 11, "lfu": [11, 17], "lfu_cache_populate_byte_cuda": 11, "lxu_cache_lookup_cuda": 11, "invalid_index": 11, "num_uniq_cache_indic": 11, "lxu_cache_locations_output": 11, "look": [11, 17], "up": [11, 17], "slot": 11, "sentinel": 11, "miss": [11, 12], "direct_mapped_lxu_cache_lookup_cuda": 11, "lxu_cache_flush_cuda": 11, "flush": 11, "reset_weight_momentum_cuda": 11, "momentum1_dev": 11, "momentum1_uvm": 11, "momentum1_plac": 11, "momentum1_offset": 11, "pruned_indic": 11, "pruned_indices_offset": 11, "logical_table_id": 11, "buffer_id": 11, "lxu_cache_locking_counter_decrement_cuda": 11, "decrement": 11, "counter": 11, "lxu_cache_locations_update_cuda": 11, "lxu_cache_locations_new": 11, "fbgemm": [12, 13, 16, 18, 19, 20, 22, 23], "reproduc": [12, 13, 19, 20], "platform_nam": 12, "unam": 12, "prefix": [12, 24], "miniconda_prefix": 12, "home": 12, "download": [12, 13], "wget": 12, "q": 12, "anaconda": 12, "miniconda3": 12, "latest": 12, "sh": 12, "o": [12, 13], "p": 12, "load": [12, 15, 23], "shortcut": 12, "bashrc": 12, "command": [12, 13, 21, 22], "against": [12, 14], "env": [12, 13], "name": [12, 13, 20, 21, 23], "python_vers": 12, "3": [12, 15, 16, 17, 20, 23], "12": [12, 15, 17], "upgrad": 12, "pyopenssl": 12, "22": [12, 15], "requir": [12, 13, 14, 15, 17, 22, 23], "recent": [12, 13], "nvcc": 12, "capabl": [12, 14], "5": [12, 15, 17], "done": [12, 13], "bare": 12, "metal": 12, "neither": [12, 20], "nor": [12, 20], "nvidia": 12, "present": [12, 23], "sinc": [12, 15], "setup": [12, 13], 
"pull": [12, 13, 22], "linux": [12, 13], "distribut": [12, 20], "ubuntu": 12, "04": 12, "11": [12, 13, 15], "entrypoint": 12, "devel": 12, "ubuntu22": 12, "rest": [12, 13], "mai": [12, 13, 15, 20], "construct": [12, 13, 15], "mechan": 12, "full": [12, 13, 24], "nvml": 12, "org": [12, 13, 23], "cuda_vers": 12, "label": 12, "verifi": [12, 13, 21, 23], "cuda_runtim": 12, "h": [12, 16, 21], "libnvidia": [12, 13], "ml": [12, 13], "printenv": 12, "extract": 12, "given": [12, 15, 16], "url": [12, 13], "builder": 12, "blob": 12, "main": [12, 19], "common": [12, 13, 15, 23], "install_cuda": 12, "cudnn_url": 12, "redist": 12, "x86_64": 12, "2": [12, 13, 15, 16, 17, 21, 23, 24], "26_cuda12": 12, "archiv": 12, "tar": 12, "xz": 12, "unpack": 12, "xvf": 12, "amd": [12, 13], "minim": 12, "6": [12, 13, 15], "termin": 12, "while": [12, 22], "come": 12, "reason": [12, 13, 22], "oper": [12, 13, 25], "guid": [12, 23], "disabl": 12, "apt": 12, "prompt": 12, "debian_frontend": 12, "noninteract": 12, "db": 12, "radeon": 12, "amdgpu": 12, "focal": 12, "install_5": 12, "50601": 12, "1_all": 12, "deb": 12, "usecas": 12, "hiplibsdk": 12, "dkm": 12, "hipifi": 12, "hip": 12, "dev": 12, "20": 12, "sysroot": 12, "avoid": 12, "glibcxx": 12, "fbgemm_cpu": 12, "10": [12, 13, 15], "keep": 12, "older": [12, 13], "gcc_version": 12, "15": 12, "7": [12, 13, 15, 16, 17], "forg": [12, 22], "gxx_linux": 12, "64": [12, 15], "sysroot_linux": 12, "17": 12, "binari": [12, 20], "cento": 12, "stream": 12, "becaus": [12, 15], "librari": [12, 22, 25], "refer": [12, 15, 22, 23], "libstdc": 12, "what": [12, 22], "libcxx_path": 12, "print": [12, 13, 17, 23], "objdump": 12, "tc": 12, "grep": 12, "glibc_": 12, "sed": 12, "vu": 12, "cat": 12, "glibcxx_": 12, "possibl": [12, 15, 19, 20], "just": 12, "do": [12, 13, 19], "llvm_version": 12, "libcxx": 12, "outdat": 12, "aarch64": [12, 13], "cannot": 12, "explicitli": 12, "clangxx": 12, "rt": 12, "lib": [12, 13], "ld_library_path": [12, 13], "config": 12, "var": 12, "nvcc_prepend_flag": 12, "correctli": [12, 13, 14, 21, 22], "xcompil": 12, "ccbin": 12, "clangxx_path": 12, "unsupport": 12, "even": [12, 20], "though": [12, 13], "libstd": 12, "being": [12, 22], "mean": [12, 15, 17], "regardless": 12, "scenario": 12, "first": [12, 21, 23, 24], "binpath": 12, "overrid": 12, "exist": [12, 21, 23], "ln": 12, "sf": 12, "path_to_either_gcc_or_clang": 12, "cc": 12, "These": 12, "later": 12, "configur": [12, 21], "stage": [12, 15], "cmake": 12, "click": 12, "hypothesi": [12, 13], "jinja2": 12, "ncurs": 12, "numpi": [12, 13], "scikit": [12, 13], "offici": 12, "homepag": 12, "authorit": [12, 13, 22], "how": [12, 13, 14, 23], "nightli": [12, 13], "rc": 12, "without": [12, 20], "alwai": 12, "reliabl": 12, "arriv": 12, "hour": 12, "than": [12, 15], "window": 12, "silent": 12, "both": [12, 18, 20, 22], "place": [12, 17], "artifact": 12, "select": 12, "dure": [12, 15, 17, 23], "thu": [12, 17], "import": [12, 13, 17, 23, 24], "much": [12, 21], "determinist": 12, "torch": [12, 13, 16, 17, 22, 23], "whl": [12, 13], "cu121": [12, 13], "rocm5": [12, 13], "ensur": [12, 13, 19], "properli": 12, "__version__": 12, "minimum": [12, 21, 22, 23], "cuda_cmake_macro": 12, "txt": [12, 14, 22, 24], "tag": [12, 21, 24], "fbgemm_vers": 12, "v0": 12, "fbgemm_": 12, "addit": [12, 14, 15, 16], "flow": 12, "state": 12, "becom": 12, "stale": 12, "problem": 12, "re": [12, 13], "attempt": 12, "failur": [12, 13], "clear": [12, 19], "py": [12, 13, 14, 22, 23], "clean": [12, 22], "must": [12, 13, 14, 15, 17, 20, 24], "package_nam": 12, 
"fbgemm_gpu_": 12, "convent": 12, "major": 12, "minor": 12, "py312": 12, "python_tag": 12, "determin": [12, 15], "processor": 12, "arch": 12, "python_plat_nam": 12, "manylinux2014_": 12, "maco": 12, "macosx_10_9_": 12, "arm64": 12, "macosx_11_0_": 12, "win_": 12, "cpu_onli": 12, "flag": [12, 22], "bdist_wheel": 12, "package_vari": 12, "plat": 12, "instead": [12, 22], "cxxprefix": 12, "presum": 12, "made": [12, 22], "presenc": 12, "similar": [12, 15, 17], "enabl": [12, 14], "been": [12, 21], "unabl": 12, "cudacxx": 12, "cuda_bin_path": 12, "cub": 12, "applic": [12, 17, 21, 23], "cub_dir": 12, "header": [12, 21, 24], "cudnn_include_dir": 12, "cudnn_librari": 12, "nvml_lib_path": 12, "sm70": [12, 13], "80": 12, "v100": [12, 13], "a100": [12, 13], "cuda_arch_list": 12, "unset": 12, "torch_cuda_arch_list": 12, "preced": 12, "dtorch_cuda_arch_list": 12, "rocm_path": 12, "pytorch_rocm_arch": 12, "gfx906": 12, "gfx908": 12, "gfx90a": 12, "wiki": 12, "gentoo": 12, "list": [12, 15, 16, 17, 20, 21, 23], "rocminfo": 12, "gfx": 12, "dhip_root_dir": 12, "dtorch_use_hip_dsa": 12, "complet": [12, 19, 22], "correct": 12, "lot": 12, "jinja": 12, "instanti": 12, "sure": [12, 19, 21, 23], "accident": 12, "cours": 12, "fbgemm_gpu_lib_path": 12, "fbgemm_gpu_pi": [12, 13], "defin": [12, 15, 21], "nm": 12, "gdcu": 12, "referenc": 12, "certain": 12, "gdc": 12, "merge_pooled_embed": [12, 13], "isol": [13, 22], "build": [13, 14, 21, 23, 25], "sm80": 13, "respect": 13, "other": [13, 15, 20, 21, 22, 23], "scratch": 13, "guarante": 13, "especi": 13, "displai": [13, 24], "smi": 13, "515": 13, "76": 13, "persist": 13, "bu": [13, 24], "id": 13, "disp": 13, "volatil": 13, "uncorr": 13, "ecc": 13, "fan": 13, "temp": 13, "perf": 13, "pwr": 13, "usag": [13, 22, 23], "cap": 13, "memori": [13, 17, 25], "util": [13, 25], "mig": 13, "a10g": 13, "00000000": 13, "00": 13, "1e": 13, "31c": 13, "p0": 13, "59w": 13, "300w": 13, "0mib": 13, "23028mib": 13, "gi": 13, "ci": 13, "pid": 13, "No": 13, "expos": 13, "onc": [13, 19], "imag": 13, "launch": 13, "alreadi": [13, 19, 21, 23], "toolkit": 13, "interfac": 13, "concis": 13, "info": [13, 21, 23], "dieedg": 13, "avgpwr": 13, "sclk": 13, "mclk": 13, "pwrcap": 13, "vram": 13, "33": 13, "0c": 13, "37": 13, "0w": 13, "300mhz": 13, "1200mhz": 13, "auto": [13, 22], "290": 13, "32": 13, "39": 13, "log": 13, "difficult": 13, "relev": [13, 21], "link": [13, 22], "encount": 13, "signatur": [13, 22], "traceback": 13, "last": 13, "root": [13, 19], "miniconda": 13, "mycondaenv": 13, "site": 13, "_op": [13, 22], "line": [13, 23, 24], "565": 13, "__getattr__": 13, "overload_nam": 13, "_c": 13, "_jit_get_oper": 13, "qualified_op_nam": 13, "runtimeerror": 13, "except": [13, 21, 23], "wa": 13, "string": [13, 24], "post47": 13, "py3": 13, "egg": 13, "__init__": [13, 23], "21": 13, "_fbgemm_gpu_doc": 13, "noqa": 13, "f401": 13, "e402": 13, "18": 13, "569": 13, "rais": [13, 23], "attributeerror": [13, 23], "_opnamespac": 13, "object": [13, 15], "attribut": [13, 23], "cli": 13, "main_run": 13, "execut": [13, 14], "47": 13, "_zn6fbgemm48floatorhalftofusednbitrowwisequantizedsbhalfavx2itli2eeevpkt_miph": 13, "appear": 13, "libtorch": 13, "visibl": 13, "incorrectli": [13, 22], "declar": [13, 21], "were": [13, 16], "pr": [13, 21, 22, 23], "1618": 13, "former": 13, "resolv": 13, "manual": [13, 21], "latter": 13, "seriou": 13, "tha": 13, "develop": [13, 22], "bench": 14, "good": [14, 20], "instal": [14, 22, 25], "pip": [14, 22], "pytest": 14, "rsx": 14, "ignor": [14, 17, 22], "pytestcollectionwarn": 14, 
"split_table_batched_embeddings_test": 14, "quantize_ops_test": 14, "sparse_ops_test": 14, "split_embedding_inference_converter_test": 14, "mode": [14, 17], "cuda_visible_devic": 14, "debug": 14, "cuda_launch_block": 14, "fbgemm_test_with_rocm": 14, "hip_launch_block": 14, "split_table_batched_embeddings_benchmark": 14, "purpos": [15, 16, 17, 20], "handl": 15, "consecut": 15, "nestedtensor": 15, "raggedtensor": 15, "tensorflow": 15, "notabl": 15, "sentenc": 15, "repres": 15, "maxlength": 15, "2d": [15, 16, 17, 23], "numel": 15, "greatest": 15, "divisor": 15, "smallest": 15, "sub": 15, "exclud": 15, "partit": 15, "impli": [15, 20], "denot": [15, 21, 23], "offest": 15, "outer": 15, "would": 15, "begin": 15, "maximum": [15, 16, 23], "between": [15, 21, 22, 24], "normal": 15, "densor": 15, "form": [15, 20], "figur": 15, "below": 15, "show": [15, 22], "accomod": 15, "logic": [15, 21], "At": [15, 21, 22, 23], "multipl": [15, 16, 17, 23, 25], "hadamard": 15, "product": [15, 20], "involv": 15, "bmatrix": 15, "rightarrow": 15, "16": 15, "25": 15, "36": 15, "49": 15, "81": 15, "50": 15, "operand": 15, "word": 15, "ax": 15, "properti": 15, "hold": 15, "elementwis": [15, 16], "equival": 15, "start": [15, 16, 23, 24], "dim": 15, "onto": 15, "part": 15, "everi": 15, "those": [15, 16, 19, 23], "converson": 15, "could": 15, "lead": 15, "read": [15, 17], "relat": 15, "smaller": 15, "expect": 15, "happen": 15, "give": 15, "situat": 15, "like": 15, "dense_tensor": 15, "jagged_tensor": 15, "break": 15, "exact": 15, "usual": 15, "1d": [16, 17, 23], "area": 16, "outsid": 16, "coverag": 16, "total": [16, 17], "identit": 16, "add": [16, 19, 21, 22, 23], "structur": 16, "jagged_dense_dense_elementwise_add_jagged_output": 16, "y_0": 16, "y_1": 16, "multipli": [16, 17], "max_n": 16, "matmul": 16, "stacked_jagged_1d_to_dens": 16, "arg": [16, 23], "kwarg": 16, "stacked_jagged_2d_to_dens": 16, "split_table_batched_embeddings_op": 17, "splittablebatchedembeddingbagscodegen": 17, "embedding_spec": 17, "feature_table_map": 17, "none": 17, "cache_algorithm": 17, "cachealgorithm": 17, "cache_load_factor": 17, "cache_reserved_memori": 17, "cache_precis": 17, "weights_precis": 17, "enforce_hbm": 17, "optimtyp": 17, "exact_sgd": 17, "record_cache_metr": 17, "gradient_clip": 17, "max_gradi": 17, "learning_r": 17, "01": 17, "ep": 17, "0e": 17, "momentum": 17, "weight_decai": 17, "weight_decay_mod": 17, "weightdecaymod": 17, "eta": 17, "001": 17, "beta1": 17, "beta2": 17, "999": 17, "poolingmod": 17, "boundscheckmod": 17, "sourc": [17, 19, 20, 21, 22, 23], "backward": 17, "embeddingloc": 17, "computedevic": 17, "spec": 17, "placement": 17, "lxu": 17, "algorithm": 17, "capac": 17, "reserv": [17, 20], "hbm": 17, "adam": 17, "exact_adagrad": 17, "exact_rowwise_adagrad": 17, "lamb": 17, "lars_sgd": 17, "partial_rowwise_adam": 17, "partial_rowwise_lamb": 17, "sgd": 17, "recordcachemetr": 17, "record": 17, "hit": 17, "request": [17, 18, 22], "record_cache_miss_count": 17, "metric": 17, "record_tablewise_cache_miss": 17, "stochast": 17, "round": 17, "gradient": 17, "clip": 17, "learn": 17, "rate": 17, "epsilon": 17, "adagrad": 17, "lar": 17, "decai": 17, "l2": 17, "decoupl": 17, "pool": [17, 25], "boundari": 17, "fatal": 17, "conatin": 17, "column": 17, "feature_requires_grad": 17, "split_table_batched_embeddings_ops_common": 17, "split_table_batched_embeddings_ops_train": 17, "init_embedding_weights_uniform": 17, "split_embedding_weight": 17, "9426": 17, "7046": 17, "4214": 17, "0419": 17, "1331": 17, "7856": 17, "8124": 17, "2021": 
17, "5771": 17, "5911": 17, "7792": 17, "1068": 17, "6203": 17, "4813": 17, "1677": 17, "4790": 17, "5587": 17, "0941": 17, "5754": 17, "3475": 17, "8952": 17, "1964": 17, "0810": 17, "4174": 17, "2513": 17, "4039": 17, "3775": 17, "3273": 17, "5399": 17, "0229": 17, "1455": 17, "8770": 17, "9520": 17, "4593": 17, "7169": 17, "6307": 17, "1765": 17, "8757": 17, "8614": 17, "2051": 17, "0603": 17, "9980": 17, "7958": 17, "5826": 17, "long": 17, "13": 17, "5197": 17, "2957": 17, "3578": 17, "1487": 17, "4873": 17, "3044": 17, "9801": 17, "2769": 17, "7164": 17, "8528": 17, "7159": 17, "6719": 17, "0784": 17, "2016": 17, "2176": 17, "1988": 17, "3825": 17, "5008": 17, "8991": 17, "1405": 17, "2637": 17, "9427": 17, "8902": 17, "3754": 17, "5013": 17, "6105": 17, "9968": 17, "3057": 17, "7621": 17, "9821": 17, "7314": 17, "6195": 17, "grad_fn": 17, "cppnode": 17, "splitlookupfunction_sgd_op": 17, "question": 18, "concern": 18, "discuss": 18, "kick": 18, "regard": 18, "feel": 18, "free": 18, "reach": 18, "easi": 19, "transpar": 19, "describ": 19, "activ": 19, "welcom": [19, 25], "your": [19, 22, 23], "repositori": 19, "branch": 19, "ve": 19, "chang": [19, 21, 23], "api": [19, 21, 22, 23], "suit": 19, "lint": 19, "haven": 19, "submit": [19, 21, 23], "facebook": [19, 20, 25], "open": 19, "track": 19, "public": [19, 22], "bug": 19, "descript": [19, 21, 22, 23, 24], "instruct": [19, 21, 22, 23, 25], "abl": 19, "bounti": 19, "safe": 19, "disclosur": 19, "secur": 19, "go": 19, "outlin": 19, "By": 19, "agre": 19, "tree": 19, "claus": 20, "bsd": 20, "softwar": 20, "copyright": 20, "inc": 20, "affili": 20, "right": [20, 24], "redistribut": 20, "modif": 20, "permit": 20, "condit": 20, "met": 20, "retain": 20, "notic": 20, "disclaim": 20, "materi": 20, "contributor": 20, "endors": 20, "promot": 20, "written": 20, "permiss": 20, "BY": 20, "THE": 20, "holder": 20, "AND": 20, "AS": 20, "express": [20, 24], "OR": 20, "warranti": 20, "NOT": 20, "limit": [20, 22], "TO": 20, "OF": 20, "merchant": 20, "FOR": 20, "particular": 20, "IN": 20, "NO": 20, "event": 20, "shall": 20, "BE": 20, "liabl": 20, "indirect": 20, "incident": 20, "special": 20, "exemplari": 20, "consequenti": 20, "damag": 20, "procur": 20, "substitut": 20, "servic": 20, "profit": 20, "busi": 20, "interrupt": 20, "theori": 20, "liabil": 20, "contract": 20, "strict": 20, "tort": 20, "neglig": 20, "aris": 20, "IF": 20, "advis": 20, "SUCH": 20, "javadoc": 21, "style": [21, 23], "comment": [21, 22, 24], "sphinx": [21, 22, 23], "breath": 21, "kept": 21, "cpp": [21, 23, 24], "cu": 21, "cuh": 21, "everyth": 21, "ifndef": 21, "doxygen_this_will_be_skip": 21, "endif": 21, "hidden": 21, "html": [21, 22, 23], "descriptionss": 21, "publish": [21, 23], "docstr": [21, 22, 23], "method": [21, 22, 23], "organ": 21, "yet": 21, "top": [21, 25], "defgroup": 21, "directli": [21, 23], "behavior": [21, 23], "tparam": 21, "param": [21, 23], "thrown": [21, 23], "ingroup": 21, "brief": 21, "short": 21, "example_method": [21, 23], "def": [21, 23], "foo": [21, 23], "lst": [21, 23], "And": [21, 23], "verbatim": [21, 23], "text": [21, 23, 24], "diagram": [21, 23], "unpars": 21, "second": [21, 23], "prev": [21, 23], "usabl": [21, 23], "space": [21, 22, 23], "endcod": 21, "align": [21, 23], "param1": [21, 23], "param2": 21, "bad_alloc": 21, "logic_error": 21, "href": 21, "www": [21, 23], "nl": 21, "cmdlink": 21, "On": [21, 23], "doxygengroup": 21, "rst": [21, 23, 24], "content": [21, 24, 25], "toctre": [21, 23], "ini": 21, "taken": 21, "care": 21, "doc": [21, 22, 23, 24], 
"local": [21, 23], "netlifi": [21, 22, 23], "preview": [21, 23], "serv": 22, "accompani": 22, "put": 22, "yourself": 22, "shoe": 22, "who": 22, "understand": 22, "live": 22, "easier": 22, "leav": 22, "separ": 22, "task": 22, "pointer": 22, "tool": 22, "graphviz": [22, 24], "assembl": 22, "view": 22, "prepend": 22, "sphinx_lint": 22, "technic": 22, "why": 22, "invok": 22, "occasion": 22, "unresolv": 22, "might": 22, "opt": 22, "pycapsul": 22, "class": [22, 23], "neg": 22, "silenc": 22, "nitpick": 22, "conf": 22, "domain": 22, "deploi": 22, "app": 22, "googl": 23, "c_size_t": 23, "about": 23, "ret": 23, "emplace_back": 23, "item": 23, "valueerror": 23, "14": 23, "restructuredtext": 23, "en": 23, "master": 23, "__": 23, "pep": 23, "0287": 23, "42": 23, "autofunct": 23, "c_ulong": 23, "mani": 23, "attach": 23, "fact": 23, "helper": 23, "codebas": 23, "add_doc": 23, "jag": [23, 25], "forc": 23, "hoc": 23, "the_new_doc_modul": 23, "remain": 23, "render": [23, 24], "anchor": 24, "_doc": 24, "underscor": 24, "_": 24, "There": 24, "elsewher": 24, "ref": 24, "anoth": 24, "literalinclud": 24, "rel": 24, "enclos": 24, "bracket": 24, "skiplin": 24, "suppli": 24, "math": 24, "inlin": 24, "k_": 24, "k_n": 24, "expressino": 24, "int_a": 24, "frac": 24, "2v": 24, "dx": 24, "left": 24, "dv": 24, "_a": 24, "du": 24, "digraph": 24, "altern": 24, "extern": 24, "dot": 24, "examplegraph": 24, "low": 25, "precis": 25, "high": 25, "convolut": 25, "server": 25, "infer": 25, "backend": 25, "caffe2": 25, "collect": 25, "transform": 25, "contribut": 25, "contact": 25, "licens": 25, "experiment": 25, "tbe": 25}, "objects": {"": [[9, 0, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ebits"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::exponent_bias"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::input"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ncols"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::nrows"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::output"], [9, 0, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu"], [9, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::forward"], [9, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::input"], [9, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::output_dtype"], [0, 0, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::len"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::m"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::max"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::min"], [0, 0, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf"], [0, 2, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::InputType"], [0, 1, 1, 
"_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::bit_rate"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_columns"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_rows"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::output"], [9, 0, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ebits"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::exponent_bias"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::input"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::max_pos"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ncols"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::nrows"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::output"], [0, 0, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize"], [0, 2, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::T"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::dst"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::len"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::noise_ratio"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::num_threads"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::qparams"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::src"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::thread_id"], [0, 0, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::C"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::G"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::K"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::LAYOUT"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", 
"QuantizeGroupwise::T"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::X"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::dst"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::scales"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::src"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::zero_points"], [0, 0, 1, "_CPPv46Xor128v", "Xor128"], [9, 0, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu"], [9, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::forward"], [9, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::input"], [9, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::output_dtype"], [9, 0, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu"], [9, 1, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu::input"], [9, 0, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu"], [9, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::forward"], [9, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::input"], [9, 0, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu"], [9, 1, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu::input"], [9, 0, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out"], [9, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::input"], [9, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::output"], [9, 0, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu"], [9, 1, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu::input"], [9, 0, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu"], [9, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::bit_rate"], [9, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::input"], [9, 0, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu"], [9, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::ebits"], [9, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::exponent_bias"], [9, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::input"], [9, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::max_pos"], [9, 0, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bias"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bounding_box_size"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", 
"_float_to_msfp_gpu::ebits"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::input"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::max_pos"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::mbits"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::min_pos"], [9, 0, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu"], [9, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::forward"], [9, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::input"], [9, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::row_dim"], [9, 0, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out"], [9, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::input"], [9, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::output"], [9, 0, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu"], [9, 1, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu::input"], [9, 0, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu"], [9, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::D_offsets"], [9, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::input"], [9, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::output_dtype"], [9, 0, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu"], [9, 1, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu::input"], [9, 0, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu"], [9, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::input"], [9, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::output_dtype"], [9, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::quant_padding_float_type"], [9, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::scale_bias_last"], [9, 0, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu"], [9, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::bit_rate"], [9, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::input"], [9, 0, 1, 
"_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu"], [9, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::bit_rate"], [9, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::input"], [9, 0, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu"], [9, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::bit_rate"], [9, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::input"], [9, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::output_dtype"], [9, 0, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu"], [9, 1, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu::input"], [9, 0, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu"], [9, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::bit_rate"], [9, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::input"], [9, 0, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu"], [9, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::ebits"], [9, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::exponent_bias"], [9, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::input"], [9, 0, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu"], [9, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::bias"], [9, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::ebits"], [9, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::input"], [9, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::mbits"], [9, 0, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::forward"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::input"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_dtype"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_last_dim"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::row_dim"], [9, 0, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu"], [9, 1, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", 
"_single_or_half_precision_to_fused8bitrowwise_gpu::input"], [9, 0, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu"], [9, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::bit_rate"], [9, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::input"], [8, 0, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device"], [8, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::inputTensors"], [8, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::target_device"], [5, 0, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul"], [5, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_offsets"], [5, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_values"], [5, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::v"], [2, 0, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::B_ofsets"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::bounds_check_mode"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::indices"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::max_B"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::offsets"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::rows_per_table"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::warning"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::weights"], [5, 0, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged"], [5, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::dense"], [5, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::offsets"], [5, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::total_L"], [11, 0, 1, 
"_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::D_offsets"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::hash_size_cumsum"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lru_state"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_miss_timestamp"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::row_alignment"], [11, 1, 1, 
"_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::time_stamp"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::uvm_cache_stats"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_offsets"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_tys"], [11, 0, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::invalid_index"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::uvm_cache_stats"], [21, 0, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method"], [21, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::Alignment"], [21, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::T"], [21, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param1"], [21, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param2"], [10, 0, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda"], [10, 1, 1, 
"_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::input_offsets"], [10, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_offsets"], [10, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_size"], [10, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::permute"], [9, 0, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu"], [9, 1, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu::input"], [9, 0, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu"], [9, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::forward"], [9, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::input"], [9, 0, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu"], [9, 1, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu::input"], [9, 0, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu"], [9, 1, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu::input"], [9, 0, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu"], [9, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::input"], [9, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::output_dtype"], [9, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::quant_padding_float_type"], [9, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::scale_bias_last"], [9, 0, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu"], [9, 1, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu::input"], [9, 0, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu"], [9, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::bit_rate"], [9, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::input"], [9, 0, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu"], [9, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::bit_rate"], [9, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::input"], [9, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::output_dtype"], [9, 0, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu"], [9, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::bit_rate"], [9, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::input"], [10, 0, 1, 
"_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_boundaries"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_in_use_after"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_weight_value"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_examples"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_positives"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::logit"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::num_segments"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::positive_weight"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_lengths"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_value"], [11, 0, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda"], [11, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::compute_count"], [11, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::linear_indices"], [11, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::max_indices"], [3, 0, 1, "_CPPv420gqa_attn_splitk_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_t", "gqa_attn_splitk_cuda"], [3, 1, 1, "_CPPv420gqa_attn_splitk_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_t", "gqa_attn_splitk_cuda::XQ"], [3, 1, 1, 
"_CPPv420gqa_attn_splitk_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_t", "gqa_attn_splitk_cuda::cache_K"], [3, 1, 1, "_CPPv420gqa_attn_splitk_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_t", "gqa_attn_splitk_cuda::cache_V"], [3, 1, 1, "_CPPv420gqa_attn_splitk_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_t", "gqa_attn_splitk_cuda::num_int4_kv_groups"], [3, 1, 1, "_CPPv420gqa_attn_splitk_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_t", "gqa_attn_splitk_cuda::num_split_ks"], [3, 1, 1, "_CPPv420gqa_attn_splitk_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_t", "gqa_attn_splitk_cuda::qk_scale"], [3, 1, 1, "_CPPv420gqa_attn_splitk_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_t", "gqa_attn_splitk_cuda::seq_positions"], [9, 0, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu"], [9, 1, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu::input"], [10, 0, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_in_use_after"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_weight_value"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_examples"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_positives"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::logit"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::lower_bound"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::positive_weight"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::upper_bound"], [11, 0, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot"], [11, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::C"], [11, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::h_in"], [2, 0, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function"], [2, 1, 1, 
"_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::D_offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::dev_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bias"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bits"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indice_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indices"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_locations"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_weights"], [2, 1, 1, 
"_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float16_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float32_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float8_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int2_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int4_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int8_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::output_dtype"], [2, 1, 1, 
"_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::pooling_mode"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::row_alignment"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::total_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::uvm_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_placements"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_tys"], [2, 0, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu"], [2, 1, 1, 
"_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::D_offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::dev_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bias"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bits"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indice_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indices"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_locations"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_weights"], [2, 1, 1, 
"_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float16_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float32_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float8_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int2_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int4_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int8_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::output_dtype"], [2, 1, 1, 
"_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::pooling_mode"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::row_alignment"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::total_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::uvm_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_placements"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_tys"], [2, 0, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", 
"int_nbit_split_embedding_uvm_caching_codegen_lookup_function"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::D_offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_hash_size_cumsum"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_index_table_map"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::dev_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bias"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bits"], [2, 1, 1, 
"_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indice_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indices"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_locations"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_state"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_state"], [2, 1, 1, 
"_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float16_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float32_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float8_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int2_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int4_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int8_D"], [2, 1, 1, 
"_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::output_dtype"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::pooling_mode"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::row_alignment"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_cache_hash_size"], [2, 1, 1, 
"_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::uvm_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_placements"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_tys"], [2, 0, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::D_offsets"], [2, 1, 1, 
"_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_hash_size_cumsum"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_index_table_map"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::dev_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bias"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bits"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indice_weights"], [2, 1, 1, 
"_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indices"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_locations"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_state"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_state"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float16_D"], [2, 1, 1, 
"_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float32_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float8_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int2_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int4_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int8_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::offsets"], [2, 1, 1, 
"_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::output_dtype"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::pooling_mode"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::row_alignment"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_cache_hash_size"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::uvm_weights"], [2, 1, 1, 
"_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_placements"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_tys"], [7, 0, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor"], [7, 1, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor::self"], [5, 0, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense"], [5, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::max_L"], [5, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::offsets"], [5, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::padding_value"], [5, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::values"], [5, 0, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense"], [5, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::max_sequence_length"], [5, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::offsets"], [5, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::values"], [5, 0, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_offsets"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_values"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::y"], [5, 0, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output"], [5, 1, 1, 
"_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_offsets"], [5, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_values"], [5, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::y"], [5, 0, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda"], [5, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_offsets"], [5, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_values"], [5, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::y"], [5, 0, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_offsets"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_values"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::y"], [5, 0, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense"], [5, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::max_lengths"], [5, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::offsets"], [5, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::padding_value"], [5, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::values"], [5, 0, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward"], [5, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::max_lengths"], [5, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::offsets"], [5, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::padding_value"], [5, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::values"], [11, 0, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::D_offsets"], [11, 1, 1, 
"_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lfu_state"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::row_alignment"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_offsets"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_tys"], [11, 0, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::D_offsets"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_index_table_map"], [11, 1, 1, 
"_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lfu_state"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::stochastic_rounding"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights_offsets"], [11, 0, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::B_offsets"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::indices"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::max_B"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::offsets"], [11, 0, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda"], [11, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_row_indices"], [11, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_table_indices"], [11, 0, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda"], [11, 1, 1, 
"_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lock_cache_line"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lru_state"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lxu_cache_locking_counter"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::max_indices"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::time_stamp"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::unique_indices"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::unique_indices_length"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::uvm_cache_stats"], [11, 0, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::D_offsets"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::hash_size_cumsum"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::linear_cache_indices"], [11, 1, 1, 
"_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lru_state"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::row_alignment"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::time_stamp"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::uvm_cache_stats"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_offsets"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_tys"], [11, 0, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::D_offsets"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::cache_index_table_map"], [11, 1, 1, 
"_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::hash_size_cumsum"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lock_cache_line"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lru_state"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_locking_counter"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::stochastic_rounding"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::time_stamp"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::uvm_cache_stats"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", 
"lru_cache_populate_cuda::weights"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights_offsets"], [11, 0, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::D_offsets"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::stochastic_rounding"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::total_D"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::uvm_weights"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::weights_offsets"], [11, 0, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda"], [11, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations"], [11, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations_new"], [11, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::num_uniq_cache_indices"], [11, 0, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda"], [11, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locations"], [11, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locking_counter"], [11, 0, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", 
"lxu_cache_lookup_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::invalid_index"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_locations_output"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::num_uniq_cache_indices"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::uvm_cache_stats"], [7, 0, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor"], [7, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::self"], [7, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::sizes"], [7, 0, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor"], [7, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::self"], [7, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::sizes"], [7, 0, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta"], [7, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::self"], [7, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::sizes"], [7, 0, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor"], [7, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::is_host_mapped"], [7, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::self"], [7, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::sizes"], [7, 0, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor"], [7, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::self"], [7, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::sizes"], [4, 0, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu"], [4, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::batch_size"], [4, 1, 1, 
"_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::include_last_offsets"], [4, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::indices_list"], [4, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::offsets_list"], [4, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::per_sample_weights"], [8, 0, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::pooled_embs"], [8, 0, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_permute_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::offset_dim_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::permute_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::pooled_embs"], [8, 0, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_permute_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::offset_dim_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::permute_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::pooled_embs"], [8, 0, 1, 
"_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_permute_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::offset_dim_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::permute_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::pooled_embs"], [8, 0, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_permute_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::offset_dim_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::permute_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::pooled_embs"], [8, 0, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::allow_duplicates"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_offset_dim_list"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_permute_list"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::offset_dim_list"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::permute_list"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::pooled_embs"], [8, 0, 1, 
"_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::pooled_embs"], [8, 0, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::pooled_embs"], [2, 0, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings_offsets"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::indices"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::offsets"], [2, 0, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings_offsets"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::indices"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::offsets"], [2, 0, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::dense_indices"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", 
"pruned_hashmap_insert_unweighted_cpu::hash_table"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table_offsets"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::indices"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::offsets"], [2, 0, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table_offsets"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::indices"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::offsets"], [2, 0, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table_offsets"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::indices"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::offsets"], [6, 0, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda"], [6, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::grad_output"], [6, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::num_features_per_rank"], [6, 0, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda"], [6, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::cumsum_dim_sum_per_rank"], [6, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::dim_sum_per_rank"], [6, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::grad_output"], [6, 0, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu"], [6, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::dim_sum_per_rank"], [6, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::grad_output"], [6, 0, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda"], [6, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::dim_sum_per_rank"], [6, 1, 1, 
"_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::grad_output"], [0, 0, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::DIRECT"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::Q_GRAN"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::block"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::inp"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_in"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::r"], [0, 0, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512"], [0, 2, 1, 
"_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::C_PER_G"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::Q_GRAN"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::block"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::inp"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_in"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::r"], [11, 0, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", 
"reset_weight_momentum_cuda::D_offsets"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::buffer_ids"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::dev_weights"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::logical_table_ids"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_dev"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_offsets"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_placements"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_uvm"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices"], [11, 1, 1, 
"_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices_offsets"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::uvm_weights"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_offsets"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_placements"], [4, 0, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu"], [4, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::include_last_offsets"], [4, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::indices_list"], [4, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::offsets_list"], [4, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::per_sample_weights"], [7, 0, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise"], [7, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::cuda_memory_advise"], [7, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::self"], [7, 0, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async"], [7, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::device_t"], [7, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::self"], [7, 0, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork"], [7, 1, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork::self"], [7, 0, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage"], [7, 1, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage::self"], [7, 0, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu"], [7, 1, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu::self"], [7, 0, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone"], [7, 1, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", 
"uvm_to_cpu_clone::self"], [7, 0, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device"], [7, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::prototype"], [7, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::self"], [17, 3, 0, "-", "fbgemm_gpu"]], "fbgemm_gpu.docs.examples": [[23, 4, 1, "", "example_method"]], "fbgemm_gpu.split_table_batched_embeddings_ops": [[17, 4, 1, "", "SplitTableBatchedEmbeddingBagsCodegen"]], "torch.ops.fbgemm": [[16, 4, 1, "", "batched_dense_vec_jagged_2d_mul"], [16, 4, 1, "", "dense_to_jagged"], [16, 4, 1, "", "jagged_1d_to_dense"], [16, 4, 1, "", "jagged_2d_to_dense"], [16, 4, 1, "", "jagged_dense_dense_elementwise_add_jagged_output"], [16, 4, 1, "", "jagged_dense_elementwise_add"], [16, 4, 1, "", "jagged_dense_elementwise_add_jagged_output"], [16, 4, 1, "", "jagged_dense_elementwise_mul"], [16, 4, 1, "", "jagged_to_padded_dense"], [16, 4, 1, "", "stacked_jagged_1d_to_dense"], [16, 4, 1, "", "stacked_jagged_2d_to_dense"]]}, "objtypes": {"0": "cpp:function", "1": "cpp:functionParam", "2": "cpp:templateParam", "3": "py:module", "4": "py:function"}, "objnames": {"0": ["cpp", "function", "C++ function"], "1": ["cpp", "functionParam", "C++ function parameter"], "2": ["cpp", "templateParam", "C++ template parameter"], "3": ["py", "module", "Python module"], "4": ["py", "function", "Python function"]}, "titleterms": {"quantiz": [0, 9], "util": 0, "refer": [0, 24], "implement": 0, "method": 0, "avx": 0, "2": 0, "512": 0, "build": [1, 12, 22], "instruct": [1, 12, 13, 14], "fbgemm": [1, 25], "requir": 1, "hardwar": 1, "softwar": 1, "depend": 1, "asmjit": 1, "cpuinfo": 1, "googletest": 1, "set": [1, 12, 13, 22], "up": [1, 12, 13, 22], "an": [1, 12], "isol": [1, 12], "environ": [1, 12, 13, 14, 22], "instal": [1, 12, 13], "tool": [1, 12], "c": [1, 12, 21, 25], "compil": [1, 12], "other": [1, 12, 24], "librari": [1, 13], "prepar": [1, 12], "linux": 1, "maco": 1, "cmake": 1, "gcc": [1, 12], "issu": [1, 19], "12": 1, "clang": [1, 12], "bazel": 1, "window": 1, "embed": [2, 8, 11, 17], "oper": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 16, 17], "cuda": [2, 5, 6, 7, 9, 10, 12, 13, 14], "cpu": [2, 5, 6, 9, 10, 12, 13], "experiment": 3, "attent": 3, "combin": [4, 15], "input": 4, "jag": [5, 15, 16], "tensor": [5, 15, 16], "layout": 6, "transform": 6, "memori": 7, "pool": 8, "merg": 8, "permut": 8, "spars": 10, "data": 10, "tabl": [11, 17], "batch": [11, 17], "miniconda": 12, "conda": [12, 13], "onli": [12, 13], "docker": [12, 13], "imag": 12, "cudnn": 12, "rocm": [12, 13, 14], "miopen": 12, "symlink": 12, "pytorch": [12, 13], "through": [12, 13], "pip": [12, 13], "post": [12, 13], "check": [12, 13], "fbgemm_gpu": [12, 13, 14, 22, 25], "packag": [12, 13], "The": 12, "process": 12, "wheel": 12, "variabl": 12, "For": 12, "develop": [12, 25], "undefin": [12, 13], "symbol": [12, 13], "glibc": 12, "version": 12, "compat": 12, "nvidia": 13, "driver": 13, "contain": 13, "runtim": 13, "amdgpu": 13, "python": [13, 23, 25], "public": 13, "pypi": 13, "test": 14, "setup": 14, "run": 14, "variant": 14, "benchmark": 14, "high": 15, "level": 15, "overview": [15, 25], "format": 15, "valu": 15, "offset": 15, "max": 15, "length": 15, "exampl": 15, "arithmet": 15, "convers": 15, "dens": 15, "tbe": 17, "contact": 18, "u": 18, "github": 18, "slack": 18, "contribut": 19, "code": [19, 21, 23, 24], "conduct": 19, "pull": 19, "request": 19, "contributor": 19, "licens": [19, 20], "agreement": 19, "cla": 19, "ad": [21, 23, 24], "document": [21, 22, 23, 24, 25], "gener": [22, 
23, 25], "guidelin": 22, "specif": 22, "guid": 22, "toolchain": 22, "lint": 22, "deploy": 22, "preview": 22, "todo": 23, "auto": 23, "sphinx": 24, "pointer": 24, "section": 24, "referenc": 24, "sourc": 24, "latex": 24, "graph": 24, "homepag": 25, "info": 25, "api": 25}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Installation Instructions": [[13, "installation-instructions"]], "Set Up CPU-Only Environment": [[13, "set-up-cpu-only-environment"]], "Set Up CUDA Environment": [[13, "set-up-cuda-environment"]], "Install NVIDIA Drivers": [[13, "install-nvidia-drivers"]], "Set Up the CUDA Docker Container and Conda Environment": [[13, "set-up-the-cuda-docker-container-and-conda-environment"]], "Install the CUDA Runtime": [[13, "install-the-cuda-runtime"]], "Set Up ROCm Environment": [[13, "set-up-rocm-environment"]], "Install AMDGPU Drivers": [[13, "install-amdgpu-drivers"]], "Set Up the ROCm Docker Container and Conda Environment": [[13, "set-up-the-rocm-docker-container-and-conda-environment"]], "Install Python Libraries": [[13, "install-python-libraries"]], "Install PyTorch": [[13, "install-pytorch"], [12, "install-pytorch"]], "Install the FBGEMM_GPU Package": [[13, "install-the-fbgemm-gpu-package"]], "Install through PyTorch PIP": [[13, "install-through-pytorch-pip"]], "Install through Public PyPI": [[13, "install-through-public-pypi"]], "Post-Installation Checks": [[13, "post-installation-checks"]], "Undefined Symbols": [[13, "undefined-symbols"]], "Test Instructions": [[14, "test-instructions"]], "Setup the FBGEMM_GPU Test Environment": [[14, "setup-the-fbgemm-gpu-test-environment"]], "Running FBGEMM_GPU Tests": [[14, "running-fbgemm-gpu-tests"]], "Testing with the CUDA Variant": [[14, "testing-with-the-cuda-variant"]], "Testing with the ROCm Variant": [[14, "testing-with-the-rocm-variant"]], "Running FBGEMM_GPU Benchmarks": [[14, "running-fbgemm-gpu-benchmarks"]], "Build Instructions": [[12, "build-instructions"], [1, "build-instructions"]], "Set Up an Isolated Build Environment": [[12, "set-up-an-isolated-build-environment"], [1, "set-up-an-isolated-build-environment"]], "Install Miniconda": [[12, "install-miniconda"]], "Set Up the Conda Environment": [[12, "set-up-the-conda-environment"]], "Set Up for CPU-Only Build": [[12, "set-up-for-cpu-only-build"]], "Set Up for CUDA Build": [[12, "set-up-for-cuda-build"]], "CUDA Docker Image": [[12, "cuda-docker-image"]], "Install CUDA": [[12, "install-cuda"]], "Install cuDNN": [[12, "install-cudnn"]], "Set Up for ROCm Build": [[12, "set-up-for-rocm-build"]], "ROCm Docker Image": [[12, "rocm-docker-image"]], "Install ROCm": [[12, "install-rocm"]], "Install MIOpen": [[12, "install-miopen"]], "Install the Build Tools": [[12, "install-the-build-tools"], [1, "install-the-build-tools"]], "C/C++ Compiler (GCC)": [[12, "c-c-compiler-gcc"]], "C/C++ Compiler (Clang)": [[12, "c-c-compiler-clang"]], "Compiler Symlinks": [[12, "compiler-symlinks"]], "Other Build Tools": [[12, "other-build-tools"], [1, "other-build-tools"]], "Installation Through Conda": [[12, "installation-through-conda"]], "Installation Through PyTorch PIP": [[12, "installation-through-pytorch-pip"]], "Post-Install Checks": [[12, 
"post-install-checks"]], "Build the FBGEMM_GPU Package": [[12, "build-the-fbgemm-gpu-package"]], "Preparing the Build": [[12, "preparing-the-build"], [1, "preparing-the-build"]], "The Build Process": [[12, "the-build-process"]], "Set Wheel Build Variables": [[12, "set-wheel-build-variables"]], "CPU-Only Build": [[12, "cpu-only-build"]], "CUDA Build": [[12, "cuda-build"]], "ROCm Build": [[12, "rocm-build"]], "Post-Build Checks (For Developers)": [[12, "post-build-checks-for-developers"]], "Undefined Symbols Check": [[12, "undefined-symbols-check"]], "GLIBC Version Compatibility Check": [[12, "glibc-version-compatibility-check"]], "Contributing": [[19, "contributing"]], "Code of Conduct": [[19, "code-of-conduct"]], "Pull Requests": [[19, "pull-requests"]], "Contributor License Agreement (\u201cCLA\u201d)": [[19, "contributor-license-agreement-cla"]], "Issues": [[19, "issues"]], "License": [[19, "license"], [20, "license"]], "Contact Us": [[18, "contact-us"]], "GitHub": [[18, "github"]], "Slack": [[18, "slack"]], "Jagged Tensor Operators": [[15, "jagged-tensor-operators"], [16, "jagged-tensor-operators"], [5, "jagged-tensor-operators"]], "High Level Overview": [[15, "high-level-overview"]], "Jagged Tensor Format": [[15, "jagged-tensor-format"]], "Values": [[15, "values"]], "Offsets": [[15, "offsets"]], "Max Lengths": [[15, "max-lengths"]], "Jagged Tensor Example": [[15, "jagged-tensor-example"]], "Jagged Tensor Operations": [[15, "jagged-tensor-operations"]], "Arithmetic Operations": [[15, "arithmetic-operations"]], "Conversion Operations": [[15, "conversion-operations"]], "Jagged to Dense": [[15, "jagged-to-dense"]], "Dense to Jagged": [[15, "dense-to-jagged"]], "Combined Arithmetic + Conversion Operations": [[15, "combined-arithmetic-conversion-operations"]], "Table Batched Embedding (TBE) Operators": [[17, "module-fbgemm_gpu"]], "Sphinx Documentation Pointers": [[24, "sphinx-documentation-pointers"]], "References Other Sections of the Documentation": [[24, "references-other-sections-of-the-documentation"]], "Referencing the Source Code": [[24, "referencing-the-source-code"]], "Adding LaTeX": [[24, "adding-latex"]], "Adding Graphs": [[24, "adding-graphs"]], "FBGEMM and FBGEMM_GPU Documentation Homepage": [[25, "fbgemm-and-fbgemm-gpu-documentation-homepage"]], "General Info": [[25, null]], "FBGEMM Development": [[25, null]], "FBGEMM_GPU Development": [[25, null]], "FBGEMM_GPU Overview": [[25, null]], "FBGEMM C++ API": [[25, null]], "FBGEMM_GPU C++ API": [[25, null]], "FBGEMM_GPU Python API": [[25, null]], "Combine Input Operators": [[4, "combine-input-operators"]], "Experimental Operators": [[3, "experimental-operators"]], "Attention Operators": [[3, "attention-operators"]], "CUDA Operators": [[5, "cuda-operators"], [2, "cuda-operators"], [6, "cuda-operators"], [10, "cuda-operators"], [9, "cuda-operators"]], "CPU Operators": [[5, "cpu-operators"], [2, "cpu-operators"], [6, "cpu-operators"], [10, "cpu-operators"], [9, "cpu-operators"]], "Quantization Utilities": [[0, "quantization-utilities"]], "Reference Implementation Methods": [[0, "reference-implementation-methods"]], "AVX-2 Implementation Methods": [[0, "avx-2-implementation-methods"]], "AVX-512 Implementation Methods": [[0, "avx-512-implementation-methods"]], "Embedding Operators": [[2, "embedding-operators"]], "FBGEMM Requirements": [[1, "fbgemm-requirements"]], "Hardware Requirements": [[1, "hardware-requirements"]], "Software Dependencies": [[1, "software-dependencies"]], "asmjit": [[1, "asmjit"]], "cpuinfo": [[1, "cpuinfo"]], 
"GoogleTest": [[1, "googletest"]], "C/C++ Compiler": [[1, "c-c-compiler"]], "Build the FBGEMM Library": [[1, "build-the-fbgemm-library"]], "Building on Linux and macOS (CMake + GCC)": [[1, "building-on-linux-and-macos-cmake-gcc"]], "Build Issues with GCC 12+": [[1, "build-issues-with-gcc-12"]], "Building on Linux and macOS (CMake + Clang)": [[1, "building-on-linux-and-macos-cmake-clang"]], "Building on Linux (Bazel)": [[1, "building-on-linux-bazel"]], "Building on Windows": [[1, "building-on-windows"]], "Documentation": [[22, "documentation"]], "General Documentation Guidelines": [[22, "general-documentation-guidelines"]], "Specific Documentation Guides": [[22, "specific-documentation-guides"]], "Building the Documentation": [[22, "building-the-documentation"]], "Set Up Build Environment": [[22, "set-up-build-environment"]], "Build FBGEMM_GPU": [[22, "build-fbgemm-gpu"]], "Set Up the Documentation Toolchain": [[22, "set-up-the-documentation-toolchain"]], "Build the Documentation": [[22, "build-the-documentation"]], "Linting the Documentation": [[22, "linting-the-documentation"]], "Deployment Preview": [[22, "deployment-preview"]], "Adding Documentation to C++ Code": [[21, "adding-documentation-to-c-code"]], "Adding Documentation to Python Code": [[23, "adding-documentation-to-python-code"]], "Todo": [[23, "id1"]], "Adding Documentation to Auto-Generated Python Code": [[23, "adding-documentation-to-auto-generated-python-code"]], "Layout Transformation Operators": [[6, "layout-transformation-operators"]], "CUDA Memory Operators": [[7, "cuda-memory-operators"]], "Pooled Embeddings Operators": [[8, "pooled-embeddings-operators"]], "Merge Operators": [[8, "merge-operators"]], "Permutation Operators": [[8, "permutation-operators"]], "Sparse Data Operators": [[10, "sparse-data-operators"]], "Table Batched Embedding Operators": [[11, "table-batched-embedding-operators"]], "Quantization Operators": [[9, "quantization-operators"]]}, "indexentries": {"findminmax (c++ function)": [[0, "_CPPv410FindMinMaxPKfPfPf7int64_t"]], "floatorhalftofusednbitrowwisequantizedsbhalf (c++ function)": [[0, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE"]], "fusedquantizedequantize (c++ function)": [[0, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif"]], "quantizegroupwise (c++ function)": [[0, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T"]], "xor128 (c++ function)": [[0, "_CPPv46Xor128v"]], "requantizeoutputprocessingavx2 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "requantizeoutputprocessinggconvavx512 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "bounds_check_indices_cuda (c++ function)": [[2, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t"]], "int_nbit_split_embedding_codegen_lookup_function (c++ function)": [[2, 
"_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE"]], "int_nbit_split_embedding_codegen_lookup_function_cpu (c++ function)": [[2, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function (c++ function)": [[2, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu (c++ function)": [[2, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE"]], "pruned_array_lookup_cpu (c++ function)": [[2, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor"]], "pruned_array_lookup_cuda (c++ function)": [[2, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_insert_unweighted_cpu (c++ function)": [[2, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_cuda (c++ function)": [[2, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_unweighted_cpu (c++ function)": [[2, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor"]], "gqa_attn_splitk_cuda (c++ function)": [[3, "_CPPv420gqa_attn_splitk_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_t"]], "padding_fused_tbe_input_combine_cpu (c++ function)": [[4, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t"]], "tbe_input_combine_cpu (c++ function)": [[4, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE"]], "batched_dense_vec_jagged_2d_mul (c++ function)": [[5, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor"]], "dense_to_jagged (c++ function)": [[5, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE"]], "jagged_1d_to_dense (c++ function)": [[5, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t"]], "jagged_2d_to_dense (c++ function)": [[5, 
"_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE"]], "jagged_dense_elementwise_add (c++ function)": [[5, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output (c++ function)": [[5, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output_cuda (c++ function)": [[5, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_mul (c++ function)": [[5, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_to_padded_dense (c++ function)": [[5, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd"]], "jagged_to_padded_dense_forward (c++ function)": [[5, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd"]], "recat_embedding_grad_output_cuda (c++ function)": [[6, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_batch_cuda (c++ function)": [[6, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor"]], "recat_embedding_grad_output_mixed_d_cpu (c++ function)": [[6, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_cuda (c++ function)": [[6, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE"]], "is_uvm_tensor (c++ function)": [[7, "_CPPv413is_uvm_tensorRK6Tensor"]], "new_host_mapped_tensor (c++ function)": [[7, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor (c++ function)": [[7, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor_meta (c++ function)": [[7, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_unified_tensor (c++ function)": [[7, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb"]], "new_vanilla_managed_tensor (c++ function)": [[7, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "uvm_cuda_mem_advise (c++ function)": [[7, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t"]], "uvm_cuda_mem_prefetch_async (c++ function)": [[7, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE"]], "uvm_mem_advice_dont_fork (c++ function)": [[7, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor"]], "uvm_storage (c++ function)": [[7, "_CPPv411uvm_storageRK6Tensor"]], "uvm_to_cpu (c++ function)": [[7, "_CPPv410uvm_to_cpuRK6Tensor"]], "uvm_to_cpu_clone (c++ function)": [[7, "_CPPv416uvm_to_cpu_cloneRK6Tensor"]], "uvm_to_device (c++ function)": [[7, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor"]], "all_to_one_device (c++ function)": [[8, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE"]], "permute_pooled_embs_auto_grad (c++ function)": [[8, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_cpu (c++ function)": [[8, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_gpu (c++ function)": [[8, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_split_cpu (c++ function)": [[8, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], 
"permute_pooled_embs_auto_grad_split_gpu (c++ function)": [[8, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_cpu_impl (c++ function)": [[8, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb"]], "permute_pooled_embs_split_cpu (c++ function)": [[8, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_split_gpu (c++ function)": [[8, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "fp8quantizedtofloat_ref (c++ function)": [[9, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi"]], "fp8rowwise_to_float_cpu (c++ function)": [[9, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t"]], "floattofp8quantized_ref (c++ function)": [[9, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd"]], "_fp8rowwise_to_float_gpu (c++ function)": [[9, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t"]], "_bfloat16_to_float_gpu (c++ function)": [[9, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE"]], "_float_to_fp8rowwise_gpu (c++ function)": [[9, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb"]], "_float_to_bfloat16_gpu (c++ function)": [[9, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE"]], "_float_to_fused8bitrowwise_cpu_out (c++ function)": [[9, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor"]], "_float_to_fused8bitrowwise_gpu (c++ function)": [[9, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor"]], "_float_to_fusednbitrowwise_gpu (c++ function)": [[9, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "_float_to_hfp8_gpu (c++ function)": [[9, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd"]], "_float_to_msfp_gpu (c++ function)": [[9, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd"]], "_float_to_paddedfp8rowwise_gpu (c++ function)": [[9, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t"]], "_fused8bitrowwise_to_float_cpu_out (c++ function)": [[9, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor"]], "_fused8bitrowwise_to_float_gpu (c++ function)": [[9, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_float_mixed_dim_gpu (c++ function)": [[9, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t"]], "_fused8bitrowwise_to_half_gpu (c++ function)": [[9, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_single_or_half_precision_gpu (c++ function)": [[9, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb"]], "_fusednbitrowwise_to_float_gpu (c++ function)": [[9, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_half_gpu (c++ function)": [[9, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_single_or_half_precision_gpu (c++ function)": [[9, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_half_to_fused8bitrowwise_gpu (c++ function)": [[9, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor"]], "_half_to_fusednbitrowwise_gpu (c++ function)": [[9, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t"]], "_hfp8_to_float_gpu (c++ function)": [[9, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_msfp_to_float_gpu (c++ 
function)": [[9, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t"]], "_paddedfp8rowwise_to_float_gpu (c++ function)": [[9, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t"]], "_single_or_half_precision_to_fused8bitrowwise_gpu (c++ function)": [[9, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor"]], "_single_or_half_precision_to_fusednbitrowwise_gpu (c++ function)": [[9, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "float_or_half_to_fused8bitrowwise_cpu (c++ function)": [[9, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor"]], "float_to_fp8rowwise_cpu (c++ function)": [[9, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb"]], "float_to_fused8bitrowwise_cpu (c++ function)": [[9, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor"]], "fused8bitrowwise_to_float_cpu (c++ function)": [[9, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor"]], "fused8bitrowwise_to_float_or_half_cpu (c++ function)": [[9, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb"]], "fused8bitrowwise_to_half_cpu (c++ function)": [[9, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor"]], "fusednbitrowwise_to_float_cpu (c++ function)": [[9, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t"]], "fusednbitrowwise_to_float_or_half_cpu (c++ function)": [[9, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t"]], "fusednbitrowwise_to_half_cpu (c++ function)": [[9, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t"]], "half_to_fused8bitrowwise_cpu (c++ function)": [[9, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor"]], "expand_into_jagged_permute_cuda (c++ function)": [[10, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t"]], "generic_histogram_binning_calibration_by_feature_cpu (c++ function)": [[10, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td"]], "histogram_binning_calibration_cpu (c++ function)": [[10, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td"]], "direct_mapped_lru_cache_populate_byte_cuda (c++ function)": [[11, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "direct_mapped_lxu_cache_lookup_cuda (c++ function)": [[11, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "get_unique_indices_cuda (c++ function)": [[11, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb"]], "host_lxu_cache_slot (c++ function)": [[11, "_CPPv419host_lxu_cache_slot7int64_t7int64_t"]], "lfu_cache_populate_byte_cuda (c++ function)": [[11, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "lfu_cache_populate_cuda (c++ function)": [[11, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb"]], "linearize_cache_indices_cuda (c++ function)": [[11, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t"]], "linearize_cache_indices_from_row_idx_cuda (c++ 
function)": [[11, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE"]], "lru_cache_find_uncached_cuda (c++ function)": [[11, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE"]], "lru_cache_populate_byte_cuda (c++ function)": [[11, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "lru_cache_populate_cuda (c++ function)": [[11, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE"]], "lxu_cache_flush_cuda (c++ function)": [[11, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb"]], "lxu_cache_locations_update_cuda (c++ function)": [[11, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE"]], "lxu_cache_locking_counter_decrement_cuda (c++ function)": [[11, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE"]], "lxu_cache_lookup_cuda (c++ function)": [[11, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE"]], "reset_weight_momentum_cuda (c++ function)": [[11, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "batched_dense_vec_jagged_2d_mul() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.batched_dense_vec_jagged_2d_mul"]], "dense_to_jagged() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.dense_to_jagged"]], "jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_1d_to_dense"]], "jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_2d_to_dense"]], "jagged_dense_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_dense_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_add() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_dense_elementwise_add"]], "jagged_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_mul() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_dense_elementwise_mul"]], "jagged_to_padded_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_to_padded_dense"]], "stacked_jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.stacked_jagged_1d_to_dense"]], "stacked_jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.stacked_jagged_2d_to_dense"]], "splittablebatchedembeddingbagscodegen() (in module fbgemm_gpu.split_table_batched_embeddings_ops)": [[17, "fbgemm_gpu.split_table_batched_embeddings_ops.SplitTableBatchedEmbeddingBagsCodegen"]], "fbgemm_gpu": [[17, "module-fbgemm_gpu"]], "module": [[17, "module-fbgemm_gpu"]], "example_method (c++ function)": [[21, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf"]], "example_method() (in module fbgemm_gpu.docs.examples)": [[23, 
"fbgemm_gpu.docs.examples.example_method"]]}}) \ No newline at end of file diff --git a/structfbgemm_1_1_b_c_s_r_matrix-members.html b/structfbgemm_1_1_b_c_s_r_matrix-members.html index e94f7b910..74e3a4289 100644 --- a/structfbgemm_1_1_b_c_s_r_matrix-members.html +++ b/structfbgemm_1_1_b_c_s_r_matrix-members.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1_b_c_s_r_matrix.html b/structfbgemm_1_1_b_c_s_r_matrix.html index b4dc019f8..b2796577b 100644 --- a/structfbgemm_1_1_b_c_s_r_matrix.html +++ b/structfbgemm_1_1_b_c_s_r_matrix.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1_blocking_factors-members.html b/structfbgemm_1_1_blocking_factors-members.html index 454ab0188..bdddba653 100644 --- a/structfbgemm_1_1_blocking_factors-members.html +++ b/structfbgemm_1_1_blocking_factors-members.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1_blocking_factors.html b/structfbgemm_1_1_blocking_factors.html index 684ea9ff9..707cee4e5 100644 --- a/structfbgemm_1_1_blocking_factors.html +++ b/structfbgemm_1_1_blocking_factors.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1_packing_traits.html b/structfbgemm_1_1_packing_traits.html index dd10abb86..c59cfa8a8 100644 --- a/structfbgemm_1_1_packing_traits.html +++ b/structfbgemm_1_1_packing_traits.html @@ -71,7 +71,7 @@
    @@ -79,7 +79,7 @@

    Detailed Description

-template<typename T, typename accT, inst_set_t instSet, typename int8Type = void>
+template<typename T, typename accT, inst_set_t instSet, typename int8Type = void>
    struct fbgemm::PackingTraits< T, accT, instSet, int8Type >
    Template Parameters
    diff --git a/structfbgemm_1_1_requantization_params-members.html b/structfbgemm_1_1_requantization_params-members.html index b1df7f70c..094edeca0 100644 --- a/structfbgemm_1_1_requantization_params-members.html +++ b/structfbgemm_1_1_requantization_params-members.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1_requantization_params.html b/structfbgemm_1_1_requantization_params.html index df49c8090..2a84c2693 100644 --- a/structfbgemm_1_1_requantization_params.html +++ b/structfbgemm_1_1_requantization_params.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1_tensor_quantization_params-members.html b/structfbgemm_1_1_tensor_quantization_params-members.html index b291fa77c..9c1661b20 100644 --- a/structfbgemm_1_1_tensor_quantization_params-members.html +++ b/structfbgemm_1_1_tensor_quantization_params-members.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1_tensor_quantization_params.html b/structfbgemm_1_1_tensor_quantization_params.html index 5d442e263..b8fffd460 100644 --- a/structfbgemm_1_1_tensor_quantization_params.html +++ b/structfbgemm_1_1_tensor_quantization_params.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1block__type__t-members.html b/structfbgemm_1_1block__type__t-members.html index 642731189..ef3abb0c8 100644 --- a/structfbgemm_1_1block__type__t-members.html +++ b/structfbgemm_1_1block__type__t-members.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1block__type__t.html b/structfbgemm_1_1block__type__t.html index 0d349b6cd..165103a53 100644 --- a/structfbgemm_1_1block__type__t.html +++ b/structfbgemm_1_1block__type__t.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1conv__param__t-members.html b/structfbgemm_1_1conv__param__t-members.html index 16462957b..4f6922709 100644 --- a/structfbgemm_1_1conv__param__t-members.html +++ b/structfbgemm_1_1conv__param__t-members.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1conv__param__t.html b/structfbgemm_1_1conv__param__t.html index 5de7862e7..4eb99ed8f 100644 --- a/structfbgemm_1_1conv__param__t.html +++ b/structfbgemm_1_1conv__param__t.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1is__8bit-members.html b/structfbgemm_1_1is__8bit-members.html index 3c554ab49..492824906 100644 --- a/structfbgemm_1_1is__8bit-members.html +++ b/structfbgemm_1_1is__8bit-members.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1is__8bit.html b/structfbgemm_1_1is__8bit.html index 1528420cf..cc9ed8044 100644 --- a/structfbgemm_1_1is__8bit.html +++ b/structfbgemm_1_1is__8bit.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1requantization_for_float_params__t-members.html b/structfbgemm_1_1requantization_for_float_params__t-members.html index ffbcc9887..1a943ceb4 100644 --- a/structfbgemm_1_1requantization_for_float_params__t-members.html +++ b/structfbgemm_1_1requantization_for_float_params__t-members.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1requantization_for_float_params__t.html b/structfbgemm_1_1requantization_for_float_params__t.html index 0f925c5e2..8f4c2f6db 100644 --- a/structfbgemm_1_1requantization_for_float_params__t.html +++ b/structfbgemm_1_1requantization_for_float_params__t.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1requantization_params__t-members.html b/structfbgemm_1_1requantization_params__t-members.html index 1f1b28dd1..3f81214da 100644 --- a/structfbgemm_1_1requantization_params__t-members.html +++ b/structfbgemm_1_1requantization_params__t-members.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1requantization_params__t.html b/structfbgemm_1_1requantization_params__t.html index 2ce92ddf3..d9f33dc58 100644 --- a/structfbgemm_1_1requantization_params__t.html +++ b/structfbgemm_1_1requantization_params__t.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1simd__info.html b/structfbgemm_1_1simd__info.html index 0d3c8ab17..dfc3d9086 100644 --- a/structfbgemm_1_1simd__info.html +++ b/structfbgemm_1_1simd__info.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1thread__type__t-members.html b/structfbgemm_1_1thread__type__t-members.html index 89f376656..3286142fc 100644 --- a/structfbgemm_1_1thread__type__t-members.html +++ b/structfbgemm_1_1thread__type__t-members.html @@ -71,7 +71,7 @@
    diff --git a/structfbgemm_1_1thread__type__t.html b/structfbgemm_1_1thread__type__t.html index acdef2d44..73c3d3d4e 100644 --- a/structfbgemm_1_1thread__type__t.html +++ b/structfbgemm_1_1thread__type__t.html @@ -71,7 +71,7 @@
    diff --git a/topics.html b/topics.html index 06a4f1885..4d0467513 100644 --- a/topics.html +++ b/topics.html @@ -86,16 +86,17 @@
 Tinput type
 CUDA Operators
 Embedding CUDA Operators
 Embedding CPU Operators
-Example Method Group
-Jagged Tensor CUDA Operators
-Jagged Tensor Operators
-Layout Transformation CUDA Operators
-Layout Transformation CPU Operators
-Quantization Operators (CUDA)
-Quantize Data CPU Operators
-Quantization Utilities (Generic)
-Quantization Utilities (AVX2)
-Quantization Utilities (AVX512)
+Experimental-gen-ai-attention
+Example Method Group
+Jagged Tensor CUDA Operators
+Jagged Tensor Operators
+Layout Transformation CUDA Operators
+Layout Transformation CPU Operators
+Quantization Operators (CUDA)
+Quantize Data CPU Operators
+Quantization Utilities (Generic)
+Quantization Utilities (AVX2)
+Quantization Utilities (AVX512)