diff --git a/fbgemm_gpu-python-api/table_batched_embedding_ops.html b/fbgemm_gpu-python-api/table_batched_embedding_ops.html index b5a646dea..46530588a 100644 --- a/fbgemm_gpu-python-api/table_batched_embedding_ops.html +++ b/fbgemm_gpu-python-api/table_batched_embedding_ops.html @@ -409,7 +409,7 @@
  • beta1 (float, optional) – The beta1 value used by LAMB and ADAM

  • beta2 (float, optional) – The beta2 value used by LAMB and ADAM

  • pooling_mode (PoolingMode, optional) – Pooling mode (PoolingMode.SUM, PoolingMode.MEAN, PoolingMode.NONE)

  • -
  • device (torch.device, optional) – The current device to place tensors on

  • +
  • device (torch.device, optional) – The current device to place tensors on

  • bounds_check_mode (BoundsCheckMode, optional) – If not set to BoundsCheckMode.NONE, apply boundary check for indices (BoundsCheckMode.NONE, BoundsCheckMode.FATAL, BoundsCheckMode.WARNING, BoundsCheckMode.IGNORE)

  • diff --git a/searchindex.js b/searchindex.js index 309befdb8..12d59510b 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["fbgemm-cpp-api/QuantUtils", "fbgemm-development/BuildInstructions", "fbgemm_gpu-cpp-api/embedding_ops", "fbgemm_gpu-cpp-api/input_combine", "fbgemm_gpu-cpp-api/jagged_tensor_ops", "fbgemm_gpu-cpp-api/layout_transform_ops", "fbgemm_gpu-cpp-api/memory_utils", "fbgemm_gpu-cpp-api/merge_pooled_embeddings", "fbgemm_gpu-cpp-api/quantize_ops", "fbgemm_gpu-cpp-api/sparse_ops", "fbgemm_gpu-cpp-api/split_table_batched_embeddings", "fbgemm_gpu-development/BuildInstructions", "fbgemm_gpu-development/InstallationInstructions", "fbgemm_gpu-development/TestInstructions", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps", "fbgemm_gpu-python-api/jagged_tensor_ops", "fbgemm_gpu-python-api/table_batched_embedding_ops", "general/ContactUs", "general/Contributing", "general/License", "general/documentation/Cpp", "general/documentation/Overview", "general/documentation/Python", "general/documentation/Sphinx", "index"], "filenames": ["fbgemm-cpp-api/QuantUtils.rst", "fbgemm-development/BuildInstructions.rst", "fbgemm_gpu-cpp-api/embedding_ops.rst", "fbgemm_gpu-cpp-api/input_combine.rst", "fbgemm_gpu-cpp-api/jagged_tensor_ops.rst", "fbgemm_gpu-cpp-api/layout_transform_ops.rst", "fbgemm_gpu-cpp-api/memory_utils.rst", "fbgemm_gpu-cpp-api/merge_pooled_embeddings.rst", "fbgemm_gpu-cpp-api/quantize_ops.rst", "fbgemm_gpu-cpp-api/sparse_ops.rst", "fbgemm_gpu-cpp-api/split_table_batched_embeddings.rst", "fbgemm_gpu-development/BuildInstructions.rst", "fbgemm_gpu-development/InstallationInstructions.rst", "fbgemm_gpu-development/TestInstructions.rst", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "general/ContactUs.rst", "general/Contributing.rst", "general/License.rst", "general/documentation/Cpp.rst", "general/documentation/Overview.rst", "general/documentation/Python.rst", "general/documentation/Sphinx.rst", "index.rst"], "titles": ["Quantization Utilities", "Build Instructions", "Embedding Operators", "Combine Input Operators", "Jagged Tensor Operators", "Layout Transformation Operators", "CUDA Memory Operators", "Pooled Embeddings Operators", "Quantization Operators", "Sparse Data Operators", "Table Batched Embedding Operators", "Build Instructions", "Installation Instructions", "Testing FBGEMM_GPU", "Jagged Tensor Operators", "Jagged Tensor Operators", "Table Batched Embedding (TBE) Operators", "Contact Us", "Contributing", "License", "Adding Documentation to C++ Code", "Documentation", "Adding Documentation to Python Code", "Sphinx Documentation Pointers", "FBGEMM and FBGEMM_GPU Documentation Homepage"], "terms": {"templat": [0, 11, 20], "typenam": [0, 20], "t": [0, 1, 6, 9, 11, 16, 18, 20, 21], "layout_t": 0, "layout": [0, 24], "kcx": 0, "void": [0, 2, 6, 8, 10], "quantizegroupwis": 0, "const": [0, 2, 3, 4, 5, 6, 7, 8, 9, 22], "float": [0, 8, 15, 16, 20, 22], "src": 0, "int": [0, 8, 15, 16, 20, 22], "k": 0, "c": [0, 10, 12, 14, 19, 21, 22, 23], "x": [0, 4, 14, 20, 22], "g": [0, 1, 9, 11, 20, 22], "scale": 0, "std": [0, 3, 4, 5, 6, 7, 9, 10, 20, 22], "int32_t": [0, 20, 22], "zero_point": 0, "dst": 0, "point": [0, 8, 15, 20, 22], "data": [0, 6, 14, 16, 19, 24], "type": [0, 1, 8, 12, 14, 15, 16, 20], "paramet": [0, 6, 8, 9, 15, 16, 20, 21, 22], "output": [0, 4, 8, 9, 15, 16, 20, 22], "int8_t": 0, "uint8_t": [0, 8, 10], "ar": [0, 1, 4, 10, 11, 12, 14, 15, 16, 19, 20, 21, 22], "support": [0, 1, 11, 12, 14, 22, 24], "input": [0, 4, 6, 8, 9, 14, 15, 16, 20, 24], "tensor": [0, 2, 3, 5, 6, 7, 8, 9, 10, 16, 21, 22, 24], "kxc": 0, "correspond": [0, 9, 10, 14, 20, 22], "kcr": 0, "kctr": 0, "weight": [0, 2, 9, 10, 16], "time": [0, 11, 12, 14], "dimens": [0, 4, 6, 9, 14, 15, 16, 22], "krsc": 0, "ktrsc": 0, "channel": [0, 11, 12, 17], "number": [0, 1, 9, 11, 14, 15, 16, 21], "r": [0, 21], "": [0, 1, 6, 11, 13, 14, 18, 20, 21, 22], "group": [0, 14, 20], "function": [0, 1, 11, 20, 22], "perform": [0, 1, 8, 9, 14, 24], "channelwis": 0, "1": [0, 1, 9, 10, 11, 12, 13, 14, 15, 16, 21, 22, 23], "groupwis": 0, "per": [0, 14], "size": [0, 1, 6, 8, 9, 14, 15, 16], "should": [0, 9, 10, 11, 12, 14, 18, 20, 21, 22], "equal": [0, 14, 22], "zero": [0, 15, 22], "reprsent": 0, "fusedquantizedequant": 0, "int64_t": [0, 2, 3, 4, 5, 6, 8, 9, 10], "len": [0, 14], "tensorquantizationparam": 0, "qparam": 0, "thread_id": 0, "0": [0, 1, 8, 9, 10, 11, 12, 14, 15, 16, 22], "num_thread": 0, "noise_ratio": 0, "0f": 0, "fuse": [0, 8, 16], "integ": [0, 6, 8, 14], "dequant": 0, "kernel": [0, 1, 6, 13, 24], "acceler": 0, "awar": 0, "train": [0, 16, 24], "fp32": [0, 8, 16], "valu": [0, 4, 6, 8, 9, 10, 15, 16, 20, 21, 22], "u": [0, 11, 23, 24], "int8": [0, 16], "us": [0, 1, 6, 9, 11, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24], "provid": [0, 1, 11, 12, 13, 19, 20, 21, 22, 24], "back": [0, 6, 10, 11, 12], "inputtyp": 0, "floatorhalftofusednbitrowwisequantizedsbhalf": 0, "bit_rat": [0, 8], "size_t": [0, 8, 20], "input_row": 0, "input_column": 0, "convert": [0, 6, 8, 14, 15, 22], "fp16": [0, 8, 16], "rowwis": [0, 8, 16], "bitrat": 0, "specifi": [0, 1, 8, 9, 11, 15, 16], "bit": [0, 8], "bia": [0, 8], "each": [0, 9, 11, 14, 15, 16, 22], "row": [0, 4, 10, 14, 15, 16, 22], "store": [0, 9, 10], "itself": [0, 14, 21], "end": [0, 12, 14, 23], "can": [0, 1, 8, 9, 11, 12, 14, 20, 21, 22, 23], "4": [0, 1, 11, 12, 14, 15, 16, 22], "8": [0, 1, 8, 11, 14, 16], "uint32_t": 0, "xor128": 0, "random": 0, "gener": [0, 1, 9, 11, 12, 20, 23], "9": [0, 1, 11, 14, 16], "base": [0, 9, 10, 11, 14], "thi": [0, 1, 4, 6, 7, 9, 11, 12, 14, 17, 18, 19, 20, 22, 23, 24], "paper": 0, "findminmax": 0, "m": [0, 11, 12, 13], "min": 0, "max": [0, 16], "find": [0, 10, 11], "matrix": [0, 1, 15, 24], "bool": [0, 6, 7, 8, 10, 16], "a_symmetr": 0, "b_symmetr": 0, "quantizationgranular": 0, "q_gran": 0, "has_bia": 0, "fuse_relu": 0, "bias_typ": 0, "direct": [0, 10, 12, 19, 20, 22, 23], "fals": [0, 6, 16, 21], "requantizeoutputprocessingavx2": 0, "out": [0, 11, 17, 19, 21], "inp": 0, "block_type_t": 0, "block": [0, 20, 22, 23], "ld_out": 0, "ld_in": 0, "requantizationparams_t": 0, "requant": 0, "avx2": [0, 1], "i": [0, 1, 4, 6, 8, 9, 10, 11, 12, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24], "c_per_g": 0, "requantizeoutputprocessinggconvavx512": 0, "avx512": 0, "note": [1, 10, 11, 12, 20, 21, 22, 23], "The": [1, 6, 8, 9, 12, 13, 14, 15, 16, 18, 20, 21, 22, 23], "most": [1, 11, 12, 14, 21], "date": [1, 11, 12, 21], "embed": [1, 11, 12, 21, 24], "script": [1, 11, 12, 21], "bundl": [1, 11, 12, 21], "repo": [1, 11, 12, 21, 22], "under": [1, 11, 12, 18, 19, 21, 22], "setup_env": [1, 11, 12, 21], "bash": [1, 11, 12, 21], "step": [1, 11, 12, 14, 21, 22], "fbgemm_gpu": [1, 6, 14, 16, 17, 18, 19, 20, 22], "follow": [1, 9, 11, 12, 14, 19, 20, 21, 22], "toolchain": [1, 11, 12], "run": [1, 11, 12, 13, 21], "cpu": [1, 6, 7, 13, 21], "higher": 1, "In": [1, 9, 12, 14, 18, 20, 22], "doe": [1, 2, 12, 20, 21, 22], "have": [1, 9, 10, 14, 21], "ani": [1, 9, 15, 18, 19, 21, 22], "intel": 1, "mkl": 1, "howev": [1, 11, 14, 19], "comparison": 1, "some": [1, 11, 14, 21], "benchmark": 1, "If": [1, 11, 12, 16, 18, 20, 21, 22], "found": [1, 11, 12, 21], "path": [1, 11, 20, 23], "through": [1, 18, 20, 22], "intel_mkl_dir": 1, "variabl": 1, "built": [1, 11, 12, 21, 24], "report": [1, 12], "otherwis": [1, 6, 12, 19], "subset": 1, "all": [1, 9, 10, 11, 12, 14, 16, 19, 21], "three": [1, 14], "git": [1, 11], "submodul": [1, 11], "custom": [1, 23], "version": [1, 12], "desir": [1, 11, 14, 15, 20], "thei": [1, 11, 21, 23], "asmjit_src_dir": 1, "cpuinfo_src_dir": 1, "googletest_source_dir": 1, "With": 1, "inner": [1, 14], "take": [1, 11], "one": [1, 8, 9, 10, 15, 16, 20, 22], "doesn": 1, "fit": [1, 19], "approach": 1, "so": [1, 9, 11, 12, 14], "implement": [1, 14], "dynam": 1, "effici": [1, 24], "shape": [1, 14, 16], "specif": [1, 9, 11, 16, 19], "vector": [1, 3, 4, 5, 6, 7, 15, 22], "code": [1, 19, 21], "third": 1, "parti": 1, "call": [1, 6, 12], "detect": [1, 13], "runtim": [1, 11], "pytorch": [1, 14, 17, 21, 22, 24], "project": [1, 18], "dispatch": [1, 6], "optim": [1, 8, 16], "test": [1, 11, 12, 18, 24], "you": [1, 18, 20, 22], "don": [1, 9, 11, 21], "want": [1, 18], "togeth": [1, 20, 21], "default": [1, 9, 11, 12, 16], "turn": [1, 21], "off": [1, 12, 17], "simpli": [1, 11], "fbgemm_build_test": 1, "conda": [1, 21], "For": [1, 6, 13, 14, 17, 19, 20, 21, 22, 23], "platform": [1, 11, 19], "gcc": [1, 11], "17": [1, 11], "sysroot": [1, 11], "packag": [1, 13, 21], "also": [1, 11, 16, 23], "need": [1, 11, 12, 13, 14, 18, 20, 22, 23], "avoid": [1, 11], "issu": [1, 4, 6, 11, 12, 17], "miss": [1, 10, 11], "symbol": 1, "glibcxx": [1, 11], "when": [1, 4, 9, 11, 13, 14, 16, 20, 21, 23], "n": [1, 8, 11, 12, 23], "env_nam": [1, 11, 12], "y": [1, 4, 11, 12, 15, 21], "gxx_linux": [1, 11], "64": [1, 11, 14], "10": [1, 11, 12, 14], "sysroot_linux": [1, 11], "2": [1, 11, 12, 14, 15, 16, 20, 22, 23], "forg": [1, 11, 21], "while": [1, 11, 21], "newer": [1, 11], "binari": [1, 11, 19], "compat": 1, "older": [1, 11, 12], "system": [1, 4, 11, 12, 14], "ubuntu": [1, 11], "20": [1, 11], "04": [1, 11], "cento": [1, 11], "stream": [1, 11], "becaus": [1, 11, 14], "refer": [1, 11, 14, 21, 22], "from": [1, 6, 8, 9, 10, 11, 12, 13, 14, 16, 18, 19, 20, 21, 22, 23], "libstdc": [1, 11], "6": [1, 11, 12, 14], "To": [1, 11, 13, 23], "see": [1, 6, 11, 12, 14, 20, 22, 23], "what": [1, 11, 21], "glibc": 1, "avail": [1, 6, 11, 21], "libcxx_path": [1, 11], "print": [1, 11, 12, 16, 22], "objdump": [1, 11], "tc": [1, 11], "grep": [1, 11], "glibc_": [1, 11], "sed": [1, 11], "sort": [1, 9, 10, 11], "vu": [1, 11], "cat": [1, 11], "glibcxx_": [1, 11], "machin": [1, 11, 12, 13, 24], "microsoft": [1, 8], "visual": 1, "studio": 1, "2019": 1, "recommend": [1, 4, 11, 12, 14], "necessari": [1, 11], "ninja": [1, 11], "etc": [1, 11, 16], "make": [1, 10, 11, 18, 20, 21, 22], "openbla": 1, "dev": [1, 11], "onli": [1, 9, 10, 13, 14, 18, 20, 21, 23], "clone": [1, 11], "along": [1, 11, 12], "its": [1, 6, 9, 11, 16, 19, 21, 23], "insid": [1, 11, 12, 21, 23], "recurs": [1, 11], "http": [1, 11, 12, 18, 20, 21, 22], "github": [1, 11, 18], "com": [1, 11, 18], "cd": [1, 11, 13, 21], "assum": [1, 9], "process": [1, 4, 12, 14, 18, 22], "straightforward": 1, "creat": [1, 6, 11, 14, 18, 20, 22, 23], "directori": [1, 11, 13, 18, 20, 21], "mkdir": 1, "doxygen": [1, 20, 21], "document": [1, 6, 18, 19], "add": [1, 15, 18, 20, 21, 22], "dfbgemm_build_doc": 1, "ON": [1, 19], "duse_sanit": 1, "address": [1, 11], "dfbgemm_library_typ": 1, "share": [1, 6], "dpython_execut": 1, "which": [1, 9, 11, 12, 14, 16, 21], "python3": [1, 12], "j": [1, 14], "verbos": 1, "likewis": 1, "veri": [1, 20, 21, 22], "target": [1, 6, 8, 9, 11, 14, 20, 21, 22, 23], "architectur": [1, 11, 12], "bc": [1, 11], "x64": 1, "program": [1, 18], "file": [1, 11, 12, 17, 18, 20, 21, 22, 23], "x86": [1, 24], "enterpris": 1, "vc": 1, "auxiliari": 1, "vcvarsal": 1, "bat": 1, "build_dir": 1, "dfbgemm_build_benchmark": 1, "dcmake_build_typ": 1, "releas": [1, 12], "dcmake_c_compil": 1, "cl": 1, "ex": 1, "dcmake_cxx_compil": 1, "v": [1, 4, 13, 15], "bounds_check_indices_cuda": 2, "rows_per_t": 2, "indic": [2, 10, 14, 16], "offset": [2, 4, 9, 10, 15, 16], "bounds_check_mod": [2, 16], "warn": [2, 16, 20], "c10": [2, 4, 6, 8, 10], "option": [2, 4, 6, 10, 11, 15, 16], "b_ofset": 2, "max_b": 2, "int_nbit_split_embedding_codegen_lookup_funct": 2, "dev_weight": [2, 10], "uvm_weight": [2, 10], "weights_plac": [2, 10], "weights_offset": [2, 10], "weights_ti": [2, 10], "d_offset": [2, 8, 10], "total_d": [2, 10, 16], "max_int2_d": 2, "max_int4_d": 2, "max_int8_d": 2, "max_float16_d": 2, "max_float32_d": 2, "pooling_mod": [2, 16], "indice_weight": 2, "output_dtyp": [2, 8, 16], "lxu_cache_weight": [2, 10], "lxu_cache_loc": [2, 10], "row_align": [2, 10], "max_float8_d": 2, "fp8_exponent_bit": 2, "fp8_exponent_bia": 2, "int_nbit_split_embedding_uvm_caching_codegen_lookup_funct": 2, "cache_hash_size_cumsum": [2, 10], "total_cache_hash_s": [2, 10], "cache_index_table_map": [2, 10], "lxu_cache_st": [2, 10], "lxu_stat": 2, "simlar": 2, "uvm_cach": 2, "lookup": [2, 10], "pruned_hashmap_lookup_cuda": 2, "hash_tabl": 2, "hash_table_offset": 2, "pruned_array_lookup_cuda": 2, "index_remap": 2, "index_remappings_offset": 2, "int_nbit_split_embedding_codegen_lookup_function_cpu": 2, "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu": 2, "pruned_hashmap_insert_unweighted_cpu": 2, "dense_indic": 2, "pruned_hashmap_lookup_unweighted_cpu": 2, "pruned_array_lookup_cpu": 2, "tupl": [3, 4, 9, 10, 16], "tbe_input_combine_cpu": 3, "indices_list": 3, "offsets_list": 3, "per_sample_weight": [3, 16], "include_last_offset": 3, "padding_fused_tbe_input_combine_cpu": 3, "batch_siz": 3, "solv": 4, "differ": [4, 9, 14], "length": [4, 9, 15, 16, 22], "often": 4, "occur": [4, 20], "spars": [4, 14, 24], "featur": [4, 9, 14, 16, 17], "well": [4, 9, 11, 20], "natur": [4, 14], "languag": [4, 14, 23], "batch": [4, 9, 14, 15, 24], "jagged_to_padded_dense_forward": 4, "symintarrayref": 4, "max_length": [4, 15], "doubl": [4, 8, 9], "padding_valu": [4, 15], "jagged_dense_elementwise_add_jagged_output_cuda": 4, "x_valu": [4, 15], "x_offset": [4, 15, 22], "where": [4, 6, 9, 14, 15, 16], "dens": [4, 15, 22], "jagged_to_padded_dens": [4, 15], "jagged_dense_elementwise_add": [4, 15], "jagged_dense_elementwise_mul": [4, 15], "batched_dense_vec_jagged_2d_mul": [4, 15], "a_valu": [4, 15], "a_offset": [4, 15], "dense_to_jag": [4, 15], "symint": 4, "total_l": [4, 15], "jagged_dense_elementwise_add_jagged_output": [4, 15], "jagged_1d_to_dens": [4, 15], "max_l": 4, "jagged_2d_to_dens": [4, 11, 12, 15, 21, 22], "max_sequence_length": [4, 15, 22], "recat_embedding_grad_output_cuda": 5, "grad_output": 5, "num_features_per_rank": 5, "recat_embedding_grad_output_mixed_d_cuda": 5, "dim_sum_per_rank": 5, "recat_embedding_grad_output_mixed_d_batch_cuda": 5, "cumsum_dim_sum_per_rank": 5, "recat_embedding_grad_output_mixed_d_cpu": 5, "new_managed_tensor": 6, "self": 6, "alloc": [6, 20], "an": [6, 9, 12, 14, 16, 20, 21, 22, 23], "unifi": 6, "manag": [6, 11, 12, 16], "uvm": [6, 13], "Then": 6, "set": [6, 10, 13, 14, 15, 16], "prefer": [6, 12], "storag": [6, 8, 10], "locat": [6, 10, 11, 14], "host": 6, "establish": 6, "map": [6, 9, 10, 14, 16], "devic": [6, 7, 11, 13, 16], "return": [6, 8, 9, 15, 16, 20, 21, 22], "A": [6, 8, 12, 14, 15, 16, 19, 20, 21, 22], "new": [6, 8, 10, 20, 21, 22], "new_managed_tensor_meta": 6, "placehold": 6, "meta": [6, 19], "kei": 6, "empti": [6, 14, 15, 23], "new_host_mapped_tensor": 6, "new_unified_tensor": 6, "is_host_map": 6, "either": [6, 8, 9, 11, 12], "whether": [6, 11, 19], "depend": [6, 8, 11, 12, 14], "new_vanilla_managed_tensor": 6, "allow": 6, "automat": [6, 9, 13, 21], "uvm_storag": 6, "check": [6, 16], "gpu": [6, 11, 12, 13, 24], "true": [6, 16], "is_uvm_tensor": 6, "BUT": [6, 19], "non": [6, 16], "uvm_to_cpu": 6, "effect": [6, 14], "move": 6, "uvm_to_devic": 6, "prototyp": 6, "same": [6, 9, 11, 14, 15, 20, 21, 22], "whose": 6, "uvm_cuda_mem_advis": 6, "cuda_memory_advis": 6, "cudamemadvis": 6, "cudamemoryadvis": 6, "enum": [6, 8], "python": [6, 11, 13, 20, 21, 23], "side": [6, 20, 22, 24], "namespac": 6, "over": [6, 11], "valid": 6, "here": [6, 11, 18, 20, 21, 22, 23], "more": [6, 11, 16, 20, 22, 23], "inform": [6, 14, 22, 23], "uvm_cuda_mem_prefetch_async": 6, "device_t": 6, "cudamemprefetchasync": 6, "prefetch": 6, "destin": 6, "uvm_mem_advice_dont_fork": 6, "madvis": 6, "madv_dontfork": 6, "workaround": 6, "driver": [6, 11], "un": 6, "page": [6, 18, 23, 24], "tabl": [6, 9, 14, 24], "fork": [6, 18], "caus": [6, 11, 12, 19, 21], "slowdown": 6, "next": [6, 14, 20, 22], "access": [6, 16], "uvm_to_cpu_clon": 6, "copi": 6, "contigu": [6, 9], "singl": [6, 8], "thread": 6, "memcpi": 6, "contain": [6, 11, 14, 15, 16, 22], "section": [7, 11, 22], "includ": [7, 11, 19, 20, 22], "cuda": [7, 16, 24], "variou": 7, "all_to_one_devic": 7, "inputtensor": 7, "target_devic": 7, "permute_pooled_embs_split_gpu": 7, "pooled_emb": 7, "offset_dim_list": 7, "permute_list": 7, "inv_offset_dim_list": 7, "inv_permute_list": 7, "permute_pooled_embs_auto_grad_split_gpu": 7, "permute_pooled_embs_auto_grad_gpu": 7, "permute_pooled_embs_cpu_impl": 7, "allow_dupl": 7, "permute_pooled_embs_split_cpu": 7, "permute_pooled_embs_auto_grad_split_cpu": 7, "permute_pooled_embs_auto_grad": 7, "permute_pooled_embs_auto_grad_cpu": 7, "model": [8, 9], "techniqu": 8, "reduc": 8, "larg": 8, "order": [8, 14, 18], "achiev": [8, 12], "better": [8, 20], "small": 8, "loss": [8, 19], "accuraci": 8, "_float_to_bfloat16_gpu": 8, "brain": 8, "bfloat16": 8, "_bfloat16_to_float_gpu": 8, "_float_to_fp8rowwise_gpu": 8, "forward": 8, "fp8": 8, "dtype": [8, 16], "sparsetyp": [8, 16], "bf16": 8, "throw": [8, 20], "error": [8, 12, 20, 21, 22], "_fp8rowwise_to_float_gpu": 8, "represent": [8, 14], "_float_to_fused8bitrowwise_gpu": 8, "_half_to_fused8bitrowwise_gpu": 8, "half": 8, "_single_or_half_precision_to_fused8bitrowwise_gpu": 8, "_fused8bitrowwise_to_float_gpu": 8, "_fused8bitrowwise_to_half_gpu": 8, "_fused8bitrowwise_to_single_or_half_precision_gpu": 8, "scale_bias_last": 8, "quant_padding_float_typ": 8, "_fused8bitrowwise_to_float_mixed_dim_gpu": 8, "kfloat": 8, "khalf": 8, "_float_to_fusednbitrowwise_gpu": 8, "_half_to_fusednbitrowwise_gpu": 8, "_single_or_half_precision_to_fusednbitrowwise_gpu": 8, "_fusednbitrowwise_to_float_gpu": 8, "_fusednbitrowwise_to_half_gpu": 8, "_fusednbitrowwise_to_single_or_half_precision_gpu": 8, "_float_to_hfp8_gpu": 8, "ebit": 8, "exponent_bia": 8, "max_po": 8, "hybrid": 8, "hfp8": 8, "_hfp8_to_float_gpu": 8, "_float_to_msfp_gpu": 8, "bounding_box_s": 8, "mbit": 8, "min_po": 8, "msfp": 8, "_msfp_to_float_gpu": 8, "_float_to_paddedfp8rowwise_gpu": 8, "row_dim": 8, "pad": [8, 14, 15, 22], "_paddedfp8rowwise_to_float_gpu": 8, "output_last_dim": 8, "_fused8bitrowwise_to_float_cpu_out": 8, "_float_to_fused8bitrowwise_cpu_out": 8, "float_to_fused8bitrowwise_cpu": 8, "half_to_fused8bitrowwise_cpu": 8, "float_or_half_to_fused8bitrowwise_cpu": 8, "fused8bitrowwise_to_float_cpu": 8, "fused8bitrowwise_to_half_cpu": 8, "fused8bitrowwise_to_float_or_half_cpu": 8, "float_to_fp8rowwise_cpu": 8, "fp8rowwise_to_float_cpu": 8, "fusednbitrowwise_to_float_cpu": 8, "fusednbitrowwise_to_half_cpu": 8, "fusednbitrowwise_to_float_or_half_cpu": 8, "floattofp8quantized_ref": 8, "nrow": 8, "ncol": 8, "fp8quantizedtofloat_ref": 8, "expand_into_jagged_permute_cuda": 9, "permut": 9, "input_offset": 9, "output_offset": 9, "output_s": 9, "expand_into_jagged_permut": 9, "expand": 9, "index": [9, 10, 11, 12, 14, 20, 22], "case": [9, 11, 12, 14, 18], "ha": [9, 12, 14, 18, 20, 21], "across": [9, 11], "rank": [9, 14], "level": 9, "exclus": 9, "op": [9, 12, 15, 22], "bag": [9, 16, 24], "posit": [9, 16], "sit": 9, "after": [9, 11, 12, 13, 14, 16, 21, 22, 23], "we": [9, 14, 18], "deriv": [9, 14, 19], "arrai": [9, 15, 22], "comput": [9, 11, 12, 16], "formula": 9, "output_permut": 9, "table_offset": 9, "bag_offset": 9, "histogram_binning_calibration_cpu": 9, "logit": 9, "bin_num_exampl": 9, "bin_num_posit": 9, "positive_weight": 9, "lower_bound": 9, "upper_bound": 9, "bin_ctr_in_use_aft": 9, "bin_ctr_weight_valu": 9, "divid": [9, 14], "predict": 9, "rang": [9, 14], "e": [9, 11, 14, 20, 22, 23], "b": [9, 11, 14, 15, 16, 20, 21, 22, 23], "bin": [9, 11], "two": [9, 14, 15, 16, 21], "exampl": [9, 11, 12, 13, 15, 16, 20, 21, 22, 23], "fall": [9, 11, 12], "bucket": [9, 11], "basic": [9, 22], "histogram": 9, "As": [9, 11, 12, 14], "result": [9, 15], "statist": 9, "real": 9, "ctr": 9, "num_po": 9, "num_exampl": 9, "final": 9, "calibr": 9, "pre": [9, 11, 12], "cali": 9, "wai": [9, 19], "within": 9, "suffici": [9, 18, 21], "That": 9, "fine": 9, "grain": 9, "modul": [9, 12, 16, 22], "theoret": 9, "layer": 9, "fix": 9, "uncalibr": 9, "befor": [9, 16, 23], "appli": [9, 11, 14, 16], "sigmoid": 9, "calibart": 9, "pass": [9, 16, 18, 21], "argument": [9, 20, 21, 22], "lower": 9, "bound": [9, 14], "calibration_target": 9, "observ": 9, "sum": [9, 15, 16], "statisct": 9, "final_calibrated_predict": 9, "bin_ctr_weight": 9, "bin_ctr": 9, "calibrated_predict": 9, "bin_id": 9, "generic_histogram_binning_calibration_by_feature_cpu": 9, "segment_valu": 9, "segment_length": 9, "num_seg": 9, "bin_boundari": 9, "extens": [9, 20, 21], "ectr": 9, "abov": [9, 12, 14, 19, 20, 22, 23], "accept": [9, 18], "keyjaggedtensor": 9, "num_bin": 9, "longer": [9, 17, 20], "still": [9, 11], "parambin_ctr_weight_valu": 9, "get_unique_indices_cuda": 10, "linear_indic": 10, "max_indic": 10, "compute_count": 10, "dedupl": 10, "pair": [10, 23], "lru_cache_find_uncached_cuda": 10, "unique_indic": 10, "unique_indices_length": 10, "time_stamp": 10, "lru_stat": 10, "gather_cache_stat": 10, "uvm_cache_stat": 10, "lock_cache_lin": 10, "lxu_cache_locking_count": 10, "lru": [10, 16], "cach": [10, 11, 16], "uncach": 10, "them": 10, "host_lxu_cache_slot": 10, "h_in": 10, "cache_set": [10, 16], "linearize_cache_indices_cuda": 10, "linear": 10, "uniqu": [10, 23], "linearize_cache_indices_from_row_idx_cuda": 10, "update_table_indic": 10, "update_row_indic": 10, "format": [10, 21, 22], "inplac": 10, "updat": [10, 11, 12, 16, 18], "lru_cache_populate_cuda": 10, "hash_size_cumsum": 10, "linear_cache_indic": 10, "stochastic_round": [10, 16], "fetch": 10, "insert": [10, 23], "timestep": 10, "lru_cache_populate_byte_cuda": 10, "byte": 10, "element": [10, 14], "direct_mapped_lru_cache_populate_byte_cuda": 10, "lxu_cache_miss_timestamp": 10, "assoc": 10, "variant": [10, 11, 12, 21], "lfu_cache_populate_cuda": 10, "lfu_stat": 10, "lfu": [10, 16], "lfu_cache_populate_byte_cuda": 10, "lxu_cache_lookup_cuda": 10, "invalid_index": 10, "num_uniq_cache_indic": 10, "lxu_cache_locations_output": 10, "look": [10, 16], "up": [10, 16], "slot": 10, "sentinel": 10, "direct_mapped_lxu_cache_lookup_cuda": 10, "lxu_cache_flush_cuda": 10, "flush": 10, "reset_weight_momentum_cuda": 10, "momentum1_dev": 10, "momentum1_uvm": 10, "momentum1_plac": 10, "momentum1_offset": 10, "pruned_indic": 10, "pruned_indices_offset": 10, "logical_table_id": 10, "buffer_id": 10, "lxu_cache_locking_counter_decrement_cuda": 10, "decrement": 10, "counter": 10, "lxu_cache_locations_update_cuda": 10, "lxu_cache_locations_new": 10, "fbgemm": [11, 12, 15, 17, 18, 19, 21, 22], "reproduc": [11, 12, 18, 19], "export": [11, 13], "platform_nam": 11, "unam": 11, "prefix": [11, 23], "miniconda_prefix": 11, "home": 11, "download": [11, 12], "wget": 11, "q": 11, "anaconda": 11, "miniconda3": 11, "latest": 11, "sh": 11, "o": [11, 12], "p": 11, "load": [11, 14, 22], "shortcut": 11, "bashrc": 11, "command": [11, 12, 20, 21], "against": [11, 13], "env": [11, 12], "name": [11, 12, 19, 20, 22], "python_vers": 11, "3": [11, 14, 15, 16, 19, 22], "12": [11, 14, 16], "upgrad": 11, "pyopenssl": 11, "22": [11, 14], "requir": [11, 12, 14, 16, 21, 22], "recent": [11, 12], "nvcc": 11, "capabl": [11, 13], "5": [11, 14, 16], "done": [11, 12], "bare": 11, "metal": 11, "neither": [11, 19], "nor": [11, 19], "nvidia": 11, "present": [11, 22], "sinc": [11, 14], "setup": [11, 12], "pull": [11, 12, 21], "linux": [11, 12], "distribut": [11, 19], "11": [11, 12, 14], "entrypoint": 11, "devel": 11, "ubuntu22": 11, "rest": [11, 12], "mai": [11, 12, 14, 19], "construct": [11, 12, 14], "mechan": 11, "full": [11, 12, 23], "nvml": 11, "org": [11, 12, 22], "cuda_vers": 11, "label": 11, "verifi": [11, 12, 20, 22], "cuda_runtim": 11, "h": [11, 15, 20], "libnvidia": [11, 12], "ml": [11, 12], "conda_prefix": 11, "printenv": 11, "extract": 11, "given": [11, 14, 15], "url": [11, 12], "builder": 11, "blob": 11, "main": [11, 18], "common": [11, 12, 14, 22], "install_cuda": 11, "cudnn_url": 11, "redist": 11, "x86_64": 11, "26_cuda12": 11, "archiv": 11, "tar": 11, "xz": 11, "unpack": 11, "amd": [11, 12], "minim": 11, "termin": 11, "both": [11, 17, 19, 21], "minimum": [11, 20, 21, 22], "oper": [11, 12, 24], "guid": [11, 22], "disabl": 11, "apt": 11, "prompt": 11, "debian_frontend": 11, "noninteract": 11, "db": 11, "radeon": 11, "amdgpu": 11, "focal": 11, "install_5": 11, "50601": 11, "1_all": 11, "deb": 11, "usecas": 11, "hiplibsdk": 11, "dkm": 11, "hipifi": 11, "clang": 11, "hip": 11, "oppos": 11, "reli": 11, "fbgemm_cpu": 11, "librari": [11, 21, 24], "cmake": 11, "click": 11, "hypothesi": [11, 12], "jinja2": 11, "numpi": [11, 12], "scikit": [11, 12], "offici": 11, "homepag": 11, "authorit": [11, 12, 21], "how": [11, 12, 13, 22], "nightli": [11, 12], "rc": 11, "without": [11, 19], "alwai": 11, "reliabl": 11, "known": [11, 16], "arriv": 11, "hour": 11, "later": 11, "than": [11, 14], "window": 11, "silent": 11, "place": [11, 16], "artifact": 11, "select": 11, "dure": [11, 14, 16, 22], "thu": [11, 16], "import": [11, 12, 16, 22, 23], "first": [11, 20, 22, 23], "prior": [11, 12, 19], "much": [11, 20], "determinist": 11, "torch": [11, 12, 15, 16, 21, 22], "whl": [11, 12], "cu121": [11, 12], "rocm5": [11, 12], "write": [11, 12, 21, 22], "ensur": [11, 12, 18], "properli": 11, "__version__": 11, "cuda_cmake_macro": 11, "txt": [11, 21, 23], "tag": [11, 20, 23], "fbgemm_vers": 11, "v0": 11, "fbgemm_": 11, "addit": [11, 14, 15], "flow": 11, "keep": 11, "state": 11, "becom": 11, "stale": 11, "problem": 11, "re": [11, 12], "attempt": 11, "failur": [11, 12], "due": 11, "clear": [11, 18], "py": [11, 12, 13, 21, 22], "clean": [11, 21], "must": [11, 12, 13, 14, 16, 19, 23], "package_nam": 11, "fbgemm_gpu_": 11, "It": [11, 12, 14], "convent": 11, "major": 11, "minor": 11, "py312": 11, "python_tag": 11, "determin": [11, 14], "processor": 11, "arch": 11, "python_plat_nam": 11, "manylinux2014_": 11, "maco": 11, "macosx_10_9_": 11, "arm64": 11, "macosx_11_0_": 11, "win_": 11, "cpu_onli": 11, "flag": [11, 21], "bdist_wheel": 11, "package_vari": 11, "plat": 11, "made": [11, 21], "presenc": 11, "unabl": 11, "cudacxx": 11, "cuda_bin_path": 11, "cub": 11, "applic": [11, 16, 20, 22], "cub_dir": 11, "header": [11, 20, 23], "cudnn_include_dir": 11, "cudnn_librari": 11, "lib": [11, 12], "nvml_lib_path": 11, "sm70": [11, 12], "80": 11, "v100": [11, 12], "a100": [11, 12], "current": [11, 12, 14, 16], "cuda_arch_list": 11, "7": [11, 12, 14, 15, 16], "unset": 11, "torch_cuda_arch_list": 11, "exist": [11, 20, 22], "preced": 11, "dtorch_cuda_arch_list": 11, "invoc": [11, 21], "rocm_path": 11, "pytorch_rocm_arch": 11, "gfx906": 11, "gfx908": 11, "gfx90a": 11, "wiki": 11, "gentoo": 11, "list": [11, 14, 15, 16, 19, 20, 22], "rocminfo": 11, "gfx": 11, "dhip_root_dir": 11, "dcmake_c_flag": 11, "dtorch_use_hip_dsa": 11, "dcmake_cxx_flag": 11, "complet": [11, 18, 21], "actual": 11, "correct": 11, "lot": 11, "jinja": 11, "instanti": 11, "sure": [11, 18, 20, 22], "accident": 11, "cours": 11, "fbgemm_gpu_lib_path": 11, "fbgemm_gpu_pi": [11, 12], "defin": [11, 14, 20], "nm": 11, "gdcu": 11, "referenc": 11, "certain": 11, "gdc": 11, "merge_pooled_embed": [11, 12], "isol": [12, 21], "build": [12, 13, 20, 22, 24], "work": [12, 14, 18], "sm80": 12, "respect": 12, "other": [12, 14, 19, 20, 21, 22], "scratch": 12, "guarante": 12, "especi": 12, "displai": [12, 23], "do": [12, 18], "smi": 12, "515": 12, "76": 12, "persist": 12, "bu": [12, 23], "id": 12, "disp": 12, "volatil": 12, "uncorr": 12, "ecc": 12, "fan": 12, "temp": 12, "perf": 12, "pwr": 12, "usag": [12, 21, 22], "cap": 12, "memori": [12, 16, 24], "util": [12, 24], "mig": 12, "a10g": 12, "00000000": 12, "00": 12, "1e": 12, "31c": 12, "p0": 12, "59w": 12, "300w": 12, "0mib": 12, "23028mib": 12, "gi": 12, "ci": 12, "pid": 12, "No": 12, "though": 12, "expos": 12, "detail": 12, "onc": [12, 18], "imag": 12, "launch": 12, "alreadi": [12, 18, 20, 22], "toolkit": 12, "interfac": 12, "concis": 12, "info": [12, 20, 22], "dieedg": 12, "avgpwr": 12, "sclk": 12, "mclk": 12, "pwrcap": 12, "vram": 12, "33": 12, "0c": 12, "37": 12, "0w": 12, "300mhz": 12, "1200mhz": 12, "auto": [12, 21], "290": 12, "32": 12, "39": 12, "log": 12, "difficult": 12, "relev": [12, 20], "correctli": [12, 20, 21], "link": [12, 21], "encount": 12, "signatur": [12, 21], "traceback": 12, "last": 12, "root": [12, 18], "miniconda": 12, "mycondaenv": 12, "site": 12, "_op": [12, 21], "line": [12, 22, 23], "565": 12, "__getattr__": 12, "overload_nam": 12, "_c": 12, "_jit_get_oper": 12, "qualified_op_nam": 12, "runtimeerror": 12, "except": [12, 20, 22], "wa": 12, "string": [12, 23], "post47": 12, "py3": 12, "aarch64": 12, "egg": 12, "__init__": [12, 22], "21": 12, "_fbgemm_gpu_doc": 12, "noqa": 12, "f401": 12, "e402": 12, "18": 12, "569": 12, "rais": [12, 22], "attributeerror": [12, 22], "_opnamespac": 12, "object": [12, 14], "attribut": [12, 22], "cli": 12, "main_run": 12, "execut": [12, 13], "47": 12, "fail": [12, 13, 20], "_zn6fbgemm48floatorhalftofusednbitrowwisequantizedsbhalfavx2itli2eeevpkt_miph": 12, "appear": 12, "reason": [12, 21], "libtorch": 12, "visibl": 12, "ld_library_path": 12, "incorrectli": [12, 21], "declar": [12, 20], "were": [12, 15], "pr": [12, 20, 21, 22], "1618": 12, "former": 12, "resolv": 12, "manual": [12, 20], "latter": 12, "seriou": 12, "tha": 12, "develop": [12, 21], "bench": 13, "good": [13, 19], "instal": [13, 21, 24], "pytest": 13, "rsx": 13, "w": 13, "ignor": [13, 16, 21], "pytestcollectionwarn": 13, "split_table_batched_embeddings_test": 13, "quantize_ops_test": 13, "sparse_ops_test": 13, "split_embedding_inference_converter_test": 13, "mode": [13, 16], "cuda_visible_devic": 13, "environ": 13, "enabl": 13, "debug": 13, "cuda_launch_block": 13, "fbgemm_test_with_rocm": 13, "hip_launch_block": 13, "split_table_batched_embeddings_benchmark": 13, "purpos": [14, 15, 16, 19], "handl": 14, "consecut": 14, "similar": [14, 16], "nestedtensor": 14, "raggedtensor": 14, "tensorflow": 14, "notabl": 14, "token": 14, "sentenc": 14, "repres": 14, "maxlength": 14, "2d": [14, 15, 16, 22], "numel": 14, "greatest": 14, "divisor": 14, "smallest": 14, "sub": 14, "exclud": 14, "partit": 14, "impli": [14, 19], "denot": [14, 20, 22], "offest": 14, "outer": 14, "would": 14, "begin": 14, "maximum": [14, 15, 22], "between": [14, 20, 21, 23], "normal": 14, "densor": 14, "form": [14, 19], "figur": 14, "below": 14, "show": [14, 21], "mean": [14, 16], "accomod": 14, "logic": [14, 20], "At": [14, 20, 21, 22], "stage": 14, "wise": [14, 16], "multipl": [14, 15, 16, 22, 24], "hadamard": 14, "product": [14, 19], "involv": 14, "bmatrix": 14, "rightarrow": 14, "16": 14, "25": 14, "36": 14, "49": 14, "81": 14, "50": 14, "operand": 14, "word": 14, "ax": 14, "properti": 14, "hold": 14, "elementwis": [14, 15], "equival": 14, "d": [14, 15, 23], "start": [14, 15, 22, 23], "dim": 14, "onto": 14, "part": 14, "everi": 14, "those": [14, 15, 18, 22], "converson": 14, "possibl": [14, 18, 19], "could": 14, "lead": 14, "read": [14, 16], "relat": 14, "smaller": 14, "expect": 14, "happen": 14, "give": 14, "situat": 14, "like": 14, "dense_tensor": 14, "jagged_tensor": 14, "break": 14, "exact": 14, "usual": 14, "1d": [15, 16, 22], "area": 15, "outsid": 15, "coverag": 15, "total": [15, 16], "identit": 15, "structur": 15, "jagged_dense_dense_elementwise_add_jagged_output": 15, "y_0": 15, "y_1": 15, "multipli": [15, 16], "max_n": 15, "matmul": 15, "stacked_jagged_1d_to_dens": 15, "arg": [15, 22], "kwarg": 15, "stacked_jagged_2d_to_dens": 15, "split_table_batched_embeddings_op": 16, "splittablebatchedembeddingbagscodegen": 16, "embedding_spec": 16, "feature_table_map": 16, "none": 16, "cache_algorithm": 16, "cachealgorithm": 16, "cache_load_factor": 16, "cache_reserved_memori": 16, "cache_precis": 16, "weights_precis": 16, "enforce_hbm": 16, "optimtyp": 16, "exact_sgd": 16, "record_cache_metr": 16, "gradient_clip": 16, "max_gradi": 16, "learning_r": 16, "01": 16, "ep": 16, "0e": 16, "momentum": 16, "weight_decai": 16, "weight_decay_mod": 16, "weightdecaymod": 16, "eta": 16, "001": 16, "beta1": 16, "beta2": 16, "999": 16, "poolingmod": 16, "boundscheckmod": 16, "sourc": [16, 18, 19, 20, 21, 22], "backward": 16, "embeddingloc": 16, "computedevic": 16, "spec": 16, "placement": 16, "lxu": 16, "algorithm": 16, "capac": 16, "amount": 16, "reserv": [16, 19], "hbm": 16, "adam": 16, "exact_adagrad": 16, "exact_rowwise_adagrad": 16, "exact_rowwise_weighted_adagrad": 16, "lamb": 16, "lars_sgd": 16, "partial_rowwise_adam": 16, "partial_rowwise_lamb": 16, "sgd": 16, "recordcachemetr": 16, "record": 16, "hit": 16, "request": [16, 17, 21], "record_cache_miss_count": 16, "metric": 16, "record_tablewise_cache_miss": 16, "stochast": 16, "round": 16, "gradient": 16, "clip": 16, "learn": 16, "rate": 16, "epsilon": 16, "adagrad": 16, "lar": 16, "decai": 16, "l2": 16, "decoupl": 16, "pool": [16, 24], "boundari": 16, "fatal": 16, "conatin": 16, "column": 16, "feature_requires_grad": 16, "split_table_batched_embeddings_ops_common": 16, "split_table_batched_embeddings_ops_train": 16, "init_embedding_weights_uniform": 16, "split_embedding_weight": 16, "9426": 16, "7046": 16, "4214": 16, "0419": 16, "1331": 16, "7856": 16, "8124": 16, "2021": 16, "5771": 16, "5911": 16, "7792": 16, "1068": 16, "6203": 16, "4813": 16, "1677": 16, "4790": 16, "5587": 16, "0941": 16, "5754": 16, "3475": 16, "8952": 16, "1964": 16, "0810": 16, "4174": 16, "2513": 16, "4039": 16, "3775": 16, "3273": 16, "5399": 16, "0229": 16, "1455": 16, "8770": 16, "9520": 16, "4593": 16, "7169": 16, "6307": 16, "1765": 16, "8757": 16, "8614": 16, "2051": 16, "0603": 16, "9980": 16, "7958": 16, "5826": 16, "long": 16, "13": 16, "5197": 16, "2957": 16, "3578": 16, "1487": 16, "4873": 16, "3044": 16, "9801": 16, "2769": 16, "7164": 16, "8528": 16, "7159": 16, "6719": 16, "0784": 16, "2016": 16, "2176": 16, "1988": 16, "3825": 16, "5008": 16, "8991": 16, "1405": 16, "2637": 16, "9427": 16, "8902": 16, "3754": 16, "5013": 16, "6105": 16, "9968": 16, "3057": 16, "7621": 16, "9821": 16, "7314": 16, "6195": 16, "grad_fn": 16, "cppnode": 16, "splitlookupfunction_sgd_op": 16, "question": 17, "concern": 17, "discuss": 17, "kick": 17, "regard": 17, "feel": 17, "free": 17, "reach": 17, "easi": 18, "transpar": 18, "describ": 18, "activ": 18, "welcom": [18, 24], "your": [18, 21, 22], "repositori": 18, "branch": 18, "ve": 18, "ad": [18, 21], "chang": [18, 20, 22], "api": [18, 20, 21, 22], "suit": 18, "lint": 18, "haven": 18, "submit": [18, 20, 22], "facebook": [18, 19, 24], "open": 18, "track": 18, "public": [18, 21], "bug": 18, "pleas": [18, 20, 22], "descript": [18, 20, 21, 22, 23], "instruct": [18, 20, 21, 22, 24], "abl": 18, "bounti": 18, "safe": 18, "disclosur": 18, "secur": 18, "go": 18, "outlin": 18, "By": 18, "agre": 18, "tree": 18, "claus": 19, "bsd": 19, "softwar": 19, "copyright": 19, "inc": 19, "affili": 19, "right": [19, 23], "redistribut": 19, "modif": 19, "permit": 19, "condit": 19, "met": 19, "retain": 19, "notic": 19, "disclaim": 19, "materi": 19, "contributor": 19, "endors": 19, "promot": 19, "written": 19, "permiss": 19, "BY": 19, "THE": 19, "holder": 19, "AND": 19, "AS": 19, "express": [19, 23], "OR": 19, "warranti": 19, "NOT": 19, "limit": [19, 21], "TO": 19, "OF": 19, "merchant": 19, "FOR": 19, "particular": 19, "IN": 19, "NO": 19, "event": 19, "shall": 19, "BE": 19, "liabl": 19, "indirect": 19, "incident": 19, "special": 19, "exemplari": 19, "consequenti": 19, "damag": 19, "procur": 19, "substitut": 19, "servic": 19, "profit": 19, "busi": 19, "interrupt": 19, "theori": 19, "liabil": 19, "contract": 19, "strict": 19, "tort": 19, "neglig": 19, "aris": 19, "even": 19, "IF": 19, "advis": 19, "SUCH": 19, "javadoc": 20, "style": [20, 22], "comment": [20, 21, 23], "sphinx": [20, 21, 22], "breath": 20, "kept": 20, "cpp": [20, 22, 23], "cu": 20, "cuh": 20, "everyth": 20, "ifndef": 20, "doxygen_this_will_be_skip": 20, "endif": 20, "hidden": 20, "html": [20, 21, 22], "descriptionss": 20, "configur": 20, "publish": [20, 22], "docstr": [20, 21, 22], "method": [20, 21, 22], "organ": 20, "yet": 20, "top": [20, 24], "defgroup": 20, "directli": [20, 22], "behavior": [20, 22], "tparam": 20, "param": [20, 22], "thrown": [20, 22], "ingroup": 20, "brief": 20, "short": 20, "example_method": [20, 22], "def": [20, 22], "foo": [20, 22], "lst": [20, 22], "And": [20, 22], "verbatim": [20, 22], "text": [20, 22, 23], "diagram": [20, 22], "unpars": 20, "second": [20, 22], "prev": [20, 22], "usabl": [20, 22], "space": [20, 21, 22], "endcod": 20, "align": [20, 22], "param1": [20, 22], "param2": 20, "bad_alloc": 20, "logic_error": 20, "href": 20, "www": [20, 22], "nl": 20, "cmdlink": 20, "On": [20, 22], "doxygengroup": 20, "rst": [20, 22, 23], "content": [20, 23, 24], "toctre": [20, 22], "ini": 20, "been": 20, "taken": 20, "care": 20, "append": [20, 22], "doc": [20, 21, 22, 23], "local": [20, 22], "netlifi": [20, 21, 22], "preview": [20, 22], "serv": 21, "accompani": 21, "put": 21, "yourself": 21, "shoe": 21, "who": 21, "understand": 21, "live": 21, "easier": 21, "leav": 21, "separ": 21, "task": 21, "instead": 21, "pointer": 21, "tool": 21, "pip": 21, "graphviz": [21, 23], "assembl": 21, "view": 21, "prepend": 21, "sphinx_lint": 21, "technic": 21, "why": 21, "invok": 21, "occasion": 21, "unresolv": 21, "might": 21, "opt": 21, "pycapsul": 21, "class": [21, 22], "neg": 21, "silenc": 21, "being": 21, "nitpick": 21, "conf": 21, "domain": 21, "deploi": 21, "app": 21, "googl": 22, "c_size_t": 22, "about": 22, "ret": 22, "emplace_back": 22, "item": 22, "valueerror": 22, "14": 22, "restructuredtext": 22, "en": 22, "master": 22, "__": 22, "pep": 22, "0287": 22, "42": 22, "autofunct": 22, "c_ulong": 22, "mani": 22, "attach": 22, "fact": 22, "helper": 22, "codebas": 22, "add_doc": 22, "jag": [22, 24], "forc": 22, "hoc": 22, "the_new_doc_modul": 22, "remain": 22, "render": [22, 23], "anchor": 23, "_doc": 23, "underscor": 23, "_": 23, "There": 23, "elsewher": 23, "ref": 23, "anoth": 23, "literalinclud": 23, "rel": 23, "enclos": 23, "bracket": 23, "skiplin": 23, "suppli": 23, "math": 23, "inlin": 23, "k_": 23, "k_n": 23, "expressino": 23, "int_a": 23, "frac": 23, "2v": 23, "dx": 23, "left": 23, "dv": 23, "_a": 23, "du": 23, "digraph": 23, "altern": 23, "extern": 23, "dot": 23, "examplegraph": 23, "low": 24, "precis": 24, "high": 24, "convolut": 24, "server": 24, "infer": 24, "backend": 24, "caffe2": 24, "quantiz": 24, "collect": 24, "transform": 24, "contribut": 24, "contact": 24, "licens": 24, "combin": 24, "tbe": 24}, "objects": {"": [[8, 0, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ebits"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::exponent_bias"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::input"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ncols"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::nrows"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::output"], [8, 0, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu"], [8, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::forward"], [8, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::input"], [8, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::output_dtype"], [0, 0, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::len"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::m"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::max"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::min"], [0, 0, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf"], [0, 2, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::InputType"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::bit_rate"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_columns"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_rows"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::output"], [8, 0, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ebits"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::exponent_bias"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::input"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::max_pos"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ncols"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::nrows"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::output"], [0, 0, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize"], [0, 2, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::T"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::dst"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::len"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::noise_ratio"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::num_threads"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::qparams"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::src"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::thread_id"], [0, 0, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::C"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::G"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::K"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::LAYOUT"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::T"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::X"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::dst"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::scales"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::src"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::zero_points"], [0, 0, 1, "_CPPv46Xor128v", "Xor128"], [8, 0, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu"], [8, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::forward"], [8, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::input"], [8, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::output_dtype"], [8, 0, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu"], [8, 1, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu::input"], [8, 0, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu"], [8, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::forward"], [8, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::input"], [8, 0, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu"], [8, 1, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu::input"], [8, 0, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out"], [8, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::input"], [8, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::output"], [8, 0, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu"], [8, 1, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu::input"], [8, 0, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu"], [8, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::bit_rate"], [8, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::input"], [8, 0, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu"], [8, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::ebits"], [8, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::exponent_bias"], [8, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::input"], [8, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::max_pos"], [8, 0, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bias"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bounding_box_size"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::ebits"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::input"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::max_pos"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::mbits"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::min_pos"], [8, 0, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu"], [8, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::forward"], [8, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::input"], [8, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::row_dim"], [8, 0, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out"], [8, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::input"], [8, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::output"], [8, 0, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu"], [8, 1, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu::input"], [8, 0, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu"], [8, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::D_offsets"], [8, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::input"], [8, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::output_dtype"], [8, 0, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu"], [8, 1, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu::input"], [8, 0, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu"], [8, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::input"], [8, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::output_dtype"], [8, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::quant_padding_float_type"], [8, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::scale_bias_last"], [8, 0, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu"], [8, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::bit_rate"], [8, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::input"], [8, 0, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu"], [8, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::bit_rate"], [8, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::input"], [8, 0, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu"], [8, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::bit_rate"], [8, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::input"], [8, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::output_dtype"], [8, 0, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu"], [8, 1, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu::input"], [8, 0, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu"], [8, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::bit_rate"], [8, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::input"], [8, 0, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu"], [8, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::ebits"], [8, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::exponent_bias"], [8, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::input"], [8, 0, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu"], [8, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::bias"], [8, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::ebits"], [8, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::input"], [8, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::mbits"], [8, 0, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::forward"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::input"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_dtype"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_last_dim"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::row_dim"], [8, 0, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu"], [8, 1, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu::input"], [8, 0, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu"], [8, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::bit_rate"], [8, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::input"], [7, 0, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device"], [7, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::inputTensors"], [7, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::target_device"], [4, 0, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul"], [4, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_offsets"], [4, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_values"], [4, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::v"], [2, 0, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::B_ofsets"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::bounds_check_mode"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::indices"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::max_B"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::offsets"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::rows_per_table"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::warning"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::weights"], [4, 0, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged"], [4, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::dense"], [4, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::offsets"], [4, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::total_L"], [10, 0, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::D_offsets"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::hash_size_cumsum"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lru_state"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_miss_timestamp"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::row_alignment"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::time_stamp"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::uvm_cache_stats"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_offsets"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_tys"], [10, 0, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::invalid_index"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::uvm_cache_stats"], [20, 0, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method"], [20, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::Alignment"], [20, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::T"], [20, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param1"], [20, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param2"], [9, 0, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda"], [9, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::input_offsets"], [9, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_offsets"], [9, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_size"], [9, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::permute"], [8, 0, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu"], [8, 1, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu::input"], [8, 0, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu"], [8, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::forward"], [8, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::input"], [8, 0, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu"], [8, 1, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu::input"], [8, 0, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu"], [8, 1, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu::input"], [8, 0, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu"], [8, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::input"], [8, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::output_dtype"], [8, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::quant_padding_float_type"], [8, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::scale_bias_last"], [8, 0, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu"], [8, 1, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu::input"], [8, 0, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu"], [8, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::bit_rate"], [8, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::input"], [8, 0, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu"], [8, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::bit_rate"], [8, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::input"], [8, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::output_dtype"], [8, 0, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu"], [8, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::bit_rate"], [8, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::input"], [9, 0, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_boundaries"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_in_use_after"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_weight_value"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_examples"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_positives"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::logit"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::num_segments"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::positive_weight"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_lengths"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_value"], [10, 0, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda"], [10, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::compute_count"], [10, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::linear_indices"], [10, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::max_indices"], [8, 0, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu"], [8, 1, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu::input"], [9, 0, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_in_use_after"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_weight_value"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_examples"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_positives"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::logit"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::lower_bound"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::positive_weight"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::upper_bound"], [10, 0, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot"], [10, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::C"], [10, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::h_in"], [2, 0, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::D_offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::dev_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bias"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bits"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indice_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indices"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_locations"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float16_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float32_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float8_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int2_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int4_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int8_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::output_dtype"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::pooling_mode"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::row_alignment"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::total_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::uvm_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_placements"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_tys"], [2, 0, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::D_offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::dev_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bias"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bits"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indice_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indices"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_locations"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float16_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float32_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float8_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int2_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int4_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int8_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::output_dtype"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::pooling_mode"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::row_alignment"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::total_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::uvm_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_placements"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_tys"], [2, 0, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::D_offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_hash_size_cumsum"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_index_table_map"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::dev_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bias"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bits"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indice_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indices"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_locations"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_state"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_state"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float16_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float32_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float8_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int2_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int4_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int8_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::output_dtype"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::pooling_mode"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::row_alignment"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_cache_hash_size"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::uvm_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_placements"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_tys"], [2, 0, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::D_offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_hash_size_cumsum"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_index_table_map"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::dev_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bias"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bits"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indice_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indices"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_locations"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_state"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_state"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float16_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float32_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float8_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int2_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int4_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int8_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::output_dtype"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::pooling_mode"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::row_alignment"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_cache_hash_size"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::uvm_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_placements"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_tys"], [6, 0, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor"], [6, 1, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor::self"], [4, 0, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense"], [4, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::max_L"], [4, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::offsets"], [4, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::padding_value"], [4, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::values"], [4, 0, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense"], [4, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::max_sequence_length"], [4, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::offsets"], [4, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::values"], [4, 0, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_offsets"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_values"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::y"], [4, 0, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output"], [4, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_offsets"], [4, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_values"], [4, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::y"], [4, 0, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda"], [4, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_offsets"], [4, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_values"], [4, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::y"], [4, 0, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_offsets"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_values"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::y"], [4, 0, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense"], [4, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::max_lengths"], [4, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::offsets"], [4, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::padding_value"], [4, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::values"], [4, 0, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward"], [4, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::max_lengths"], [4, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::offsets"], [4, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::padding_value"], [4, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::values"], [10, 0, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::D_offsets"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lfu_state"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::row_alignment"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_offsets"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_tys"], [10, 0, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::D_offsets"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lfu_state"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::stochastic_rounding"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights_offsets"], [10, 0, 1, "_CPPv428linearize_cache_indices_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_cuda"], [10, 1, 1, "_CPPv428linearize_cache_indices_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv428linearize_cache_indices_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_cuda::indices"], [10, 1, 1, "_CPPv428linearize_cache_indices_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_cuda::offsets"], [10, 0, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda"], [10, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_row_indices"], [10, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_table_indices"], [10, 0, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lock_cache_line"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lru_state"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lxu_cache_locking_counter"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::max_indices"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::time_stamp"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::unique_indices"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::unique_indices_length"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::uvm_cache_stats"], [10, 0, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::D_offsets"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::hash_size_cumsum"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lru_state"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::row_alignment"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::time_stamp"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::uvm_cache_stats"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_offsets"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_tys"], [10, 0, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::D_offsets"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::hash_size_cumsum"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lock_cache_line"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lru_state"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_locking_counter"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::stochastic_rounding"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::time_stamp"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::uvm_cache_stats"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights_offsets"], [10, 0, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::D_offsets"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::stochastic_rounding"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::total_D"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::uvm_weights"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::weights_offsets"], [10, 0, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda"], [10, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations"], [10, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations_new"], [10, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::num_uniq_cache_indices"], [10, 0, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda"], [10, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locations"], [10, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locking_counter"], [10, 0, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::invalid_index"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_locations_output"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::num_uniq_cache_indices"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::uvm_cache_stats"], [6, 0, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor"], [6, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::self"], [6, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::sizes"], [6, 0, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor"], [6, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::self"], [6, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::sizes"], [6, 0, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta"], [6, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::self"], [6, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::sizes"], [6, 0, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor"], [6, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::is_host_mapped"], [6, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::self"], [6, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::sizes"], [6, 0, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor"], [6, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::self"], [6, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::sizes"], [3, 0, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::batch_size"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::include_last_offsets"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::indices_list"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::offsets_list"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::per_sample_weights"], [7, 0, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::pooled_embs"], [7, 0, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_permute_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::offset_dim_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::permute_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::pooled_embs"], [7, 0, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_permute_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::offset_dim_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::permute_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::pooled_embs"], [7, 0, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_permute_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::offset_dim_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::permute_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::pooled_embs"], [7, 0, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_permute_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::offset_dim_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::permute_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::pooled_embs"], [7, 0, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::allow_duplicates"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_offset_dim_list"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_permute_list"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::offset_dim_list"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::permute_list"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::pooled_embs"], [7, 0, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::pooled_embs"], [7, 0, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::pooled_embs"], [2, 0, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings_offsets"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::indices"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::offsets"], [2, 0, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings_offsets"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::indices"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::offsets"], [2, 0, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::dense_indices"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table_offsets"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::indices"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::offsets"], [2, 0, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table_offsets"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::indices"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::offsets"], [2, 0, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table_offsets"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::indices"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::offsets"], [5, 0, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda"], [5, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::grad_output"], [5, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::num_features_per_rank"], [5, 0, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda"], [5, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::cumsum_dim_sum_per_rank"], [5, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::dim_sum_per_rank"], [5, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::grad_output"], [5, 0, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu"], [5, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::dim_sum_per_rank"], [5, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::grad_output"], [5, 0, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda"], [5, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::dim_sum_per_rank"], [5, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::grad_output"], [0, 0, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::DIRECT"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::Q_GRAN"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::block"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::inp"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_in"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::r"], [0, 0, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::C_PER_G"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::Q_GRAN"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::block"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::inp"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_in"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::r"], [10, 0, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::D_offsets"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::buffer_ids"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::dev_weights"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::logical_table_ids"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_dev"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_offsets"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_placements"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_uvm"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices_offsets"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::uvm_weights"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_offsets"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_placements"], [3, 0, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu"], [3, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::include_last_offsets"], [3, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::indices_list"], [3, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::offsets_list"], [3, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::per_sample_weights"], [6, 0, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise"], [6, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::cuda_memory_advise"], [6, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::self"], [6, 0, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async"], [6, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::device_t"], [6, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::self"], [6, 0, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork"], [6, 1, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork::self"], [6, 0, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage"], [6, 1, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage::self"], [6, 0, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu"], [6, 1, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu::self"], [6, 0, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone"], [6, 1, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone::self"], [6, 0, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device"], [6, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::prototype"], [6, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::self"], [16, 3, 0, "-", "fbgemm_gpu"]], "fbgemm_gpu.docs.examples": [[22, 4, 1, "", "example_method"]], "fbgemm_gpu.split_table_batched_embeddings_ops": [[16, 4, 1, "", "SplitTableBatchedEmbeddingBagsCodegen"]], "torch.ops.fbgemm": [[15, 4, 1, "", "batched_dense_vec_jagged_2d_mul"], [15, 4, 1, "", "dense_to_jagged"], [15, 4, 1, "", "jagged_1d_to_dense"], [15, 4, 1, "", "jagged_2d_to_dense"], [15, 4, 1, "", "jagged_dense_dense_elementwise_add_jagged_output"], [15, 4, 1, "", "jagged_dense_elementwise_add"], [15, 4, 1, "", "jagged_dense_elementwise_add_jagged_output"], [15, 4, 1, "", "jagged_dense_elementwise_mul"], [15, 4, 1, "", "jagged_to_padded_dense"], [15, 4, 1, "", "stacked_jagged_1d_to_dense"], [15, 4, 1, "", "stacked_jagged_2d_to_dense"]]}, "objtypes": {"0": "cpp:function", "1": "cpp:functionParam", "2": "cpp:templateParam", "3": "py:module", "4": "py:function"}, "objnames": {"0": ["cpp", "function", "C++ function"], "1": ["cpp", "functionParam", "C++ function parameter"], "2": ["cpp", "templateParam", "C++ template parameter"], "3": ["py", "module", "Python module"], "4": ["py", "function", "Python function"]}, "titleterms": {"quantiz": [0, 8], "util": 0, "refer": [0, 23], "implement": 0, "method": 0, "avx": 0, "2": 0, "512": 0, "build": [1, 11, 21], "instruct": [1, 11, 12], "fbgemm": [1, 24], "requir": 1, "hardwar": 1, "softwar": 1, "depend": 1, "asmjit": 1, "cpuinfo": 1, "googletest": 1, "set": [1, 11, 12, 21], "up": [1, 11, 12, 21], "an": [1, 11], "isol": [1, 11], "environ": [1, 11, 12, 21], "instal": [1, 11, 12], "tool": [1, 11], "c": [1, 11, 20, 24], "compil": [1, 11], "other": [1, 11, 23], "librari": [1, 12], "prepar": [1, 11], "linux": 1, "maco": 1, "cmake": 1, "bazel": 1, "window": 1, "embed": [2, 7, 10, 16], "oper": [2, 3, 4, 5, 6, 7, 8, 9, 10, 14, 15, 16], "cuda": [2, 4, 5, 6, 8, 9, 11, 12, 13], "cpu": [2, 4, 5, 8, 9, 11, 12], "combin": [3, 14], "input": 3, "jag": [4, 14, 15], "tensor": [4, 14, 15], "layout": 5, "transform": 5, "memori": 6, "pool": 7, "merg": 7, "permut": 7, "spars": 9, "data": 9, "tabl": [10, 16], "batch": [10, 16], "miniconda": 11, "conda": [11, 12], "onli": [11, 12], "docker": [11, 12], "imag": 11, "cudnn": 11, "rocm": [11, 12, 13], "miopen": 11, "pytorch": [11, 12], "through": [11, 12], "pip": [11, 12], "post": [11, 12], "check": [11, 12], "fbgemm_gpu": [11, 12, 13, 21, 24], "packag": [11, 12], "The": 11, "process": 11, "wheel": 11, "variabl": 11, "For": 11, "develop": [11, 24], "undefin": [11, 12], "symbol": [11, 12], "glibc": 11, "version": 11, "compat": 11, "nvidia": 12, "driver": 12, "contain": 12, "runtim": 12, "amdgpu": 12, "python": [12, 22, 24], "public": 12, "pypi": 12, "test": 13, "variant": 13, "benchmark": 13, "high": 14, "level": 14, "overview": [14, 24], "format": 14, "valu": 14, "offset": 14, "max": 14, "length": 14, "exampl": 14, "arithmet": 14, "convers": 14, "dens": 14, "tbe": 16, "contact": 17, "u": 17, "github": 17, "slack": 17, "contribut": 18, "code": [18, 20, 22, 23], "conduct": 18, "pull": 18, "request": 18, "contributor": 18, "licens": [18, 19], "agreement": 18, "cla": 18, "issu": 18, "ad": [20, 22, 23], "document": [20, 21, 22, 23, 24], "gener": [21, 22, 24], "guidelin": 21, "specif": 21, "guid": 21, "toolchain": 21, "lint": 21, "deploy": 21, "preview": 21, "todo": 22, "auto": 22, "sphinx": 23, "pointer": 23, "section": 23, "referenc": 23, "sourc": 23, "latex": 23, "graph": 23, "homepag": 24, "info": 24, "api": 24}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Jagged Tensor Operators": [[14, "jagged-tensor-operators"], [15, "jagged-tensor-operators"], [4, "jagged-tensor-operators"]], "High Level Overview": [[14, "high-level-overview"]], "Jagged Tensor Format": [[14, "jagged-tensor-format"]], "Values": [[14, "values"]], "Offsets": [[14, "offsets"]], "Max Lengths": [[14, "max-lengths"]], "Jagged Tensor Example": [[14, "jagged-tensor-example"]], "Jagged Tensor Operations": [[14, "jagged-tensor-operations"]], "Arithmetic Operations": [[14, "arithmetic-operations"]], "Conversion Operations": [[14, "conversion-operations"]], "Jagged to Dense": [[14, "jagged-to-dense"]], "Dense to Jagged": [[14, "dense-to-jagged"]], "Combined Arithmetic + Conversion Operations": [[14, "combined-arithmetic-conversion-operations"]], "Installation Instructions": [[12, "installation-instructions"]], "Set Up CPU-Only Environment": [[12, "set-up-cpu-only-environment"]], "Set Up CUDA Environment": [[12, "set-up-cuda-environment"]], "Install NVIDIA Drivers": [[12, "install-nvidia-drivers"]], "Set Up the CUDA Docker Container and Conda Environment": [[12, "set-up-the-cuda-docker-container-and-conda-environment"]], "Install the CUDA Runtime": [[12, "install-the-cuda-runtime"]], "Set Up ROCm Environment": [[12, "set-up-rocm-environment"]], "Install AMDGPU Drivers": [[12, "install-amdgpu-drivers"]], "Set Up the ROCm Docker Container and Conda Environment": [[12, "set-up-the-rocm-docker-container-and-conda-environment"]], "Install Python Libraries": [[12, "install-python-libraries"]], "Install PyTorch": [[12, "install-pytorch"], [11, "install-pytorch"]], "Install the FBGEMM_GPU Package": [[12, "install-the-fbgemm-gpu-package"]], "Install through PyTorch PIP": [[12, "install-through-pytorch-pip"]], "Install through Public PyPI": [[12, "install-through-public-pypi"]], "Post-Installation Checks": [[12, "post-installation-checks"]], "Undefined Symbols": [[12, "undefined-symbols"]], "Testing FBGEMM_GPU": [[13, "testing-fbgemm-gpu"]], "FBGEMM_GPU Tests": [[13, "fbgemm-gpu-tests"]], "Testing with the CUDA Variant": [[13, "testing-with-the-cuda-variant"]], "Testing with the ROCm Variant": [[13, "testing-with-the-rocm-variant"]], "FBGEMM_GPU Benchmarks": [[13, "fbgemm-gpu-benchmarks"]], "Sphinx Documentation Pointers": [[23, "sphinx-documentation-pointers"]], "References Other Sections of the Documentation": [[23, "references-other-sections-of-the-documentation"]], "Referencing the Source Code": [[23, "referencing-the-source-code"]], "Adding LaTeX": [[23, "adding-latex"]], "Adding Graphs": [[23, "adding-graphs"]], "Adding Documentation to Python Code": [[22, "adding-documentation-to-python-code"]], "Todo": [[22, "id1"]], "Adding Documentation to Auto-Generated Python Code": [[22, "adding-documentation-to-auto-generated-python-code"]], "Documentation": [[21, "documentation"]], "General Documentation Guidelines": [[21, "general-documentation-guidelines"]], "Specific Documentation Guides": [[21, "specific-documentation-guides"]], "Building the Documentation": [[21, "building-the-documentation"]], "Set Up Build Environment": [[21, "set-up-build-environment"]], "Build FBGEMM_GPU": [[21, "build-fbgemm-gpu"]], "Set Up the Documentation Toolchain": [[21, "set-up-the-documentation-toolchain"]], "Build the Documentation": [[21, "build-the-documentation"]], "Linting the Documentation": [[21, "linting-the-documentation"]], "Deployment Preview": [[21, "deployment-preview"]], "Table Batched Embedding (TBE) Operators": [[16, "module-fbgemm_gpu"]], "Contact Us": [[17, "contact-us"]], "GitHub": [[17, "github"]], "Slack": [[17, "slack"]], "FBGEMM and FBGEMM_GPU Documentation Homepage": [[24, "fbgemm-and-fbgemm-gpu-documentation-homepage"]], "General Info": [[24, null]], "FBGEMM Development": [[24, null]], "FBGEMM_GPU Development": [[24, null]], "FBGEMM_GPU Overview": [[24, null]], "FBGEMM C++ API": [[24, null]], "FBGEMM_GPU C++ API": [[24, null]], "FBGEMM_GPU Python API": [[24, null]], "Combine Input Operators": [[3, "combine-input-operators"]], "Layout Transformation Operators": [[5, "layout-transformation-operators"]], "CUDA Operators": [[5, "cuda-operators"], [4, "cuda-operators"], [9, "cuda-operators"], [8, "cuda-operators"], [2, "cuda-operators"]], "CPU Operators": [[5, "cpu-operators"], [4, "cpu-operators"], [9, "cpu-operators"], [8, "cpu-operators"], [2, "cpu-operators"]], "Table Batched Embedding Operators": [[10, "table-batched-embedding-operators"]], "Sparse Data Operators": [[9, "sparse-data-operators"]], "Build Instructions": [[11, "build-instructions"], [1, "build-instructions"]], "Set Up an Isolated Build Environment": [[11, "set-up-an-isolated-build-environment"], [1, "set-up-an-isolated-build-environment"]], "Install Miniconda": [[11, "install-miniconda"]], "Set Up the Conda Environment": [[11, "set-up-the-conda-environment"]], "Set Up for CPU-Only Build": [[11, "set-up-for-cpu-only-build"]], "Set Up for CUDA Build": [[11, "set-up-for-cuda-build"]], "CUDA Docker Image": [[11, "cuda-docker-image"]], "Install CUDA": [[11, "install-cuda"]], "Install cuDNN": [[11, "install-cudnn"]], "Set Up for ROCm Build": [[11, "set-up-for-rocm-build"]], "ROCm Docker Image": [[11, "rocm-docker-image"]], "Install ROCm": [[11, "install-rocm"]], "Install MIOpen": [[11, "install-miopen"]], "Install the Build Tools": [[11, "install-the-build-tools"], [1, "install-the-build-tools"]], "C/C++ Compiler": [[11, "c-c-compiler"], [1, "c-c-compiler"]], "Other Build Tools": [[11, "other-build-tools"], [1, "other-build-tools"]], "Installation Through Conda": [[11, "installation-through-conda"]], "Installation Through PyTorch PIP": [[11, "installation-through-pytorch-pip"]], "Post-Install Checks": [[11, "post-install-checks"]], "Build the FBGEMM_GPU Package": [[11, "build-the-fbgemm-gpu-package"]], "Preparing the Build": [[11, "preparing-the-build"], [1, "preparing-the-build"]], "The Build Process": [[11, "the-build-process"]], "Set Wheel Build Variables": [[11, "set-wheel-build-variables"]], "CPU-Only Build": [[11, "cpu-only-build"]], "CUDA Build": [[11, "cuda-build"]], "ROCm Build": [[11, "rocm-build"]], "Post-Build Checks (For Developers)": [[11, "post-build-checks-for-developers"]], "Undefined Symbols Check": [[11, "undefined-symbols-check"]], "GLIBC Version Compatibility Check": [[11, "glibc-version-compatibility-check"]], "Contributing": [[18, "contributing"]], "Code of Conduct": [[18, "code-of-conduct"]], "Pull Requests": [[18, "pull-requests"]], "Contributor License Agreement (\u201cCLA\u201d)": [[18, "contributor-license-agreement-cla"]], "Issues": [[18, "issues"]], "License": [[18, "license"], [19, "license"]], "Adding Documentation to C++ Code": [[20, "adding-documentation-to-c-code"]], "Pooled Embeddings Operators": [[7, "pooled-embeddings-operators"]], "Merge Operators": [[7, "merge-operators"]], "Permutation Operators": [[7, "permutation-operators"]], "CUDA Memory Operators": [[6, "cuda-memory-operators"]], "Quantization Operators": [[8, "quantization-operators"]], "Quantization Utilities": [[0, "quantization-utilities"]], "Reference Implementation Methods": [[0, "reference-implementation-methods"]], "AVX-2 Implementation Methods": [[0, "avx-2-implementation-methods"]], "AVX-512 Implementation Methods": [[0, "avx-512-implementation-methods"]], "Embedding Operators": [[2, "embedding-operators"]], "FBGEMM Requirements": [[1, "fbgemm-requirements"]], "Hardware Requirements": [[1, "hardware-requirements"]], "Software Dependencies": [[1, "software-dependencies"]], "asmjit": [[1, "asmjit"]], "cpuinfo": [[1, "cpuinfo"]], "GoogleTest": [[1, "googletest"]], "Build the FBGEMM Library": [[1, "build-the-fbgemm-library"]], "Building on Linux and macOS (CMake)": [[1, "building-on-linux-and-macos-cmake"]], "Building on Linux (Bazel)": [[1, "building-on-linux-bazel"]], "Building on Windows": [[1, "building-on-windows"]]}, "indexentries": {"findminmax (c++ function)": [[0, "_CPPv410FindMinMaxPKfPfPf7int64_t"]], "floatorhalftofusednbitrowwisequantizedsbhalf (c++ function)": [[0, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE"]], "fusedquantizedequantize (c++ function)": [[0, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif"]], "quantizegroupwise (c++ function)": [[0, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T"]], "xor128 (c++ function)": [[0, "_CPPv46Xor128v"]], "requantizeoutputprocessingavx2 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "requantizeoutputprocessinggconvavx512 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "bounds_check_indices_cuda (c++ function)": [[2, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t"]], "int_nbit_split_embedding_codegen_lookup_function (c++ function)": [[2, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE"]], "int_nbit_split_embedding_codegen_lookup_function_cpu (c++ function)": [[2, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function (c++ function)": [[2, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu (c++ function)": [[2, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE"]], "pruned_array_lookup_cpu (c++ function)": [[2, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor"]], "pruned_array_lookup_cuda (c++ function)": [[2, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_insert_unweighted_cpu (c++ function)": [[2, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_cuda (c++ function)": [[2, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_unweighted_cpu (c++ function)": [[2, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor"]], "padding_fused_tbe_input_combine_cpu (c++ function)": [[3, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t"]], "tbe_input_combine_cpu (c++ function)": [[3, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE"]], "batched_dense_vec_jagged_2d_mul (c++ function)": [[4, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor"]], "dense_to_jagged (c++ function)": [[4, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE"]], "jagged_1d_to_dense (c++ function)": [[4, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t"]], "jagged_2d_to_dense (c++ function)": [[4, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE"]], "jagged_dense_elementwise_add (c++ function)": [[4, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output (c++ function)": [[4, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output_cuda (c++ function)": [[4, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_mul (c++ function)": [[4, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_to_padded_dense (c++ function)": [[4, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd"]], "jagged_to_padded_dense_forward (c++ function)": [[4, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd"]], "recat_embedding_grad_output_cuda (c++ function)": [[5, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_batch_cuda (c++ function)": [[5, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor"]], "recat_embedding_grad_output_mixed_d_cpu (c++ function)": [[5, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_cuda (c++ function)": [[5, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE"]], "is_uvm_tensor (c++ function)": [[6, "_CPPv413is_uvm_tensorRK6Tensor"]], "new_host_mapped_tensor (c++ function)": [[6, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor (c++ function)": [[6, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor_meta (c++ function)": [[6, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_unified_tensor (c++ function)": [[6, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb"]], "new_vanilla_managed_tensor (c++ function)": [[6, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "uvm_cuda_mem_advise (c++ function)": [[6, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t"]], "uvm_cuda_mem_prefetch_async (c++ function)": [[6, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE"]], "uvm_mem_advice_dont_fork (c++ function)": [[6, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor"]], "uvm_storage (c++ function)": [[6, "_CPPv411uvm_storageRK6Tensor"]], "uvm_to_cpu (c++ function)": [[6, "_CPPv410uvm_to_cpuRK6Tensor"]], "uvm_to_cpu_clone (c++ function)": [[6, "_CPPv416uvm_to_cpu_cloneRK6Tensor"]], "uvm_to_device (c++ function)": [[6, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor"]], "all_to_one_device (c++ function)": [[7, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE"]], "permute_pooled_embs_auto_grad (c++ function)": [[7, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_cpu (c++ function)": [[7, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_gpu (c++ function)": [[7, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_split_cpu (c++ function)": [[7, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_auto_grad_split_gpu (c++ function)": [[7, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_cpu_impl (c++ function)": [[7, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb"]], "permute_pooled_embs_split_cpu (c++ function)": [[7, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_split_gpu (c++ function)": [[7, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "fp8quantizedtofloat_ref (c++ function)": [[8, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi"]], "fp8rowwise_to_float_cpu (c++ function)": [[8, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t"]], "floattofp8quantized_ref (c++ function)": [[8, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd"]], "_fp8rowwise_to_float_gpu (c++ function)": [[8, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t"]], "_bfloat16_to_float_gpu (c++ function)": [[8, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE"]], "_float_to_fp8rowwise_gpu (c++ function)": [[8, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb"]], "_float_to_bfloat16_gpu (c++ function)": [[8, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE"]], "_float_to_fused8bitrowwise_cpu_out (c++ function)": [[8, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor"]], "_float_to_fused8bitrowwise_gpu (c++ function)": [[8, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor"]], "_float_to_fusednbitrowwise_gpu (c++ function)": [[8, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "_float_to_hfp8_gpu (c++ function)": [[8, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd"]], "_float_to_msfp_gpu (c++ function)": [[8, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd"]], "_float_to_paddedfp8rowwise_gpu (c++ function)": [[8, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t"]], "_fused8bitrowwise_to_float_cpu_out (c++ function)": [[8, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor"]], "_fused8bitrowwise_to_float_gpu (c++ function)": [[8, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_float_mixed_dim_gpu (c++ function)": [[8, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t"]], "_fused8bitrowwise_to_half_gpu (c++ function)": [[8, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_single_or_half_precision_gpu (c++ function)": [[8, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb"]], "_fusednbitrowwise_to_float_gpu (c++ function)": [[8, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_half_gpu (c++ function)": [[8, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_single_or_half_precision_gpu (c++ function)": [[8, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_half_to_fused8bitrowwise_gpu (c++ function)": [[8, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor"]], "_half_to_fusednbitrowwise_gpu (c++ function)": [[8, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t"]], "_hfp8_to_float_gpu (c++ function)": [[8, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_msfp_to_float_gpu (c++ function)": [[8, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t"]], "_paddedfp8rowwise_to_float_gpu (c++ function)": [[8, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t"]], "_single_or_half_precision_to_fused8bitrowwise_gpu (c++ function)": [[8, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor"]], "_single_or_half_precision_to_fusednbitrowwise_gpu (c++ function)": [[8, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "float_or_half_to_fused8bitrowwise_cpu (c++ function)": [[8, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor"]], "float_to_fp8rowwise_cpu (c++ function)": [[8, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb"]], "float_to_fused8bitrowwise_cpu (c++ function)": [[8, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor"]], "fused8bitrowwise_to_float_cpu (c++ function)": [[8, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor"]], "fused8bitrowwise_to_float_or_half_cpu (c++ function)": [[8, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb"]], "fused8bitrowwise_to_half_cpu (c++ function)": [[8, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor"]], "fusednbitrowwise_to_float_cpu (c++ function)": [[8, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t"]], "fusednbitrowwise_to_float_or_half_cpu (c++ function)": [[8, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t"]], "fusednbitrowwise_to_half_cpu (c++ function)": [[8, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t"]], "half_to_fused8bitrowwise_cpu (c++ function)": [[8, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor"]], "expand_into_jagged_permute_cuda (c++ function)": [[9, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t"]], "generic_histogram_binning_calibration_by_feature_cpu (c++ function)": [[9, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td"]], "histogram_binning_calibration_cpu (c++ function)": [[9, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td"]], "direct_mapped_lru_cache_populate_byte_cuda (c++ function)": [[10, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "direct_mapped_lxu_cache_lookup_cuda (c++ function)": [[10, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "get_unique_indices_cuda (c++ function)": [[10, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb"]], "host_lxu_cache_slot (c++ function)": [[10, "_CPPv419host_lxu_cache_slot7int64_t7int64_t"]], "lfu_cache_populate_byte_cuda (c++ function)": [[10, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "lfu_cache_populate_cuda (c++ function)": [[10, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb"]], "linearize_cache_indices_cuda (c++ function)": [[10, "_CPPv428linearize_cache_indices_cudaN2at6TensorEN2at6TensorEN2at6TensorE"]], "linearize_cache_indices_from_row_idx_cuda (c++ function)": [[10, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE"]], "lru_cache_find_uncached_cuda (c++ function)": [[10, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE"]], "lru_cache_populate_byte_cuda (c++ function)": [[10, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "lru_cache_populate_cuda (c++ function)": [[10, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE"]], "lxu_cache_flush_cuda (c++ function)": [[10, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb"]], "lxu_cache_locations_update_cuda (c++ function)": [[10, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE"]], "lxu_cache_locking_counter_decrement_cuda (c++ function)": [[10, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE"]], "lxu_cache_lookup_cuda (c++ function)": [[10, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE"]], "reset_weight_momentum_cuda (c++ function)": [[10, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "batched_dense_vec_jagged_2d_mul() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.batched_dense_vec_jagged_2d_mul"]], "dense_to_jagged() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.dense_to_jagged"]], "jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_1d_to_dense"]], "jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_2d_to_dense"]], "jagged_dense_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_dense_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_add() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_dense_elementwise_add"]], "jagged_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_mul() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_dense_elementwise_mul"]], "jagged_to_padded_dense() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_to_padded_dense"]], "stacked_jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.stacked_jagged_1d_to_dense"]], "stacked_jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.stacked_jagged_2d_to_dense"]], "splittablebatchedembeddingbagscodegen() (in module fbgemm_gpu.split_table_batched_embeddings_ops)": [[16, "fbgemm_gpu.split_table_batched_embeddings_ops.SplitTableBatchedEmbeddingBagsCodegen"]], "fbgemm_gpu": [[16, "module-fbgemm_gpu"]], "module": [[16, "module-fbgemm_gpu"]], "example_method (c++ function)": [[20, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf"]], "example_method() (in module fbgemm_gpu.docs.examples)": [[22, "fbgemm_gpu.docs.examples.example_method"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["fbgemm-cpp-api/QuantUtils", "fbgemm-development/BuildInstructions", "fbgemm_gpu-cpp-api/embedding_ops", "fbgemm_gpu-cpp-api/input_combine", "fbgemm_gpu-cpp-api/jagged_tensor_ops", "fbgemm_gpu-cpp-api/layout_transform_ops", "fbgemm_gpu-cpp-api/memory_utils", "fbgemm_gpu-cpp-api/merge_pooled_embeddings", "fbgemm_gpu-cpp-api/quantize_ops", "fbgemm_gpu-cpp-api/sparse_ops", "fbgemm_gpu-cpp-api/split_table_batched_embeddings", "fbgemm_gpu-development/BuildInstructions", "fbgemm_gpu-development/InstallationInstructions", "fbgemm_gpu-development/TestInstructions", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps", "fbgemm_gpu-python-api/jagged_tensor_ops", "fbgemm_gpu-python-api/table_batched_embedding_ops", "general/ContactUs", "general/Contributing", "general/License", "general/documentation/Cpp", "general/documentation/Overview", "general/documentation/Python", "general/documentation/Sphinx", "index"], "filenames": ["fbgemm-cpp-api/QuantUtils.rst", "fbgemm-development/BuildInstructions.rst", "fbgemm_gpu-cpp-api/embedding_ops.rst", "fbgemm_gpu-cpp-api/input_combine.rst", "fbgemm_gpu-cpp-api/jagged_tensor_ops.rst", "fbgemm_gpu-cpp-api/layout_transform_ops.rst", "fbgemm_gpu-cpp-api/memory_utils.rst", "fbgemm_gpu-cpp-api/merge_pooled_embeddings.rst", "fbgemm_gpu-cpp-api/quantize_ops.rst", "fbgemm_gpu-cpp-api/sparse_ops.rst", "fbgemm_gpu-cpp-api/split_table_batched_embeddings.rst", "fbgemm_gpu-development/BuildInstructions.rst", "fbgemm_gpu-development/InstallationInstructions.rst", "fbgemm_gpu-development/TestInstructions.rst", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "general/ContactUs.rst", "general/Contributing.rst", "general/License.rst", "general/documentation/Cpp.rst", "general/documentation/Overview.rst", "general/documentation/Python.rst", "general/documentation/Sphinx.rst", "index.rst"], "titles": ["Quantization Utilities", "Build Instructions", "Embedding Operators", "Combine Input Operators", "Jagged Tensor Operators", "Layout Transformation Operators", "CUDA Memory Operators", "Pooled Embeddings Operators", "Quantization Operators", "Sparse Data Operators", "Table Batched Embedding Operators", "Build Instructions", "Installation Instructions", "Testing FBGEMM_GPU", "Jagged Tensor Operators", "Jagged Tensor Operators", "Table Batched Embedding (TBE) Operators", "Contact Us", "Contributing", "License", "Adding Documentation to C++ Code", "Documentation", "Adding Documentation to Python Code", "Sphinx Documentation Pointers", "FBGEMM and FBGEMM_GPU Documentation Homepage"], "terms": {"templat": [0, 11, 20], "typenam": [0, 20], "t": [0, 1, 6, 9, 11, 16, 18, 20, 21], "layout_t": 0, "layout": [0, 24], "kcx": 0, "void": [0, 2, 6, 8, 10], "quantizegroupwis": 0, "const": [0, 2, 3, 4, 5, 6, 7, 8, 9, 22], "float": [0, 8, 15, 16, 20, 22], "src": 0, "int": [0, 8, 15, 16, 20, 22], "k": 0, "c": [0, 10, 12, 14, 19, 21, 22, 23], "x": [0, 4, 14, 20, 22], "g": [0, 1, 9, 11, 20, 22], "scale": 0, "std": [0, 3, 4, 5, 6, 7, 9, 10, 20, 22], "int32_t": [0, 20, 22], "zero_point": 0, "dst": 0, "point": [0, 8, 15, 20, 22], "data": [0, 6, 14, 16, 19, 24], "type": [0, 1, 8, 12, 14, 15, 16, 20], "paramet": [0, 6, 8, 9, 15, 16, 20, 21, 22], "output": [0, 4, 8, 9, 15, 16, 20, 22], "int8_t": 0, "uint8_t": [0, 8, 10], "ar": [0, 1, 4, 10, 11, 12, 14, 15, 16, 19, 20, 21, 22], "support": [0, 1, 11, 12, 14, 22, 24], "input": [0, 4, 6, 8, 9, 14, 15, 16, 20, 24], "tensor": [0, 2, 3, 5, 6, 7, 8, 9, 10, 16, 21, 22, 24], "kxc": 0, "correspond": [0, 9, 10, 14, 20, 22], "kcr": 0, "kctr": 0, "weight": [0, 2, 9, 10, 16], "time": [0, 11, 12, 14], "dimens": [0, 4, 6, 9, 14, 15, 16, 22], "krsc": 0, "ktrsc": 0, "channel": [0, 11, 12, 17], "number": [0, 1, 9, 11, 14, 15, 16, 21], "r": [0, 21], "": [0, 1, 6, 11, 13, 14, 18, 20, 21, 22], "group": [0, 14, 20], "function": [0, 1, 11, 20, 22], "perform": [0, 1, 8, 9, 14, 24], "channelwis": 0, "1": [0, 1, 9, 10, 11, 12, 13, 14, 15, 16, 21, 22, 23], "groupwis": 0, "per": [0, 14], "size": [0, 1, 6, 8, 9, 14, 15, 16], "should": [0, 9, 10, 11, 12, 14, 18, 20, 21, 22], "equal": [0, 14, 22], "zero": [0, 15, 22], "reprsent": 0, "fusedquantizedequant": 0, "int64_t": [0, 2, 3, 4, 5, 6, 8, 9, 10], "len": [0, 14], "tensorquantizationparam": 0, "qparam": 0, "thread_id": 0, "0": [0, 1, 8, 9, 10, 11, 12, 14, 15, 16, 22], "num_thread": 0, "noise_ratio": 0, "0f": 0, "fuse": [0, 8, 16], "integ": [0, 6, 8, 14], "dequant": 0, "kernel": [0, 1, 6, 13, 24], "acceler": 0, "awar": 0, "train": [0, 16, 24], "fp32": [0, 8, 16], "valu": [0, 4, 6, 8, 9, 10, 15, 16, 20, 21, 22], "u": [0, 11, 23, 24], "int8": [0, 16], "us": [0, 1, 6, 9, 11, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24], "provid": [0, 1, 11, 12, 13, 19, 20, 21, 22, 24], "back": [0, 6, 10, 11, 12], "inputtyp": 0, "floatorhalftofusednbitrowwisequantizedsbhalf": 0, "bit_rat": [0, 8], "size_t": [0, 8, 20], "input_row": 0, "input_column": 0, "convert": [0, 6, 8, 14, 15, 22], "fp16": [0, 8, 16], "rowwis": [0, 8, 16], "bitrat": 0, "specifi": [0, 1, 8, 9, 11, 15, 16], "bit": [0, 8], "bia": [0, 8], "each": [0, 9, 11, 14, 15, 16, 22], "row": [0, 4, 10, 14, 15, 16, 22], "store": [0, 9, 10], "itself": [0, 14, 21], "end": [0, 12, 14, 23], "can": [0, 1, 8, 9, 11, 12, 14, 20, 21, 22, 23], "4": [0, 1, 11, 12, 14, 15, 16, 22], "8": [0, 1, 8, 11, 14, 16], "uint32_t": 0, "xor128": 0, "random": 0, "gener": [0, 1, 9, 11, 12, 20, 23], "9": [0, 1, 11, 14, 16], "base": [0, 9, 10, 11, 14], "thi": [0, 1, 4, 6, 7, 9, 11, 12, 14, 17, 18, 19, 20, 22, 23, 24], "paper": 0, "findminmax": 0, "m": [0, 11, 12, 13], "min": 0, "max": [0, 16], "find": [0, 10, 11], "matrix": [0, 1, 15, 24], "bool": [0, 6, 7, 8, 10, 16], "a_symmetr": 0, "b_symmetr": 0, "quantizationgranular": 0, "q_gran": 0, "has_bia": 0, "fuse_relu": 0, "bias_typ": 0, "direct": [0, 10, 12, 19, 20, 22, 23], "fals": [0, 6, 16, 21], "requantizeoutputprocessingavx2": 0, "out": [0, 11, 17, 19, 21], "inp": 0, "block_type_t": 0, "block": [0, 20, 22, 23], "ld_out": 0, "ld_in": 0, "requantizationparams_t": 0, "requant": 0, "avx2": [0, 1], "i": [0, 1, 4, 6, 8, 9, 10, 11, 12, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24], "c_per_g": 0, "requantizeoutputprocessinggconvavx512": 0, "avx512": 0, "note": [1, 10, 11, 12, 20, 21, 22, 23], "The": [1, 6, 8, 9, 12, 13, 14, 15, 16, 18, 20, 21, 22, 23], "most": [1, 11, 12, 14, 21], "date": [1, 11, 12, 21], "embed": [1, 11, 12, 21, 24], "script": [1, 11, 12, 21], "bundl": [1, 11, 12, 21], "repo": [1, 11, 12, 21, 22], "under": [1, 11, 12, 18, 19, 21, 22], "setup_env": [1, 11, 12, 21], "bash": [1, 11, 12, 21], "step": [1, 11, 12, 14, 21, 22], "fbgemm_gpu": [1, 6, 14, 16, 17, 18, 19, 20, 22], "follow": [1, 9, 11, 12, 14, 19, 20, 21, 22], "toolchain": [1, 11, 12], "run": [1, 11, 12, 13, 21], "cpu": [1, 6, 7, 13, 21], "higher": 1, "In": [1, 9, 12, 14, 18, 20, 22], "doe": [1, 2, 12, 20, 21, 22], "have": [1, 9, 10, 14, 21], "ani": [1, 9, 15, 18, 19, 21, 22], "intel": 1, "mkl": 1, "howev": [1, 11, 14, 19], "comparison": 1, "some": [1, 11, 14, 21], "benchmark": 1, "If": [1, 11, 12, 16, 18, 20, 21, 22], "found": [1, 11, 12, 21], "path": [1, 11, 20, 23], "through": [1, 18, 20, 22], "intel_mkl_dir": 1, "variabl": 1, "built": [1, 11, 12, 21, 24], "report": [1, 12], "otherwis": [1, 6, 12, 19], "subset": 1, "all": [1, 9, 10, 11, 12, 14, 16, 19, 21], "three": [1, 14], "git": [1, 11], "submodul": [1, 11], "custom": [1, 23], "version": [1, 12], "desir": [1, 11, 14, 15, 20], "thei": [1, 11, 21, 23], "asmjit_src_dir": 1, "cpuinfo_src_dir": 1, "googletest_source_dir": 1, "With": 1, "inner": [1, 14], "take": [1, 11], "one": [1, 8, 9, 10, 15, 16, 20, 22], "doesn": 1, "fit": [1, 19], "approach": 1, "so": [1, 9, 11, 12, 14], "implement": [1, 14], "dynam": 1, "effici": [1, 24], "shape": [1, 14, 16], "specif": [1, 9, 11, 16, 19], "vector": [1, 3, 4, 5, 6, 7, 15, 22], "code": [1, 19, 21], "third": 1, "parti": 1, "call": [1, 6, 12], "detect": [1, 13], "runtim": [1, 11], "pytorch": [1, 14, 17, 21, 22, 24], "project": [1, 18], "dispatch": [1, 6], "optim": [1, 8, 16], "test": [1, 11, 12, 18, 24], "you": [1, 18, 20, 22], "don": [1, 9, 11, 21], "want": [1, 18], "togeth": [1, 20, 21], "default": [1, 9, 11, 12, 16], "turn": [1, 21], "off": [1, 12, 17], "simpli": [1, 11], "fbgemm_build_test": 1, "conda": [1, 21], "For": [1, 6, 13, 14, 17, 19, 20, 21, 22, 23], "platform": [1, 11, 19], "gcc": [1, 11], "17": [1, 11], "sysroot": [1, 11], "packag": [1, 13, 21], "also": [1, 11, 16, 23], "need": [1, 11, 12, 13, 14, 18, 20, 22, 23], "avoid": [1, 11], "issu": [1, 4, 6, 11, 12, 17], "miss": [1, 10, 11], "symbol": 1, "glibcxx": [1, 11], "when": [1, 4, 9, 11, 13, 14, 16, 20, 21, 23], "n": [1, 8, 11, 12, 23], "env_nam": [1, 11, 12], "y": [1, 4, 11, 12, 15, 21], "gxx_linux": [1, 11], "64": [1, 11, 14], "10": [1, 11, 12, 14], "sysroot_linux": [1, 11], "2": [1, 11, 12, 14, 15, 16, 20, 22, 23], "forg": [1, 11, 21], "while": [1, 11, 21], "newer": [1, 11], "binari": [1, 11, 19], "compat": 1, "older": [1, 11, 12], "system": [1, 4, 11, 12, 14], "ubuntu": [1, 11], "20": [1, 11], "04": [1, 11], "cento": [1, 11], "stream": [1, 11], "becaus": [1, 11, 14], "refer": [1, 11, 14, 21, 22], "from": [1, 6, 8, 9, 10, 11, 12, 13, 14, 16, 18, 19, 20, 21, 22, 23], "libstdc": [1, 11], "6": [1, 11, 12, 14], "To": [1, 11, 13, 23], "see": [1, 6, 11, 12, 14, 20, 22, 23], "what": [1, 11, 21], "glibc": 1, "avail": [1, 6, 11, 21], "libcxx_path": [1, 11], "print": [1, 11, 12, 16, 22], "objdump": [1, 11], "tc": [1, 11], "grep": [1, 11], "glibc_": [1, 11], "sed": [1, 11], "sort": [1, 9, 10, 11], "vu": [1, 11], "cat": [1, 11], "glibcxx_": [1, 11], "machin": [1, 11, 12, 13, 24], "microsoft": [1, 8], "visual": 1, "studio": 1, "2019": 1, "recommend": [1, 4, 11, 12, 14], "necessari": [1, 11], "ninja": [1, 11], "etc": [1, 11, 16], "make": [1, 10, 11, 18, 20, 21, 22], "openbla": 1, "dev": [1, 11], "onli": [1, 9, 10, 13, 14, 18, 20, 21, 23], "clone": [1, 11], "along": [1, 11, 12], "its": [1, 6, 9, 11, 16, 19, 21, 23], "insid": [1, 11, 12, 21, 23], "recurs": [1, 11], "http": [1, 11, 12, 18, 20, 21, 22], "github": [1, 11, 18], "com": [1, 11, 18], "cd": [1, 11, 13, 21], "assum": [1, 9], "process": [1, 4, 12, 14, 18, 22], "straightforward": 1, "creat": [1, 6, 11, 14, 18, 20, 22, 23], "directori": [1, 11, 13, 18, 20, 21], "mkdir": 1, "doxygen": [1, 20, 21], "document": [1, 6, 18, 19], "add": [1, 15, 18, 20, 21, 22], "dfbgemm_build_doc": 1, "ON": [1, 19], "duse_sanit": 1, "address": [1, 11], "dfbgemm_library_typ": 1, "share": [1, 6], "dpython_execut": 1, "which": [1, 9, 11, 12, 14, 16, 21], "python3": [1, 12], "j": [1, 14], "verbos": 1, "likewis": 1, "veri": [1, 20, 21, 22], "target": [1, 6, 8, 9, 11, 14, 20, 21, 22, 23], "architectur": [1, 11, 12], "bc": [1, 11], "x64": 1, "program": [1, 18], "file": [1, 11, 12, 17, 18, 20, 21, 22, 23], "x86": [1, 24], "enterpris": 1, "vc": 1, "auxiliari": 1, "vcvarsal": 1, "bat": 1, "build_dir": 1, "dfbgemm_build_benchmark": 1, "dcmake_build_typ": 1, "releas": [1, 12], "dcmake_c_compil": 1, "cl": 1, "ex": 1, "dcmake_cxx_compil": 1, "v": [1, 4, 13, 15], "bounds_check_indices_cuda": 2, "rows_per_t": 2, "indic": [2, 10, 14, 16], "offset": [2, 4, 9, 10, 15, 16], "bounds_check_mod": [2, 16], "warn": [2, 16, 20], "c10": [2, 4, 6, 8, 10], "option": [2, 4, 6, 10, 11, 15, 16], "b_ofset": 2, "max_b": 2, "int_nbit_split_embedding_codegen_lookup_funct": 2, "dev_weight": [2, 10], "uvm_weight": [2, 10], "weights_plac": [2, 10], "weights_offset": [2, 10], "weights_ti": [2, 10], "d_offset": [2, 8, 10], "total_d": [2, 10, 16], "max_int2_d": 2, "max_int4_d": 2, "max_int8_d": 2, "max_float16_d": 2, "max_float32_d": 2, "pooling_mod": [2, 16], "indice_weight": 2, "output_dtyp": [2, 8, 16], "lxu_cache_weight": [2, 10], "lxu_cache_loc": [2, 10], "row_align": [2, 10], "max_float8_d": 2, "fp8_exponent_bit": 2, "fp8_exponent_bia": 2, "int_nbit_split_embedding_uvm_caching_codegen_lookup_funct": 2, "cache_hash_size_cumsum": [2, 10], "total_cache_hash_s": [2, 10], "cache_index_table_map": [2, 10], "lxu_cache_st": [2, 10], "lxu_stat": 2, "simlar": 2, "uvm_cach": 2, "lookup": [2, 10], "pruned_hashmap_lookup_cuda": 2, "hash_tabl": 2, "hash_table_offset": 2, "pruned_array_lookup_cuda": 2, "index_remap": 2, "index_remappings_offset": 2, "int_nbit_split_embedding_codegen_lookup_function_cpu": 2, "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu": 2, "pruned_hashmap_insert_unweighted_cpu": 2, "dense_indic": 2, "pruned_hashmap_lookup_unweighted_cpu": 2, "pruned_array_lookup_cpu": 2, "tupl": [3, 4, 9, 10, 16], "tbe_input_combine_cpu": 3, "indices_list": 3, "offsets_list": 3, "per_sample_weight": [3, 16], "include_last_offset": 3, "padding_fused_tbe_input_combine_cpu": 3, "batch_siz": 3, "solv": 4, "differ": [4, 9, 14], "length": [4, 9, 15, 16, 22], "often": 4, "occur": [4, 20], "spars": [4, 14, 24], "featur": [4, 9, 14, 16, 17], "well": [4, 9, 11, 20], "natur": [4, 14], "languag": [4, 14, 23], "batch": [4, 9, 14, 15, 24], "jagged_to_padded_dense_forward": 4, "symintarrayref": 4, "max_length": [4, 15], "doubl": [4, 8, 9], "padding_valu": [4, 15], "jagged_dense_elementwise_add_jagged_output_cuda": 4, "x_valu": [4, 15], "x_offset": [4, 15, 22], "where": [4, 6, 9, 14, 15, 16], "dens": [4, 15, 22], "jagged_to_padded_dens": [4, 15], "jagged_dense_elementwise_add": [4, 15], "jagged_dense_elementwise_mul": [4, 15], "batched_dense_vec_jagged_2d_mul": [4, 15], "a_valu": [4, 15], "a_offset": [4, 15], "dense_to_jag": [4, 15], "symint": 4, "total_l": [4, 15], "jagged_dense_elementwise_add_jagged_output": [4, 15], "jagged_1d_to_dens": [4, 15], "max_l": 4, "jagged_2d_to_dens": [4, 11, 12, 15, 21, 22], "max_sequence_length": [4, 15, 22], "recat_embedding_grad_output_cuda": 5, "grad_output": 5, "num_features_per_rank": 5, "recat_embedding_grad_output_mixed_d_cuda": 5, "dim_sum_per_rank": 5, "recat_embedding_grad_output_mixed_d_batch_cuda": 5, "cumsum_dim_sum_per_rank": 5, "recat_embedding_grad_output_mixed_d_cpu": 5, "new_managed_tensor": 6, "self": 6, "alloc": [6, 20], "an": [6, 9, 12, 14, 16, 20, 21, 22, 23], "unifi": 6, "manag": [6, 11, 12, 16], "uvm": [6, 13], "Then": 6, "set": [6, 10, 13, 14, 15, 16], "prefer": [6, 12], "storag": [6, 8, 10], "locat": [6, 10, 11, 14], "host": 6, "establish": 6, "map": [6, 9, 10, 14, 16], "devic": [6, 7, 11, 13, 16], "return": [6, 8, 9, 15, 16, 20, 21, 22], "A": [6, 8, 12, 14, 15, 16, 19, 20, 21, 22], "new": [6, 8, 10, 20, 21, 22], "new_managed_tensor_meta": 6, "placehold": 6, "meta": [6, 19], "kei": 6, "empti": [6, 14, 15, 23], "new_host_mapped_tensor": 6, "new_unified_tensor": 6, "is_host_map": 6, "either": [6, 8, 9, 11, 12], "whether": [6, 11, 19], "depend": [6, 8, 11, 12, 14], "new_vanilla_managed_tensor": 6, "allow": 6, "automat": [6, 9, 13, 21], "uvm_storag": 6, "check": [6, 16], "gpu": [6, 11, 12, 13, 24], "true": [6, 16], "is_uvm_tensor": 6, "BUT": [6, 19], "non": [6, 16], "uvm_to_cpu": 6, "effect": [6, 14], "move": 6, "uvm_to_devic": 6, "prototyp": 6, "same": [6, 9, 11, 14, 15, 20, 21, 22], "whose": 6, "uvm_cuda_mem_advis": 6, "cuda_memory_advis": 6, "cudamemadvis": 6, "cudamemoryadvis": 6, "enum": [6, 8], "python": [6, 11, 13, 20, 21, 23], "side": [6, 20, 22, 24], "namespac": 6, "over": [6, 11], "valid": 6, "here": [6, 11, 18, 20, 21, 22, 23], "more": [6, 11, 16, 20, 22, 23], "inform": [6, 14, 22, 23], "uvm_cuda_mem_prefetch_async": 6, "device_t": 6, "cudamemprefetchasync": 6, "prefetch": 6, "destin": 6, "uvm_mem_advice_dont_fork": 6, "madvis": 6, "madv_dontfork": 6, "workaround": 6, "driver": [6, 11], "un": 6, "page": [6, 18, 23, 24], "tabl": [6, 9, 14, 24], "fork": [6, 18], "caus": [6, 11, 12, 19, 21], "slowdown": 6, "next": [6, 14, 20, 22], "access": [6, 16], "uvm_to_cpu_clon": 6, "copi": 6, "contigu": [6, 9], "singl": [6, 8], "thread": 6, "memcpi": 6, "contain": [6, 11, 14, 15, 16, 22], "section": [7, 11, 22], "includ": [7, 11, 19, 20, 22], "cuda": [7, 16, 24], "variou": 7, "all_to_one_devic": 7, "inputtensor": 7, "target_devic": 7, "permute_pooled_embs_split_gpu": 7, "pooled_emb": 7, "offset_dim_list": 7, "permute_list": 7, "inv_offset_dim_list": 7, "inv_permute_list": 7, "permute_pooled_embs_auto_grad_split_gpu": 7, "permute_pooled_embs_auto_grad_gpu": 7, "permute_pooled_embs_cpu_impl": 7, "allow_dupl": 7, "permute_pooled_embs_split_cpu": 7, "permute_pooled_embs_auto_grad_split_cpu": 7, "permute_pooled_embs_auto_grad": 7, "permute_pooled_embs_auto_grad_cpu": 7, "model": [8, 9], "techniqu": 8, "reduc": 8, "larg": 8, "order": [8, 14, 18], "achiev": [8, 12], "better": [8, 20], "small": 8, "loss": [8, 19], "accuraci": 8, "_float_to_bfloat16_gpu": 8, "brain": 8, "bfloat16": 8, "_bfloat16_to_float_gpu": 8, "_float_to_fp8rowwise_gpu": 8, "forward": 8, "fp8": 8, "dtype": [8, 16], "sparsetyp": [8, 16], "bf16": 8, "throw": [8, 20], "error": [8, 12, 20, 21, 22], "_fp8rowwise_to_float_gpu": 8, "represent": [8, 14], "_float_to_fused8bitrowwise_gpu": 8, "_half_to_fused8bitrowwise_gpu": 8, "half": 8, "_single_or_half_precision_to_fused8bitrowwise_gpu": 8, "_fused8bitrowwise_to_float_gpu": 8, "_fused8bitrowwise_to_half_gpu": 8, "_fused8bitrowwise_to_single_or_half_precision_gpu": 8, "scale_bias_last": 8, "quant_padding_float_typ": 8, "_fused8bitrowwise_to_float_mixed_dim_gpu": 8, "kfloat": 8, "khalf": 8, "_float_to_fusednbitrowwise_gpu": 8, "_half_to_fusednbitrowwise_gpu": 8, "_single_or_half_precision_to_fusednbitrowwise_gpu": 8, "_fusednbitrowwise_to_float_gpu": 8, "_fusednbitrowwise_to_half_gpu": 8, "_fusednbitrowwise_to_single_or_half_precision_gpu": 8, "_float_to_hfp8_gpu": 8, "ebit": 8, "exponent_bia": 8, "max_po": 8, "hybrid": 8, "hfp8": 8, "_hfp8_to_float_gpu": 8, "_float_to_msfp_gpu": 8, "bounding_box_s": 8, "mbit": 8, "min_po": 8, "msfp": 8, "_msfp_to_float_gpu": 8, "_float_to_paddedfp8rowwise_gpu": 8, "row_dim": 8, "pad": [8, 14, 15, 22], "_paddedfp8rowwise_to_float_gpu": 8, "output_last_dim": 8, "_fused8bitrowwise_to_float_cpu_out": 8, "_float_to_fused8bitrowwise_cpu_out": 8, "float_to_fused8bitrowwise_cpu": 8, "half_to_fused8bitrowwise_cpu": 8, "float_or_half_to_fused8bitrowwise_cpu": 8, "fused8bitrowwise_to_float_cpu": 8, "fused8bitrowwise_to_half_cpu": 8, "fused8bitrowwise_to_float_or_half_cpu": 8, "float_to_fp8rowwise_cpu": 8, "fp8rowwise_to_float_cpu": 8, "fusednbitrowwise_to_float_cpu": 8, "fusednbitrowwise_to_half_cpu": 8, "fusednbitrowwise_to_float_or_half_cpu": 8, "floattofp8quantized_ref": 8, "nrow": 8, "ncol": 8, "fp8quantizedtofloat_ref": 8, "expand_into_jagged_permute_cuda": 9, "permut": 9, "input_offset": 9, "output_offset": 9, "output_s": 9, "expand_into_jagged_permut": 9, "expand": 9, "index": [9, 10, 11, 12, 14, 20, 22], "case": [9, 11, 12, 14, 18], "ha": [9, 12, 14, 18, 20, 21], "across": [9, 11], "rank": [9, 14], "level": 9, "exclus": 9, "op": [9, 12, 15, 22], "bag": [9, 16, 24], "posit": [9, 16], "sit": 9, "after": [9, 11, 12, 13, 14, 16, 21, 22, 23], "we": [9, 14, 18], "deriv": [9, 14, 19], "arrai": [9, 15, 22], "comput": [9, 11, 12, 16], "formula": 9, "output_permut": 9, "table_offset": 9, "bag_offset": 9, "histogram_binning_calibration_cpu": 9, "logit": 9, "bin_num_exampl": 9, "bin_num_posit": 9, "positive_weight": 9, "lower_bound": 9, "upper_bound": 9, "bin_ctr_in_use_aft": 9, "bin_ctr_weight_valu": 9, "divid": [9, 14], "predict": 9, "rang": [9, 14], "e": [9, 11, 14, 20, 22, 23], "b": [9, 11, 14, 15, 16, 20, 21, 22, 23], "bin": [9, 11], "two": [9, 14, 15, 16, 21], "exampl": [9, 11, 12, 13, 15, 16, 20, 21, 22, 23], "fall": [9, 11, 12], "bucket": [9, 11], "basic": [9, 22], "histogram": 9, "As": [9, 11, 12, 14], "result": [9, 15], "statist": 9, "real": 9, "ctr": 9, "num_po": 9, "num_exampl": 9, "final": 9, "calibr": 9, "pre": [9, 11, 12], "cali": 9, "wai": [9, 19], "within": 9, "suffici": [9, 18, 21], "That": 9, "fine": 9, "grain": 9, "modul": [9, 12, 16, 22], "theoret": 9, "layer": 9, "fix": 9, "uncalibr": 9, "befor": [9, 16, 23], "appli": [9, 11, 14, 16], "sigmoid": 9, "calibart": 9, "pass": [9, 16, 18, 21], "argument": [9, 20, 21, 22], "lower": 9, "bound": [9, 14], "calibration_target": 9, "observ": 9, "sum": [9, 15, 16], "statisct": 9, "final_calibrated_predict": 9, "bin_ctr_weight": 9, "bin_ctr": 9, "calibrated_predict": 9, "bin_id": 9, "generic_histogram_binning_calibration_by_feature_cpu": 9, "segment_valu": 9, "segment_length": 9, "num_seg": 9, "bin_boundari": 9, "extens": [9, 20, 21], "ectr": 9, "abov": [9, 12, 14, 19, 20, 22, 23], "accept": [9, 18], "keyjaggedtensor": 9, "num_bin": 9, "longer": [9, 17, 20], "still": [9, 11], "parambin_ctr_weight_valu": 9, "get_unique_indices_cuda": 10, "linear_indic": 10, "max_indic": 10, "compute_count": 10, "dedupl": 10, "pair": [10, 23], "lru_cache_find_uncached_cuda": 10, "unique_indic": 10, "unique_indices_length": 10, "time_stamp": 10, "lru_stat": 10, "gather_cache_stat": 10, "uvm_cache_stat": 10, "lock_cache_lin": 10, "lxu_cache_locking_count": 10, "lru": [10, 16], "cach": [10, 11, 16], "uncach": 10, "them": 10, "host_lxu_cache_slot": 10, "h_in": 10, "cache_set": [10, 16], "linearize_cache_indices_cuda": 10, "linear": 10, "uniqu": [10, 23], "linearize_cache_indices_from_row_idx_cuda": 10, "update_table_indic": 10, "update_row_indic": 10, "format": [10, 21, 22], "inplac": 10, "updat": [10, 11, 12, 16, 18], "lru_cache_populate_cuda": 10, "hash_size_cumsum": 10, "linear_cache_indic": 10, "stochastic_round": [10, 16], "fetch": 10, "insert": [10, 23], "timestep": 10, "lru_cache_populate_byte_cuda": 10, "byte": 10, "element": [10, 14], "direct_mapped_lru_cache_populate_byte_cuda": 10, "lxu_cache_miss_timestamp": 10, "assoc": 10, "variant": [10, 11, 12, 21], "lfu_cache_populate_cuda": 10, "lfu_stat": 10, "lfu": [10, 16], "lfu_cache_populate_byte_cuda": 10, "lxu_cache_lookup_cuda": 10, "invalid_index": 10, "num_uniq_cache_indic": 10, "lxu_cache_locations_output": 10, "look": [10, 16], "up": [10, 16], "slot": 10, "sentinel": 10, "direct_mapped_lxu_cache_lookup_cuda": 10, "lxu_cache_flush_cuda": 10, "flush": 10, "reset_weight_momentum_cuda": 10, "momentum1_dev": 10, "momentum1_uvm": 10, "momentum1_plac": 10, "momentum1_offset": 10, "pruned_indic": 10, "pruned_indices_offset": 10, "logical_table_id": 10, "buffer_id": 10, "lxu_cache_locking_counter_decrement_cuda": 10, "decrement": 10, "counter": 10, "lxu_cache_locations_update_cuda": 10, "lxu_cache_locations_new": 10, "fbgemm": [11, 12, 15, 17, 18, 19, 21, 22], "reproduc": [11, 12, 18, 19], "export": [11, 13], "platform_nam": 11, "unam": 11, "prefix": [11, 23], "miniconda_prefix": 11, "home": 11, "download": [11, 12], "wget": 11, "q": 11, "anaconda": 11, "miniconda3": 11, "latest": 11, "sh": 11, "o": [11, 12], "p": 11, "load": [11, 14, 22], "shortcut": 11, "bashrc": 11, "command": [11, 12, 20, 21], "against": [11, 13], "env": [11, 12], "name": [11, 12, 19, 20, 22], "python_vers": 11, "3": [11, 14, 15, 16, 19, 22], "12": [11, 14, 16], "upgrad": 11, "pyopenssl": 11, "22": [11, 14], "requir": [11, 12, 14, 16, 21, 22], "recent": [11, 12], "nvcc": 11, "capabl": [11, 13], "5": [11, 14, 16], "done": [11, 12], "bare": 11, "metal": 11, "neither": [11, 19], "nor": [11, 19], "nvidia": 11, "present": [11, 22], "sinc": [11, 14], "setup": [11, 12], "pull": [11, 12, 21], "linux": [11, 12], "distribut": [11, 19], "11": [11, 12, 14], "entrypoint": 11, "devel": 11, "ubuntu22": 11, "rest": [11, 12], "mai": [11, 12, 14, 19], "construct": [11, 12, 14], "mechan": 11, "full": [11, 12, 23], "nvml": 11, "org": [11, 12, 22], "cuda_vers": 11, "label": 11, "verifi": [11, 12, 20, 22], "cuda_runtim": 11, "h": [11, 15, 20], "libnvidia": [11, 12], "ml": [11, 12], "conda_prefix": 11, "printenv": 11, "extract": 11, "given": [11, 14, 15], "url": [11, 12], "builder": 11, "blob": 11, "main": [11, 18], "common": [11, 12, 14, 22], "install_cuda": 11, "cudnn_url": 11, "redist": 11, "x86_64": 11, "26_cuda12": 11, "archiv": 11, "tar": 11, "xz": 11, "unpack": 11, "amd": [11, 12], "minim": 11, "termin": 11, "both": [11, 17, 19, 21], "minimum": [11, 20, 21, 22], "oper": [11, 12, 24], "guid": [11, 22], "disabl": 11, "apt": 11, "prompt": 11, "debian_frontend": 11, "noninteract": 11, "db": 11, "radeon": 11, "amdgpu": 11, "focal": 11, "install_5": 11, "50601": 11, "1_all": 11, "deb": 11, "usecas": 11, "hiplibsdk": 11, "dkm": 11, "hipifi": 11, "clang": 11, "hip": 11, "oppos": 11, "reli": 11, "fbgemm_cpu": 11, "librari": [11, 21, 24], "cmake": 11, "click": 11, "hypothesi": [11, 12], "jinja2": 11, "numpi": [11, 12], "scikit": [11, 12], "offici": 11, "homepag": 11, "authorit": [11, 12, 21], "how": [11, 12, 13, 22], "nightli": [11, 12], "rc": 11, "without": [11, 19], "alwai": 11, "reliabl": 11, "known": [11, 16], "arriv": 11, "hour": 11, "later": 11, "than": [11, 14], "window": 11, "silent": 11, "place": [11, 16], "artifact": 11, "select": 11, "dure": [11, 14, 16, 22], "thu": [11, 16], "import": [11, 12, 16, 22, 23], "first": [11, 20, 22, 23], "prior": [11, 12, 19], "much": [11, 20], "determinist": 11, "torch": [11, 12, 15, 16, 21, 22], "whl": [11, 12], "cu121": [11, 12], "rocm5": [11, 12], "write": [11, 12, 21, 22], "ensur": [11, 12, 18], "properli": 11, "__version__": 11, "cuda_cmake_macro": 11, "txt": [11, 21, 23], "tag": [11, 20, 23], "fbgemm_vers": 11, "v0": 11, "fbgemm_": 11, "addit": [11, 14, 15], "flow": 11, "keep": 11, "state": 11, "becom": 11, "stale": 11, "problem": 11, "re": [11, 12], "attempt": 11, "failur": [11, 12], "due": 11, "clear": [11, 18], "py": [11, 12, 13, 21, 22], "clean": [11, 21], "must": [11, 12, 13, 14, 16, 19, 23], "package_nam": 11, "fbgemm_gpu_": 11, "It": [11, 12, 14], "convent": 11, "major": 11, "minor": 11, "py312": 11, "python_tag": 11, "determin": [11, 14], "processor": 11, "arch": 11, "python_plat_nam": 11, "manylinux2014_": 11, "maco": 11, "macosx_10_9_": 11, "arm64": 11, "macosx_11_0_": 11, "win_": 11, "cpu_onli": 11, "flag": [11, 21], "bdist_wheel": 11, "package_vari": 11, "plat": 11, "made": [11, 21], "presenc": 11, "unabl": 11, "cudacxx": 11, "cuda_bin_path": 11, "cub": 11, "applic": [11, 16, 20, 22], "cub_dir": 11, "header": [11, 20, 23], "cudnn_include_dir": 11, "cudnn_librari": 11, "lib": [11, 12], "nvml_lib_path": 11, "sm70": [11, 12], "80": 11, "v100": [11, 12], "a100": [11, 12], "current": [11, 12, 14, 16], "cuda_arch_list": 11, "7": [11, 12, 14, 15, 16], "unset": 11, "torch_cuda_arch_list": 11, "exist": [11, 20, 22], "preced": 11, "dtorch_cuda_arch_list": 11, "invoc": [11, 21], "rocm_path": 11, "pytorch_rocm_arch": 11, "gfx906": 11, "gfx908": 11, "gfx90a": 11, "wiki": 11, "gentoo": 11, "list": [11, 14, 15, 16, 19, 20, 22], "rocminfo": 11, "gfx": 11, "dhip_root_dir": 11, "dcmake_c_flag": 11, "dtorch_use_hip_dsa": 11, "dcmake_cxx_flag": 11, "complet": [11, 18, 21], "actual": 11, "correct": 11, "lot": 11, "jinja": 11, "instanti": 11, "sure": [11, 18, 20, 22], "accident": 11, "cours": 11, "fbgemm_gpu_lib_path": 11, "fbgemm_gpu_pi": [11, 12], "defin": [11, 14, 20], "nm": 11, "gdcu": 11, "referenc": 11, "certain": 11, "gdc": 11, "merge_pooled_embed": [11, 12], "isol": [12, 21], "build": [12, 13, 20, 22, 24], "work": [12, 14, 18], "sm80": 12, "respect": 12, "other": [12, 14, 19, 20, 21, 22], "scratch": 12, "guarante": 12, "especi": 12, "displai": [12, 23], "do": [12, 18], "smi": 12, "515": 12, "76": 12, "persist": 12, "bu": [12, 23], "id": 12, "disp": 12, "volatil": 12, "uncorr": 12, "ecc": 12, "fan": 12, "temp": 12, "perf": 12, "pwr": 12, "usag": [12, 21, 22], "cap": 12, "memori": [12, 16, 24], "util": [12, 24], "mig": 12, "a10g": 12, "00000000": 12, "00": 12, "1e": 12, "31c": 12, "p0": 12, "59w": 12, "300w": 12, "0mib": 12, "23028mib": 12, "gi": 12, "ci": 12, "pid": 12, "No": 12, "though": 12, "expos": 12, "detail": 12, "onc": [12, 18], "imag": 12, "launch": 12, "alreadi": [12, 18, 20, 22], "toolkit": 12, "interfac": 12, "concis": 12, "info": [12, 20, 22], "dieedg": 12, "avgpwr": 12, "sclk": 12, "mclk": 12, "pwrcap": 12, "vram": 12, "33": 12, "0c": 12, "37": 12, "0w": 12, "300mhz": 12, "1200mhz": 12, "auto": [12, 21], "290": 12, "32": 12, "39": 12, "log": 12, "difficult": 12, "relev": [12, 20], "correctli": [12, 20, 21], "link": [12, 21], "encount": 12, "signatur": [12, 21], "traceback": 12, "last": 12, "root": [12, 18], "miniconda": 12, "mycondaenv": 12, "site": 12, "_op": [12, 21], "line": [12, 22, 23], "565": 12, "__getattr__": 12, "overload_nam": 12, "_c": 12, "_jit_get_oper": 12, "qualified_op_nam": 12, "runtimeerror": 12, "except": [12, 20, 22], "wa": 12, "string": [12, 23], "post47": 12, "py3": 12, "aarch64": 12, "egg": 12, "__init__": [12, 22], "21": 12, "_fbgemm_gpu_doc": 12, "noqa": 12, "f401": 12, "e402": 12, "18": 12, "569": 12, "rais": [12, 22], "attributeerror": [12, 22], "_opnamespac": 12, "object": [12, 14], "attribut": [12, 22], "cli": 12, "main_run": 12, "execut": [12, 13], "47": 12, "fail": [12, 13, 20], "_zn6fbgemm48floatorhalftofusednbitrowwisequantizedsbhalfavx2itli2eeevpkt_miph": 12, "appear": 12, "reason": [12, 21], "libtorch": 12, "visibl": 12, "ld_library_path": 12, "incorrectli": [12, 21], "declar": [12, 20], "were": [12, 15], "pr": [12, 20, 21, 22], "1618": 12, "former": 12, "resolv": 12, "manual": [12, 20], "latter": 12, "seriou": 12, "tha": 12, "develop": [12, 21], "bench": 13, "good": [13, 19], "instal": [13, 21, 24], "pytest": 13, "rsx": 13, "w": 13, "ignor": [13, 16, 21], "pytestcollectionwarn": 13, "split_table_batched_embeddings_test": 13, "quantize_ops_test": 13, "sparse_ops_test": 13, "split_embedding_inference_converter_test": 13, "mode": [13, 16], "cuda_visible_devic": 13, "environ": 13, "enabl": 13, "debug": 13, "cuda_launch_block": 13, "fbgemm_test_with_rocm": 13, "hip_launch_block": 13, "split_table_batched_embeddings_benchmark": 13, "purpos": [14, 15, 16, 19], "handl": 14, "consecut": 14, "similar": [14, 16], "nestedtensor": 14, "raggedtensor": 14, "tensorflow": 14, "notabl": 14, "token": 14, "sentenc": 14, "repres": 14, "maxlength": 14, "2d": [14, 15, 16, 22], "numel": 14, "greatest": 14, "divisor": 14, "smallest": 14, "sub": 14, "exclud": 14, "partit": 14, "impli": [14, 19], "denot": [14, 20, 22], "offest": 14, "outer": 14, "would": 14, "begin": 14, "maximum": [14, 15, 22], "between": [14, 20, 21, 23], "normal": 14, "densor": 14, "form": [14, 19], "figur": 14, "below": 14, "show": [14, 21], "mean": [14, 16], "accomod": 14, "logic": [14, 20], "At": [14, 20, 21, 22], "stage": 14, "wise": [14, 16], "multipl": [14, 15, 16, 22, 24], "hadamard": 14, "product": [14, 19], "involv": 14, "bmatrix": 14, "rightarrow": 14, "16": 14, "25": 14, "36": 14, "49": 14, "81": 14, "50": 14, "operand": 14, "word": 14, "ax": 14, "properti": 14, "hold": 14, "elementwis": [14, 15], "equival": 14, "d": [14, 15, 23], "start": [14, 15, 22, 23], "dim": 14, "onto": 14, "part": 14, "everi": 14, "those": [14, 15, 18, 22], "converson": 14, "possibl": [14, 18, 19], "could": 14, "lead": 14, "read": [14, 16], "relat": 14, "smaller": 14, "expect": 14, "happen": 14, "give": 14, "situat": 14, "like": 14, "dense_tensor": 14, "jagged_tensor": 14, "break": 14, "exact": 14, "usual": 14, "1d": [15, 16, 22], "area": 15, "outsid": 15, "coverag": 15, "total": [15, 16], "identit": 15, "structur": 15, "jagged_dense_dense_elementwise_add_jagged_output": 15, "y_0": 15, "y_1": 15, "multipli": [15, 16], "max_n": 15, "matmul": 15, "stacked_jagged_1d_to_dens": 15, "arg": [15, 22], "kwarg": 15, "stacked_jagged_2d_to_dens": 15, "split_table_batched_embeddings_op": 16, "splittablebatchedembeddingbagscodegen": 16, "embedding_spec": 16, "feature_table_map": 16, "none": 16, "cache_algorithm": 16, "cachealgorithm": 16, "cache_load_factor": 16, "cache_reserved_memori": 16, "cache_precis": 16, "weights_precis": 16, "enforce_hbm": 16, "optimtyp": 16, "exact_sgd": 16, "record_cache_metr": 16, "gradient_clip": 16, "max_gradi": 16, "learning_r": 16, "01": 16, "ep": 16, "0e": 16, "momentum": 16, "weight_decai": 16, "weight_decay_mod": 16, "weightdecaymod": 16, "eta": 16, "001": 16, "beta1": 16, "beta2": 16, "999": 16, "poolingmod": 16, "boundscheckmod": 16, "sourc": [16, 18, 19, 20, 21, 22], "backward": 16, "embeddingloc": 16, "computedevic": 16, "spec": 16, "placement": 16, "lxu": 16, "algorithm": 16, "capac": 16, "amount": 16, "reserv": [16, 19], "hbm": 16, "adam": 16, "exact_adagrad": 16, "exact_rowwise_adagrad": 16, "exact_rowwise_weighted_adagrad": 16, "lamb": 16, "lars_sgd": 16, "partial_rowwise_adam": 16, "partial_rowwise_lamb": 16, "sgd": 16, "recordcachemetr": 16, "record": 16, "hit": 16, "request": [16, 17, 21], "record_cache_miss_count": 16, "metric": 16, "record_tablewise_cache_miss": 16, "stochast": 16, "round": 16, "gradient": 16, "clip": 16, "learn": 16, "rate": 16, "epsilon": 16, "adagrad": 16, "lar": 16, "decai": 16, "l2": 16, "decoupl": 16, "pool": [16, 24], "boundari": 16, "fatal": 16, "conatin": 16, "column": 16, "feature_requires_grad": 16, "split_table_batched_embeddings_ops_common": 16, "split_table_batched_embeddings_ops_train": 16, "init_embedding_weights_uniform": 16, "split_embedding_weight": 16, "9426": 16, "7046": 16, "4214": 16, "0419": 16, "1331": 16, "7856": 16, "8124": 16, "2021": 16, "5771": 16, "5911": 16, "7792": 16, "1068": 16, "6203": 16, "4813": 16, "1677": 16, "4790": 16, "5587": 16, "0941": 16, "5754": 16, "3475": 16, "8952": 16, "1964": 16, "0810": 16, "4174": 16, "2513": 16, "4039": 16, "3775": 16, "3273": 16, "5399": 16, "0229": 16, "1455": 16, "8770": 16, "9520": 16, "4593": 16, "7169": 16, "6307": 16, "1765": 16, "8757": 16, "8614": 16, "2051": 16, "0603": 16, "9980": 16, "7958": 16, "5826": 16, "long": 16, "13": 16, "5197": 16, "2957": 16, "3578": 16, "1487": 16, "4873": 16, "3044": 16, "9801": 16, "2769": 16, "7164": 16, "8528": 16, "7159": 16, "6719": 16, "0784": 16, "2016": 16, "2176": 16, "1988": 16, "3825": 16, "5008": 16, "8991": 16, "1405": 16, "2637": 16, "9427": 16, "8902": 16, "3754": 16, "5013": 16, "6105": 16, "9968": 16, "3057": 16, "7621": 16, "9821": 16, "7314": 16, "6195": 16, "grad_fn": 16, "cppnode": 16, "splitlookupfunction_sgd_op": 16, "question": 17, "concern": 17, "discuss": 17, "kick": 17, "regard": 17, "feel": 17, "free": 17, "reach": 17, "easi": 18, "transpar": 18, "describ": 18, "activ": 18, "welcom": [18, 24], "your": [18, 21, 22], "repositori": 18, "branch": 18, "ve": 18, "ad": [18, 21], "chang": [18, 20, 22], "api": [18, 20, 21, 22], "suit": 18, "lint": 18, "haven": 18, "submit": [18, 20, 22], "facebook": [18, 19, 24], "open": 18, "track": 18, "public": [18, 21], "bug": 18, "pleas": [18, 20, 22], "descript": [18, 20, 21, 22, 23], "instruct": [18, 20, 21, 22, 24], "abl": 18, "bounti": 18, "safe": 18, "disclosur": 18, "secur": 18, "go": 18, "outlin": 18, "By": 18, "agre": 18, "tree": 18, "claus": 19, "bsd": 19, "softwar": 19, "copyright": 19, "inc": 19, "affili": 19, "right": [19, 23], "redistribut": 19, "modif": 19, "permit": 19, "condit": 19, "met": 19, "retain": 19, "notic": 19, "disclaim": 19, "materi": 19, "contributor": 19, "endors": 19, "promot": 19, "written": 19, "permiss": 19, "BY": 19, "THE": 19, "holder": 19, "AND": 19, "AS": 19, "express": [19, 23], "OR": 19, "warranti": 19, "NOT": 19, "limit": [19, 21], "TO": 19, "OF": 19, "merchant": 19, "FOR": 19, "particular": 19, "IN": 19, "NO": 19, "event": 19, "shall": 19, "BE": 19, "liabl": 19, "indirect": 19, "incident": 19, "special": 19, "exemplari": 19, "consequenti": 19, "damag": 19, "procur": 19, "substitut": 19, "servic": 19, "profit": 19, "busi": 19, "interrupt": 19, "theori": 19, "liabil": 19, "contract": 19, "strict": 19, "tort": 19, "neglig": 19, "aris": 19, "even": 19, "IF": 19, "advis": 19, "SUCH": 19, "javadoc": 20, "style": [20, 22], "comment": [20, 21, 23], "sphinx": [20, 21, 22], "breath": 20, "kept": 20, "cpp": [20, 22, 23], "cu": 20, "cuh": 20, "everyth": 20, "ifndef": 20, "doxygen_this_will_be_skip": 20, "endif": 20, "hidden": 20, "html": [20, 21, 22], "descriptionss": 20, "configur": 20, "publish": [20, 22], "docstr": [20, 21, 22], "method": [20, 21, 22], "organ": 20, "yet": 20, "top": [20, 24], "defgroup": 20, "directli": [20, 22], "behavior": [20, 22], "tparam": 20, "param": [20, 22], "thrown": [20, 22], "ingroup": 20, "brief": 20, "short": 20, "example_method": [20, 22], "def": [20, 22], "foo": [20, 22], "lst": [20, 22], "And": [20, 22], "verbatim": [20, 22], "text": [20, 22, 23], "diagram": [20, 22], "unpars": 20, "second": [20, 22], "prev": [20, 22], "usabl": [20, 22], "space": [20, 21, 22], "endcod": 20, "align": [20, 22], "param1": [20, 22], "param2": 20, "bad_alloc": 20, "logic_error": 20, "href": 20, "www": [20, 22], "nl": 20, "cmdlink": 20, "On": [20, 22], "doxygengroup": 20, "rst": [20, 22, 23], "content": [20, 23, 24], "toctre": [20, 22], "ini": 20, "been": 20, "taken": 20, "care": 20, "append": [20, 22], "doc": [20, 21, 22, 23], "local": [20, 22], "netlifi": [20, 21, 22], "preview": [20, 22], "serv": 21, "accompani": 21, "put": 21, "yourself": 21, "shoe": 21, "who": 21, "understand": 21, "live": 21, "easier": 21, "leav": 21, "separ": 21, "task": 21, "instead": 21, "pointer": 21, "tool": 21, "pip": 21, "graphviz": [21, 23], "assembl": 21, "view": 21, "prepend": 21, "sphinx_lint": 21, "technic": 21, "why": 21, "invok": 21, "occasion": 21, "unresolv": 21, "might": 21, "opt": 21, "pycapsul": 21, "class": [21, 22], "neg": 21, "silenc": 21, "being": 21, "nitpick": 21, "conf": 21, "domain": 21, "deploi": 21, "app": 21, "googl": 22, "c_size_t": 22, "about": 22, "ret": 22, "emplace_back": 22, "item": 22, "valueerror": 22, "14": 22, "restructuredtext": 22, "en": 22, "master": 22, "__": 22, "pep": 22, "0287": 22, "42": 22, "autofunct": 22, "c_ulong": 22, "mani": 22, "attach": 22, "fact": 22, "helper": 22, "codebas": 22, "add_doc": 22, "jag": [22, 24], "forc": 22, "hoc": 22, "the_new_doc_modul": 22, "remain": 22, "render": [22, 23], "anchor": 23, "_doc": 23, "underscor": 23, "_": 23, "There": 23, "elsewher": 23, "ref": 23, "anoth": 23, "literalinclud": 23, "rel": 23, "enclos": 23, "bracket": 23, "skiplin": 23, "suppli": 23, "math": 23, "inlin": 23, "k_": 23, "k_n": 23, "expressino": 23, "int_a": 23, "frac": 23, "2v": 23, "dx": 23, "left": 23, "dv": 23, "_a": 23, "du": 23, "digraph": 23, "altern": 23, "extern": 23, "dot": 23, "examplegraph": 23, "low": 24, "precis": 24, "high": 24, "convolut": 24, "server": 24, "infer": 24, "backend": 24, "caffe2": 24, "quantiz": 24, "collect": 24, "transform": 24, "contribut": 24, "contact": 24, "licens": 24, "combin": 24, "tbe": 24}, "objects": {"": [[8, 0, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ebits"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::exponent_bias"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::input"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ncols"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::nrows"], [8, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::output"], [8, 0, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu"], [8, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::forward"], [8, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::input"], [8, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::output_dtype"], [0, 0, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::len"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::m"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::max"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::min"], [0, 0, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf"], [0, 2, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::InputType"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::bit_rate"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_columns"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_rows"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::output"], [8, 0, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ebits"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::exponent_bias"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::input"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::max_pos"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ncols"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::nrows"], [8, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::output"], [0, 0, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize"], [0, 2, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::T"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::dst"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::len"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::noise_ratio"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::num_threads"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::qparams"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::src"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::thread_id"], [0, 0, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::C"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::G"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::K"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::LAYOUT"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::T"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::X"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::dst"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::scales"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::src"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::zero_points"], [0, 0, 1, "_CPPv46Xor128v", "Xor128"], [8, 0, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu"], [8, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::forward"], [8, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::input"], [8, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::output_dtype"], [8, 0, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu"], [8, 1, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu::input"], [8, 0, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu"], [8, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::forward"], [8, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::input"], [8, 0, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu"], [8, 1, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu::input"], [8, 0, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out"], [8, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::input"], [8, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::output"], [8, 0, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu"], [8, 1, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu::input"], [8, 0, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu"], [8, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::bit_rate"], [8, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::input"], [8, 0, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu"], [8, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::ebits"], [8, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::exponent_bias"], [8, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::input"], [8, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::max_pos"], [8, 0, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bias"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bounding_box_size"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::ebits"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::input"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::max_pos"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::mbits"], [8, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::min_pos"], [8, 0, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu"], [8, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::forward"], [8, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::input"], [8, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::row_dim"], [8, 0, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out"], [8, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::input"], [8, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::output"], [8, 0, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu"], [8, 1, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu::input"], [8, 0, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu"], [8, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::D_offsets"], [8, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::input"], [8, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::output_dtype"], [8, 0, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu"], [8, 1, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu::input"], [8, 0, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu"], [8, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::input"], [8, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::output_dtype"], [8, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::quant_padding_float_type"], [8, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::scale_bias_last"], [8, 0, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu"], [8, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::bit_rate"], [8, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::input"], [8, 0, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu"], [8, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::bit_rate"], [8, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::input"], [8, 0, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu"], [8, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::bit_rate"], [8, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::input"], [8, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::output_dtype"], [8, 0, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu"], [8, 1, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu::input"], [8, 0, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu"], [8, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::bit_rate"], [8, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::input"], [8, 0, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu"], [8, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::ebits"], [8, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::exponent_bias"], [8, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::input"], [8, 0, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu"], [8, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::bias"], [8, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::ebits"], [8, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::input"], [8, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::mbits"], [8, 0, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::forward"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::input"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_dtype"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_last_dim"], [8, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::row_dim"], [8, 0, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu"], [8, 1, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu::input"], [8, 0, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu"], [8, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::bit_rate"], [8, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::input"], [7, 0, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device"], [7, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::inputTensors"], [7, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::target_device"], [4, 0, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul"], [4, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_offsets"], [4, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_values"], [4, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::v"], [2, 0, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::B_ofsets"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::bounds_check_mode"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::indices"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::max_B"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::offsets"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::rows_per_table"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::warning"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::weights"], [4, 0, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged"], [4, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::dense"], [4, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::offsets"], [4, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::total_L"], [10, 0, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::D_offsets"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::hash_size_cumsum"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lru_state"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_miss_timestamp"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::row_alignment"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::time_stamp"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::uvm_cache_stats"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_offsets"], [10, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_tys"], [10, 0, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::invalid_index"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::uvm_cache_stats"], [20, 0, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method"], [20, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::Alignment"], [20, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::T"], [20, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param1"], [20, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param2"], [9, 0, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda"], [9, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::input_offsets"], [9, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_offsets"], [9, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_size"], [9, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::permute"], [8, 0, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu"], [8, 1, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu::input"], [8, 0, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu"], [8, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::forward"], [8, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::input"], [8, 0, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu"], [8, 1, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu::input"], [8, 0, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu"], [8, 1, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu::input"], [8, 0, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu"], [8, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::input"], [8, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::output_dtype"], [8, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::quant_padding_float_type"], [8, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::scale_bias_last"], [8, 0, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu"], [8, 1, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu::input"], [8, 0, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu"], [8, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::bit_rate"], [8, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::input"], [8, 0, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu"], [8, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::bit_rate"], [8, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::input"], [8, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::output_dtype"], [8, 0, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu"], [8, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::bit_rate"], [8, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::input"], [9, 0, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_boundaries"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_in_use_after"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_weight_value"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_examples"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_positives"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::logit"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::num_segments"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::positive_weight"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_lengths"], [9, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_value"], [10, 0, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda"], [10, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::compute_count"], [10, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::linear_indices"], [10, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::max_indices"], [8, 0, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu"], [8, 1, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu::input"], [9, 0, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_in_use_after"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_weight_value"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_examples"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_positives"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::logit"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::lower_bound"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::positive_weight"], [9, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::upper_bound"], [10, 0, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot"], [10, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::C"], [10, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::h_in"], [2, 0, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::D_offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::dev_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bias"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bits"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indice_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indices"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_locations"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float16_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float32_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float8_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int2_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int4_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int8_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::output_dtype"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::pooling_mode"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::row_alignment"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::total_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::uvm_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_placements"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_tys"], [2, 0, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::D_offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::dev_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bias"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bits"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indice_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indices"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_locations"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float16_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float32_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float8_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int2_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int4_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int8_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::output_dtype"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::pooling_mode"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::row_alignment"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::total_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::uvm_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_placements"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_tys"], [2, 0, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::D_offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_hash_size_cumsum"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_index_table_map"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::dev_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bias"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bits"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indice_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indices"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_locations"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_state"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_state"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float16_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float32_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float8_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int2_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int4_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int8_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::output_dtype"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::pooling_mode"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::row_alignment"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_cache_hash_size"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::uvm_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_placements"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_tys"], [2, 0, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::D_offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_hash_size_cumsum"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_index_table_map"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::dev_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bias"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bits"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indice_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indices"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_locations"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_state"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_state"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float16_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float32_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float8_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int2_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int4_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int8_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::output_dtype"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::pooling_mode"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::row_alignment"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_cache_hash_size"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::uvm_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_placements"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_tys"], [6, 0, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor"], [6, 1, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor::self"], [4, 0, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense"], [4, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::max_L"], [4, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::offsets"], [4, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::padding_value"], [4, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::values"], [4, 0, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense"], [4, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::max_sequence_length"], [4, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::offsets"], [4, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::values"], [4, 0, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_offsets"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_values"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::y"], [4, 0, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output"], [4, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_offsets"], [4, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_values"], [4, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::y"], [4, 0, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda"], [4, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_offsets"], [4, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_values"], [4, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::y"], [4, 0, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_offsets"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_values"], [4, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::y"], [4, 0, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense"], [4, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::max_lengths"], [4, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::offsets"], [4, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::padding_value"], [4, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::values"], [4, 0, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward"], [4, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::max_lengths"], [4, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::offsets"], [4, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::padding_value"], [4, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::values"], [10, 0, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::D_offsets"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lfu_state"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::row_alignment"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_offsets"], [10, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_tys"], [10, 0, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::D_offsets"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lfu_state"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::stochastic_rounding"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights"], [10, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights_offsets"], [10, 0, 1, "_CPPv428linearize_cache_indices_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_cuda"], [10, 1, 1, "_CPPv428linearize_cache_indices_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv428linearize_cache_indices_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_cuda::indices"], [10, 1, 1, "_CPPv428linearize_cache_indices_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_cuda::offsets"], [10, 0, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda"], [10, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_row_indices"], [10, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_table_indices"], [10, 0, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lock_cache_line"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lru_state"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lxu_cache_locking_counter"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::max_indices"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::time_stamp"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::unique_indices"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::unique_indices_length"], [10, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::uvm_cache_stats"], [10, 0, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::D_offsets"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::hash_size_cumsum"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lru_state"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::row_alignment"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::time_stamp"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::uvm_cache_stats"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_offsets"], [10, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_tys"], [10, 0, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::D_offsets"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::hash_size_cumsum"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lock_cache_line"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lru_state"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_locking_counter"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::stochastic_rounding"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::time_stamp"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::uvm_cache_stats"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights"], [10, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights_offsets"], [10, 0, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::D_offsets"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_index_table_map"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::stochastic_rounding"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::total_D"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::uvm_weights"], [10, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::weights_offsets"], [10, 0, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda"], [10, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations"], [10, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations_new"], [10, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::num_uniq_cache_indices"], [10, 0, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda"], [10, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locations"], [10, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locking_counter"], [10, 0, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::gather_cache_stats"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::invalid_index"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::linear_cache_indices"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_locations_output"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::num_uniq_cache_indices"], [10, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::uvm_cache_stats"], [6, 0, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor"], [6, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::self"], [6, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::sizes"], [6, 0, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor"], [6, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::self"], [6, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::sizes"], [6, 0, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta"], [6, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::self"], [6, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::sizes"], [6, 0, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor"], [6, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::is_host_mapped"], [6, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::self"], [6, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::sizes"], [6, 0, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor"], [6, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::self"], [6, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::sizes"], [3, 0, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::batch_size"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::include_last_offsets"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::indices_list"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::offsets_list"], [3, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::per_sample_weights"], [7, 0, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::pooled_embs"], [7, 0, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_permute_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::offset_dim_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::permute_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::pooled_embs"], [7, 0, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_permute_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::offset_dim_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::permute_list"], [7, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::pooled_embs"], [7, 0, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_permute_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::offset_dim_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::permute_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::pooled_embs"], [7, 0, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_permute_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::offset_dim_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::permute_list"], [7, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::pooled_embs"], [7, 0, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::allow_duplicates"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_offset_dim_list"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_permute_list"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::offset_dim_list"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::permute_list"], [7, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::pooled_embs"], [7, 0, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::pooled_embs"], [7, 0, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::offset_dim_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::permute_list"], [7, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::pooled_embs"], [2, 0, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings_offsets"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::indices"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::offsets"], [2, 0, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings_offsets"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::indices"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::offsets"], [2, 0, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::dense_indices"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table_offsets"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::indices"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::offsets"], [2, 0, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table_offsets"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::indices"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::offsets"], [2, 0, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table_offsets"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::indices"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::offsets"], [5, 0, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda"], [5, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::grad_output"], [5, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::num_features_per_rank"], [5, 0, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda"], [5, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::cumsum_dim_sum_per_rank"], [5, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::dim_sum_per_rank"], [5, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::grad_output"], [5, 0, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu"], [5, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::dim_sum_per_rank"], [5, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::grad_output"], [5, 0, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda"], [5, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::dim_sum_per_rank"], [5, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::grad_output"], [0, 0, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::DIRECT"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::Q_GRAN"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::block"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::inp"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_in"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::r"], [0, 0, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::C_PER_G"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::Q_GRAN"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::block"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::inp"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_in"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::r"], [10, 0, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::D_offsets"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::buffer_ids"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::cache_hash_size_cumsum"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::dev_weights"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::logical_table_ids"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_state"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_weights"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_dev"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_offsets"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_placements"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_uvm"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices_offsets"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::total_cache_hash_size"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::uvm_weights"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_offsets"], [10, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_placements"], [3, 0, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu"], [3, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::include_last_offsets"], [3, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::indices_list"], [3, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::offsets_list"], [3, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::per_sample_weights"], [6, 0, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise"], [6, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::cuda_memory_advise"], [6, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::self"], [6, 0, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async"], [6, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::device_t"], [6, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::self"], [6, 0, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork"], [6, 1, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork::self"], [6, 0, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage"], [6, 1, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage::self"], [6, 0, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu"], [6, 1, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu::self"], [6, 0, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone"], [6, 1, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone::self"], [6, 0, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device"], [6, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::prototype"], [6, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::self"], [16, 3, 0, "-", "fbgemm_gpu"]], "fbgemm_gpu.docs.examples": [[22, 4, 1, "", "example_method"]], "fbgemm_gpu.split_table_batched_embeddings_ops": [[16, 4, 1, "", "SplitTableBatchedEmbeddingBagsCodegen"]], "torch.ops.fbgemm": [[15, 4, 1, "", "batched_dense_vec_jagged_2d_mul"], [15, 4, 1, "", "dense_to_jagged"], [15, 4, 1, "", "jagged_1d_to_dense"], [15, 4, 1, "", "jagged_2d_to_dense"], [15, 4, 1, "", "jagged_dense_dense_elementwise_add_jagged_output"], [15, 4, 1, "", "jagged_dense_elementwise_add"], [15, 4, 1, "", "jagged_dense_elementwise_add_jagged_output"], [15, 4, 1, "", "jagged_dense_elementwise_mul"], [15, 4, 1, "", "jagged_to_padded_dense"], [15, 4, 1, "", "stacked_jagged_1d_to_dense"], [15, 4, 1, "", "stacked_jagged_2d_to_dense"]]}, "objtypes": {"0": "cpp:function", "1": "cpp:functionParam", "2": "cpp:templateParam", "3": "py:module", "4": "py:function"}, "objnames": {"0": ["cpp", "function", "C++ function"], "1": ["cpp", "functionParam", "C++ function parameter"], "2": ["cpp", "templateParam", "C++ template parameter"], "3": ["py", "module", "Python module"], "4": ["py", "function", "Python function"]}, "titleterms": {"quantiz": [0, 8], "util": 0, "refer": [0, 23], "implement": 0, "method": 0, "avx": 0, "2": 0, "512": 0, "build": [1, 11, 21], "instruct": [1, 11, 12], "fbgemm": [1, 24], "requir": 1, "hardwar": 1, "softwar": 1, "depend": 1, "asmjit": 1, "cpuinfo": 1, "googletest": 1, "set": [1, 11, 12, 21], "up": [1, 11, 12, 21], "an": [1, 11], "isol": [1, 11], "environ": [1, 11, 12, 21], "instal": [1, 11, 12], "tool": [1, 11], "c": [1, 11, 20, 24], "compil": [1, 11], "other": [1, 11, 23], "librari": [1, 12], "prepar": [1, 11], "linux": 1, "maco": 1, "cmake": 1, "bazel": 1, "window": 1, "embed": [2, 7, 10, 16], "oper": [2, 3, 4, 5, 6, 7, 8, 9, 10, 14, 15, 16], "cuda": [2, 4, 5, 6, 8, 9, 11, 12, 13], "cpu": [2, 4, 5, 8, 9, 11, 12], "combin": [3, 14], "input": 3, "jag": [4, 14, 15], "tensor": [4, 14, 15], "layout": 5, "transform": 5, "memori": 6, "pool": 7, "merg": 7, "permut": 7, "spars": 9, "data": 9, "tabl": [10, 16], "batch": [10, 16], "miniconda": 11, "conda": [11, 12], "onli": [11, 12], "docker": [11, 12], "imag": 11, "cudnn": 11, "rocm": [11, 12, 13], "miopen": 11, "pytorch": [11, 12], "through": [11, 12], "pip": [11, 12], "post": [11, 12], "check": [11, 12], "fbgemm_gpu": [11, 12, 13, 21, 24], "packag": [11, 12], "The": 11, "process": 11, "wheel": 11, "variabl": 11, "For": 11, "develop": [11, 24], "undefin": [11, 12], "symbol": [11, 12], "glibc": 11, "version": 11, "compat": 11, "nvidia": 12, "driver": 12, "contain": 12, "runtim": 12, "amdgpu": 12, "python": [12, 22, 24], "public": 12, "pypi": 12, "test": 13, "variant": 13, "benchmark": 13, "high": 14, "level": 14, "overview": [14, 24], "format": 14, "valu": 14, "offset": 14, "max": 14, "length": 14, "exampl": 14, "arithmet": 14, "convers": 14, "dens": 14, "tbe": 16, "contact": 17, "u": 17, "github": 17, "slack": 17, "contribut": 18, "code": [18, 20, 22, 23], "conduct": 18, "pull": 18, "request": 18, "contributor": 18, "licens": [18, 19], "agreement": 18, "cla": 18, "issu": 18, "ad": [20, 22, 23], "document": [20, 21, 22, 23, 24], "gener": [21, 22, 24], "guidelin": 21, "specif": 21, "guid": 21, "toolchain": 21, "lint": 21, "deploy": 21, "preview": 21, "todo": 22, "auto": 22, "sphinx": 23, "pointer": 23, "section": 23, "referenc": 23, "sourc": 23, "latex": 23, "graph": 23, "homepag": 24, "info": 24, "api": 24}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Testing FBGEMM_GPU": [[13, "testing-fbgemm-gpu"]], "FBGEMM_GPU Tests": [[13, "fbgemm-gpu-tests"]], "Testing with the CUDA Variant": [[13, "testing-with-the-cuda-variant"]], "Testing with the ROCm Variant": [[13, "testing-with-the-rocm-variant"]], "FBGEMM_GPU Benchmarks": [[13, "fbgemm-gpu-benchmarks"]], "Installation Instructions": [[12, "installation-instructions"]], "Set Up CPU-Only Environment": [[12, "set-up-cpu-only-environment"]], "Set Up CUDA Environment": [[12, "set-up-cuda-environment"]], "Install NVIDIA Drivers": [[12, "install-nvidia-drivers"]], "Set Up the CUDA Docker Container and Conda Environment": [[12, "set-up-the-cuda-docker-container-and-conda-environment"]], "Install the CUDA Runtime": [[12, "install-the-cuda-runtime"]], "Set Up ROCm Environment": [[12, "set-up-rocm-environment"]], "Install AMDGPU Drivers": [[12, "install-amdgpu-drivers"]], "Set Up the ROCm Docker Container and Conda Environment": [[12, "set-up-the-rocm-docker-container-and-conda-environment"]], "Install Python Libraries": [[12, "install-python-libraries"]], "Install PyTorch": [[12, "install-pytorch"], [11, "install-pytorch"]], "Install the FBGEMM_GPU Package": [[12, "install-the-fbgemm-gpu-package"]], "Install through PyTorch PIP": [[12, "install-through-pytorch-pip"]], "Install through Public PyPI": [[12, "install-through-public-pypi"]], "Post-Installation Checks": [[12, "post-installation-checks"]], "Undefined Symbols": [[12, "undefined-symbols"]], "Jagged Tensor Operators": [[14, "jagged-tensor-operators"], [15, "jagged-tensor-operators"], [4, "jagged-tensor-operators"]], "High Level Overview": [[14, "high-level-overview"]], "Jagged Tensor Format": [[14, "jagged-tensor-format"]], "Values": [[14, "values"]], "Offsets": [[14, "offsets"]], "Max Lengths": [[14, "max-lengths"]], "Jagged Tensor Example": [[14, "jagged-tensor-example"]], "Jagged Tensor Operations": [[14, "jagged-tensor-operations"]], "Arithmetic Operations": [[14, "arithmetic-operations"]], "Conversion Operations": [[14, "conversion-operations"]], "Jagged to Dense": [[14, "jagged-to-dense"]], "Dense to Jagged": [[14, "dense-to-jagged"]], "Combined Arithmetic + Conversion Operations": [[14, "combined-arithmetic-conversion-operations"]], "Sphinx Documentation Pointers": [[23, "sphinx-documentation-pointers"]], "References Other Sections of the Documentation": [[23, "references-other-sections-of-the-documentation"]], "Referencing the Source Code": [[23, "referencing-the-source-code"]], "Adding LaTeX": [[23, "adding-latex"]], "Adding Graphs": [[23, "adding-graphs"]], "Adding Documentation to Python Code": [[22, "adding-documentation-to-python-code"]], "Todo": [[22, "id1"]], "Adding Documentation to Auto-Generated Python Code": [[22, "adding-documentation-to-auto-generated-python-code"]], "Documentation": [[21, "documentation"]], "General Documentation Guidelines": [[21, "general-documentation-guidelines"]], "Specific Documentation Guides": [[21, "specific-documentation-guides"]], "Building the Documentation": [[21, "building-the-documentation"]], "Set Up Build Environment": [[21, "set-up-build-environment"]], "Build FBGEMM_GPU": [[21, "build-fbgemm-gpu"]], "Set Up the Documentation Toolchain": [[21, "set-up-the-documentation-toolchain"]], "Build the Documentation": [[21, "build-the-documentation"]], "Linting the Documentation": [[21, "linting-the-documentation"]], "Deployment Preview": [[21, "deployment-preview"]], "Contact Us": [[17, "contact-us"]], "GitHub": [[17, "github"]], "Slack": [[17, "slack"]], "Table Batched Embedding (TBE) Operators": [[16, "module-fbgemm_gpu"]], "FBGEMM and FBGEMM_GPU Documentation Homepage": [[24, "fbgemm-and-fbgemm-gpu-documentation-homepage"]], "General Info": [[24, null]], "FBGEMM Development": [[24, null]], "FBGEMM_GPU Development": [[24, null]], "FBGEMM_GPU Overview": [[24, null]], "FBGEMM C++ API": [[24, null]], "FBGEMM_GPU C++ API": [[24, null]], "FBGEMM_GPU Python API": [[24, null]], "Layout Transformation Operators": [[5, "layout-transformation-operators"]], "CUDA Operators": [[5, "cuda-operators"], [4, "cuda-operators"], [2, "cuda-operators"], [9, "cuda-operators"], [8, "cuda-operators"]], "CPU Operators": [[5, "cpu-operators"], [4, "cpu-operators"], [2, "cpu-operators"], [9, "cpu-operators"], [8, "cpu-operators"]], "Combine Input Operators": [[3, "combine-input-operators"]], "Contributing": [[18, "contributing"]], "Code of Conduct": [[18, "code-of-conduct"]], "Pull Requests": [[18, "pull-requests"]], "Contributor License Agreement (\u201cCLA\u201d)": [[18, "contributor-license-agreement-cla"]], "Issues": [[18, "issues"]], "License": [[18, "license"], [19, "license"]], "Adding Documentation to C++ Code": [[20, "adding-documentation-to-c-code"]], "Embedding Operators": [[2, "embedding-operators"]], "Quantization Utilities": [[0, "quantization-utilities"]], "Reference Implementation Methods": [[0, "reference-implementation-methods"]], "AVX-2 Implementation Methods": [[0, "avx-2-implementation-methods"]], "AVX-512 Implementation Methods": [[0, "avx-512-implementation-methods"]], "Build Instructions": [[1, "build-instructions"], [11, "build-instructions"]], "FBGEMM Requirements": [[1, "fbgemm-requirements"]], "Hardware Requirements": [[1, "hardware-requirements"]], "Software Dependencies": [[1, "software-dependencies"]], "asmjit": [[1, "asmjit"]], "cpuinfo": [[1, "cpuinfo"]], "GoogleTest": [[1, "googletest"]], "Set Up an Isolated Build Environment": [[1, "set-up-an-isolated-build-environment"], [11, "set-up-an-isolated-build-environment"]], "Install the Build Tools": [[1, "install-the-build-tools"], [11, "install-the-build-tools"]], "C/C++ Compiler": [[1, "c-c-compiler"], [11, "c-c-compiler"]], "Other Build Tools": [[1, "other-build-tools"], [11, "other-build-tools"]], "Build the FBGEMM Library": [[1, "build-the-fbgemm-library"]], "Preparing the Build": [[1, "preparing-the-build"], [11, "preparing-the-build"]], "Building on Linux and macOS (CMake)": [[1, "building-on-linux-and-macos-cmake"]], "Building on Linux (Bazel)": [[1, "building-on-linux-bazel"]], "Building on Windows": [[1, "building-on-windows"]], "Install Miniconda": [[11, "install-miniconda"]], "Set Up the Conda Environment": [[11, "set-up-the-conda-environment"]], "Set Up for CPU-Only Build": [[11, "set-up-for-cpu-only-build"]], "Set Up for CUDA Build": [[11, "set-up-for-cuda-build"]], "CUDA Docker Image": [[11, "cuda-docker-image"]], "Install CUDA": [[11, "install-cuda"]], "Install cuDNN": [[11, "install-cudnn"]], "Set Up for ROCm Build": [[11, "set-up-for-rocm-build"]], "ROCm Docker Image": [[11, "rocm-docker-image"]], "Install ROCm": [[11, "install-rocm"]], "Install MIOpen": [[11, "install-miopen"]], "Installation Through Conda": [[11, "installation-through-conda"]], "Installation Through PyTorch PIP": [[11, "installation-through-pytorch-pip"]], "Post-Install Checks": [[11, "post-install-checks"]], "Build the FBGEMM_GPU Package": [[11, "build-the-fbgemm-gpu-package"]], "The Build Process": [[11, "the-build-process"]], "Set Wheel Build Variables": [[11, "set-wheel-build-variables"]], "CPU-Only Build": [[11, "cpu-only-build"]], "CUDA Build": [[11, "cuda-build"]], "ROCm Build": [[11, "rocm-build"]], "Post-Build Checks (For Developers)": [[11, "post-build-checks-for-developers"]], "Undefined Symbols Check": [[11, "undefined-symbols-check"]], "GLIBC Version Compatibility Check": [[11, "glibc-version-compatibility-check"]], "Sparse Data Operators": [[9, "sparse-data-operators"]], "Table Batched Embedding Operators": [[10, "table-batched-embedding-operators"]], "Quantization Operators": [[8, "quantization-operators"]], "CUDA Memory Operators": [[6, "cuda-memory-operators"]], "Pooled Embeddings Operators": [[7, "pooled-embeddings-operators"]], "Merge Operators": [[7, "merge-operators"]], "Permutation Operators": [[7, "permutation-operators"]]}, "indexentries": {"findminmax (c++ function)": [[0, "_CPPv410FindMinMaxPKfPfPf7int64_t"]], "floatorhalftofusednbitrowwisequantizedsbhalf (c++ function)": [[0, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE"]], "fusedquantizedequantize (c++ function)": [[0, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif"]], "quantizegroupwise (c++ function)": [[0, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T"]], "xor128 (c++ function)": [[0, "_CPPv46Xor128v"]], "requantizeoutputprocessingavx2 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "requantizeoutputprocessinggconvavx512 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "bounds_check_indices_cuda (c++ function)": [[2, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t"]], "int_nbit_split_embedding_codegen_lookup_function (c++ function)": [[2, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE"]], "int_nbit_split_embedding_codegen_lookup_function_cpu (c++ function)": [[2, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function (c++ function)": [[2, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu (c++ function)": [[2, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE"]], "pruned_array_lookup_cpu (c++ function)": [[2, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor"]], "pruned_array_lookup_cuda (c++ function)": [[2, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_insert_unweighted_cpu (c++ function)": [[2, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_cuda (c++ function)": [[2, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_unweighted_cpu (c++ function)": [[2, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor"]], "padding_fused_tbe_input_combine_cpu (c++ function)": [[3, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t"]], "tbe_input_combine_cpu (c++ function)": [[3, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE"]], "batched_dense_vec_jagged_2d_mul (c++ function)": [[4, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor"]], "dense_to_jagged (c++ function)": [[4, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE"]], "jagged_1d_to_dense (c++ function)": [[4, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t"]], "jagged_2d_to_dense (c++ function)": [[4, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE"]], "jagged_dense_elementwise_add (c++ function)": [[4, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output (c++ function)": [[4, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output_cuda (c++ function)": [[4, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_mul (c++ function)": [[4, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_to_padded_dense (c++ function)": [[4, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd"]], "jagged_to_padded_dense_forward (c++ function)": [[4, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd"]], "recat_embedding_grad_output_cuda (c++ function)": [[5, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_batch_cuda (c++ function)": [[5, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor"]], "recat_embedding_grad_output_mixed_d_cpu (c++ function)": [[5, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_cuda (c++ function)": [[5, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE"]], "is_uvm_tensor (c++ function)": [[6, "_CPPv413is_uvm_tensorRK6Tensor"]], "new_host_mapped_tensor (c++ function)": [[6, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor (c++ function)": [[6, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor_meta (c++ function)": [[6, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_unified_tensor (c++ function)": [[6, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb"]], "new_vanilla_managed_tensor (c++ function)": [[6, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "uvm_cuda_mem_advise (c++ function)": [[6, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t"]], "uvm_cuda_mem_prefetch_async (c++ function)": [[6, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE"]], "uvm_mem_advice_dont_fork (c++ function)": [[6, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor"]], "uvm_storage (c++ function)": [[6, "_CPPv411uvm_storageRK6Tensor"]], "uvm_to_cpu (c++ function)": [[6, "_CPPv410uvm_to_cpuRK6Tensor"]], "uvm_to_cpu_clone (c++ function)": [[6, "_CPPv416uvm_to_cpu_cloneRK6Tensor"]], "uvm_to_device (c++ function)": [[6, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor"]], "all_to_one_device (c++ function)": [[7, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE"]], "permute_pooled_embs_auto_grad (c++ function)": [[7, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_cpu (c++ function)": [[7, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_gpu (c++ function)": [[7, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_split_cpu (c++ function)": [[7, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_auto_grad_split_gpu (c++ function)": [[7, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_cpu_impl (c++ function)": [[7, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb"]], "permute_pooled_embs_split_cpu (c++ function)": [[7, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_split_gpu (c++ function)": [[7, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "fp8quantizedtofloat_ref (c++ function)": [[8, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi"]], "fp8rowwise_to_float_cpu (c++ function)": [[8, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t"]], "floattofp8quantized_ref (c++ function)": [[8, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd"]], "_fp8rowwise_to_float_gpu (c++ function)": [[8, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t"]], "_bfloat16_to_float_gpu (c++ function)": [[8, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE"]], "_float_to_fp8rowwise_gpu (c++ function)": [[8, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb"]], "_float_to_bfloat16_gpu (c++ function)": [[8, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE"]], "_float_to_fused8bitrowwise_cpu_out (c++ function)": [[8, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor"]], "_float_to_fused8bitrowwise_gpu (c++ function)": [[8, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor"]], "_float_to_fusednbitrowwise_gpu (c++ function)": [[8, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "_float_to_hfp8_gpu (c++ function)": [[8, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd"]], "_float_to_msfp_gpu (c++ function)": [[8, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd"]], "_float_to_paddedfp8rowwise_gpu (c++ function)": [[8, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t"]], "_fused8bitrowwise_to_float_cpu_out (c++ function)": [[8, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor"]], "_fused8bitrowwise_to_float_gpu (c++ function)": [[8, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_float_mixed_dim_gpu (c++ function)": [[8, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t"]], "_fused8bitrowwise_to_half_gpu (c++ function)": [[8, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_single_or_half_precision_gpu (c++ function)": [[8, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb"]], "_fusednbitrowwise_to_float_gpu (c++ function)": [[8, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_half_gpu (c++ function)": [[8, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_single_or_half_precision_gpu (c++ function)": [[8, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_half_to_fused8bitrowwise_gpu (c++ function)": [[8, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor"]], "_half_to_fusednbitrowwise_gpu (c++ function)": [[8, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t"]], "_hfp8_to_float_gpu (c++ function)": [[8, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_msfp_to_float_gpu (c++ function)": [[8, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t"]], "_paddedfp8rowwise_to_float_gpu (c++ function)": [[8, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t"]], "_single_or_half_precision_to_fused8bitrowwise_gpu (c++ function)": [[8, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor"]], "_single_or_half_precision_to_fusednbitrowwise_gpu (c++ function)": [[8, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "float_or_half_to_fused8bitrowwise_cpu (c++ function)": [[8, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor"]], "float_to_fp8rowwise_cpu (c++ function)": [[8, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb"]], "float_to_fused8bitrowwise_cpu (c++ function)": [[8, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor"]], "fused8bitrowwise_to_float_cpu (c++ function)": [[8, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor"]], "fused8bitrowwise_to_float_or_half_cpu (c++ function)": [[8, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb"]], "fused8bitrowwise_to_half_cpu (c++ function)": [[8, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor"]], "fusednbitrowwise_to_float_cpu (c++ function)": [[8, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t"]], "fusednbitrowwise_to_float_or_half_cpu (c++ function)": [[8, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t"]], "fusednbitrowwise_to_half_cpu (c++ function)": [[8, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t"]], "half_to_fused8bitrowwise_cpu (c++ function)": [[8, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor"]], "expand_into_jagged_permute_cuda (c++ function)": [[9, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t"]], "generic_histogram_binning_calibration_by_feature_cpu (c++ function)": [[9, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td"]], "histogram_binning_calibration_cpu (c++ function)": [[9, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td"]], "direct_mapped_lru_cache_populate_byte_cuda (c++ function)": [[10, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "direct_mapped_lxu_cache_lookup_cuda (c++ function)": [[10, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "get_unique_indices_cuda (c++ function)": [[10, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb"]], "host_lxu_cache_slot (c++ function)": [[10, "_CPPv419host_lxu_cache_slot7int64_t7int64_t"]], "lfu_cache_populate_byte_cuda (c++ function)": [[10, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "lfu_cache_populate_cuda (c++ function)": [[10, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb"]], "linearize_cache_indices_cuda (c++ function)": [[10, "_CPPv428linearize_cache_indices_cudaN2at6TensorEN2at6TensorEN2at6TensorE"]], "linearize_cache_indices_from_row_idx_cuda (c++ function)": [[10, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE"]], "lru_cache_find_uncached_cuda (c++ function)": [[10, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE"]], "lru_cache_populate_byte_cuda (c++ function)": [[10, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "lru_cache_populate_cuda (c++ function)": [[10, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE"]], "lxu_cache_flush_cuda (c++ function)": [[10, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb"]], "lxu_cache_locations_update_cuda (c++ function)": [[10, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE"]], "lxu_cache_locking_counter_decrement_cuda (c++ function)": [[10, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE"]], "lxu_cache_lookup_cuda (c++ function)": [[10, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE"]], "reset_weight_momentum_cuda (c++ function)": [[10, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "batched_dense_vec_jagged_2d_mul() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.batched_dense_vec_jagged_2d_mul"]], "dense_to_jagged() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.dense_to_jagged"]], "jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_1d_to_dense"]], "jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_2d_to_dense"]], "jagged_dense_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_dense_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_add() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_dense_elementwise_add"]], "jagged_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_mul() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_dense_elementwise_mul"]], "jagged_to_padded_dense() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.jagged_to_padded_dense"]], "stacked_jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.stacked_jagged_1d_to_dense"]], "stacked_jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[15, "torch.ops.fbgemm.stacked_jagged_2d_to_dense"]], "splittablebatchedembeddingbagscodegen() (in module fbgemm_gpu.split_table_batched_embeddings_ops)": [[16, "fbgemm_gpu.split_table_batched_embeddings_ops.SplitTableBatchedEmbeddingBagsCodegen"]], "fbgemm_gpu": [[16, "module-fbgemm_gpu"]], "module": [[16, "module-fbgemm_gpu"]], "example_method (c++ function)": [[20, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf"]], "example_method() (in module fbgemm_gpu.docs.examples)": [[22, "fbgemm_gpu.docs.examples.example_method"]]}}) \ No newline at end of file