From 16539f1f6dbc615f9578d21fdbbd2bf18f622063 Mon Sep 17 00:00:00 2001 From: facebook-github-bot Date: Mon, 6 May 2024 00:12:09 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20pytorch/?= =?UTF-8?q?FBGEMM@bc4e9c335956280c2cad926418fe75b85a9dedf6=20=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- fbgemm_gpu-python-api/table_batched_embedding_ops.html | 2 +- searchindex.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fbgemm_gpu-python-api/table_batched_embedding_ops.html b/fbgemm_gpu-python-api/table_batched_embedding_ops.html index 72595f79a..8624db7e3 100644 --- a/fbgemm_gpu-python-api/table_batched_embedding_ops.html +++ b/fbgemm_gpu-python-api/table_batched_embedding_ops.html @@ -410,7 +410,7 @@
  • beta1 (float, optional) – The beta1 value used by LAMB and ADAM

  • beta2 (float, optional) – The beta2 value used by LAMB and ADAM

  • pooling_mode (PoolingMode, optional) – Pooling mode (PoolingMode.SUM, PoolingMode.MEAN, PoolingMode.NONE)

  • -
  • device (torch.device, optional) – The current device to place tensors on

  • +
  • device (torch.device, optional) – The current device to place tensors on

  • bounds_check_mode (BoundsCheckMode, optional) – If not set to BoundsCheckMode.NONE, apply boundary check for indices (BoundsCheckMode.NONE, BoundsCheckMode.FATAL, BoundsCheckMode.WARNING, BoundsCheckMode.IGNORE)

  • diff --git a/searchindex.js b/searchindex.js index 33a6271c6..072cb18e5 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["fbgemm-cpp-api/QuantUtils", "fbgemm-development/BuildInstructions", "fbgemm_gpu-cpp-api/embedding_ops", "fbgemm_gpu-cpp-api/experimental_ops", "fbgemm_gpu-cpp-api/input_combine", "fbgemm_gpu-cpp-api/jagged_tensor_ops", "fbgemm_gpu-cpp-api/layout_transform_ops", "fbgemm_gpu-cpp-api/memory_utils", "fbgemm_gpu-cpp-api/merge_pooled_embeddings", "fbgemm_gpu-cpp-api/quantize_ops", "fbgemm_gpu-cpp-api/sparse_ops", "fbgemm_gpu-cpp-api/split_table_batched_embeddings", "fbgemm_gpu-development/BuildInstructions", "fbgemm_gpu-development/InstallationInstructions", "fbgemm_gpu-development/TestInstructions", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps", "fbgemm_gpu-python-api/jagged_tensor_ops", "fbgemm_gpu-python-api/table_batched_embedding_ops", "general/ContactUs", "general/Contributing", "general/License", "general/documentation/Cpp", "general/documentation/Overview", "general/documentation/Python", "general/documentation/Sphinx", "index"], "filenames": ["fbgemm-cpp-api/QuantUtils.rst", "fbgemm-development/BuildInstructions.rst", "fbgemm_gpu-cpp-api/embedding_ops.rst", "fbgemm_gpu-cpp-api/experimental_ops.rst", "fbgemm_gpu-cpp-api/input_combine.rst", "fbgemm_gpu-cpp-api/jagged_tensor_ops.rst", "fbgemm_gpu-cpp-api/layout_transform_ops.rst", "fbgemm_gpu-cpp-api/memory_utils.rst", "fbgemm_gpu-cpp-api/merge_pooled_embeddings.rst", "fbgemm_gpu-cpp-api/quantize_ops.rst", "fbgemm_gpu-cpp-api/sparse_ops.rst", "fbgemm_gpu-cpp-api/split_table_batched_embeddings.rst", "fbgemm_gpu-development/BuildInstructions.rst", "fbgemm_gpu-development/InstallationInstructions.rst", "fbgemm_gpu-development/TestInstructions.rst", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "general/ContactUs.rst", "general/Contributing.rst", "general/License.rst", "general/documentation/Cpp.rst", "general/documentation/Overview.rst", "general/documentation/Python.rst", "general/documentation/Sphinx.rst", "index.rst"], "titles": ["Quantization Utilities", "Build Instructions", "Embedding Operators", "Experimental Operators", "Combine Input Operators", "Jagged Tensor Operators", "Layout Transformation Operators", "CUDA Memory Operators", "Pooled Embeddings Operators", "Quantization Operators", "Sparse Data Operators", "Table Batched Embedding Operators", "Build Instructions", "Installation Instructions", "Test Instructions", "Jagged Tensor Operators", "Jagged Tensor Operators", "Table Batched Embedding (TBE) Operators", "Contact Us", "Contributing", "License", "Adding Documentation to C++ Code", "Documentation", "Adding Documentation to Python Code", "Sphinx Documentation Pointers", "FBGEMM and FBGEMM_GPU Documentation Homepage"], "terms": {"templat": [0, 12, 21], "typenam": [0, 21], "t": [0, 1, 3, 7, 10, 12, 17, 19, 21, 22], "layout_t": 0, "layout": [0, 25], "kcx": 0, "void": [0, 2, 7, 9, 11], "quantizegroupwis": 0, "const": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 23], "float": [0, 9, 16, 17, 21, 23], "src": 0, "int": [0, 9, 16, 17, 21, 23], "k": [0, 3], "c": [0, 11, 13, 15, 20, 22, 23, 24], "x": [0, 5, 15, 21, 23], "g": [0, 1, 10, 12, 21, 23], "scale": [0, 3], "std": [0, 3, 4, 5, 6, 7, 8, 10, 11, 12, 21, 23], "int32_t": [0, 21, 23], "zero_point": 0, "dst": 0, "point": [0, 9, 16, 21, 23], "data": [0, 7, 15, 17, 20, 25], "type": [0, 1, 9, 13, 15, 16, 17, 21], "paramet": [0, 3, 7, 9, 10, 16, 17, 21, 22, 23], "output": [0, 3, 5, 9, 10, 16, 17, 21, 23], "int8_t": 0, "uint8_t": [0, 9, 11], "ar": [0, 1, 5, 11, 12, 13, 15, 16, 17, 20, 21, 22, 23], "support": [0, 1, 3, 12, 13, 15, 23, 25], "input": [0, 3, 5, 7, 9, 10, 15, 16, 17, 21, 25], "tensor": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 17, 22, 23, 25], "kxc": 0, "correspond": [0, 10, 11, 15, 21, 23], "kcr": 0, "kctr": 0, "weight": [0, 2, 10, 11, 17], "time": [0, 1, 12, 13, 15], "dimens": [0, 3, 5, 7, 10, 15, 16, 17, 23], "krsc": 0, "ktrsc": 0, "channel": [0, 12, 13, 18], "number": [0, 1, 3, 10, 12, 15, 16, 17, 22], "r": [0, 14, 22], "": [0, 1, 7, 12, 14, 15, 19, 21, 22, 23], "group": [0, 3, 15, 21], "function": [0, 1, 12, 21, 23], "perform": [0, 1, 9, 10, 15, 25], "channelwis": 0, "1": [0, 1, 3, 10, 11, 12, 13, 14, 15, 16, 17, 22, 23, 24], "groupwis": 0, "per": [0, 15], "size": [0, 1, 3, 7, 9, 10, 15, 16, 17], "should": [0, 10, 11, 12, 13, 15, 19, 21, 22, 23], "equal": [0, 15, 23], "zero": [0, 16, 23], "reprsent": 0, "fusedquantizedequant": 0, "int64_t": [0, 2, 3, 4, 5, 6, 7, 9, 10, 11], "len": [0, 15], "tensorquantizationparam": 0, "qparam": 0, "thread_id": 0, "0": [0, 1, 9, 10, 11, 12, 13, 15, 16, 17, 23], "num_thread": 0, "noise_ratio": 0, "0f": 0, "fuse": [0, 9, 17], "integ": [0, 7, 9, 15], "dequant": 0, "kernel": [0, 1, 7, 14, 25], "acceler": 0, "awar": 0, "train": [0, 17, 25], "fp32": [0, 9, 17], "valu": [0, 5, 7, 9, 10, 11, 16, 17, 21, 22, 23], "u": [0, 12, 24, 25], "int8": [0, 17], "us": [0, 1, 3, 7, 10, 12, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25], "provid": [0, 1, 12, 13, 14, 20, 21, 22, 23, 25], "back": [0, 7, 11, 12, 13], "inputtyp": 0, "floatorhalftofusednbitrowwisequantizedsbhalf": 0, "bit_rat": [0, 9], "size_t": [0, 9, 21], "input_row": 0, "input_column": 0, "convert": [0, 7, 9, 15, 16, 23], "fp16": [0, 9, 17], "rowwis": [0, 9, 17], "bitrat": 0, "specifi": [0, 1, 9, 10, 12, 16, 17], "bit": [0, 9], "bia": [0, 3, 9], "each": [0, 3, 10, 12, 15, 16, 17, 23], "row": [0, 5, 11, 15, 16, 17, 23], "store": [0, 10, 11], "itself": [0, 15, 22], "end": [0, 13, 15, 24], "can": [0, 1, 9, 10, 12, 13, 15, 21, 22, 23, 24], "4": [0, 12, 13, 15, 16, 17, 23], "8": [0, 9, 12, 15, 17], "uint32_t": 0, "xor128": 0, "random": 0, "gener": [0, 1, 10, 12, 13, 21, 24], "9": [0, 12, 15, 17], "base": [0, 1, 10, 11, 12, 15], "thi": [0, 1, 5, 7, 8, 10, 12, 13, 15, 18, 19, 20, 21, 23, 24, 25], "paper": 0, "findminmax": 0, "m": [0, 12, 13, 14], "min": 0, "max": [0, 3, 17], "find": [0, 11, 12], "matrix": [0, 1, 16, 25], "bool": [0, 3, 7, 8, 9, 11, 17], "a_symmetr": 0, "b_symmetr": 0, "quantizationgranular": 0, "q_gran": 0, "has_bia": 0, "fuse_relu": 0, "bias_typ": 0, "direct": [0, 11, 13, 20, 21, 23, 24], "fals": [0, 7, 17, 22], "requantizeoutputprocessingavx2": 0, "out": [0, 12, 18, 20, 22], "inp": 0, "block_type_t": 0, "block": [0, 21, 23, 24], "ld_out": 0, "ld_in": 0, "requantizationparams_t": 0, "requant": 0, "avx2": [0, 1], "i": [0, 1, 3, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25], "c_per_g": 0, "requantizeoutputprocessinggconvavx512": 0, "avx512": 0, "note": [1, 11, 12, 13, 21, 22, 23, 24], "The": [1, 3, 7, 9, 10, 13, 14, 15, 16, 17, 19, 21, 22, 23, 24], "most": [1, 12, 13, 15, 22], "date": [1, 12, 13, 22], "embed": [1, 12, 13, 22, 25], "script": [1, 12, 13, 22], "bundl": [1, 12, 13, 22], "repo": [1, 12, 13, 22, 23], "under": [1, 12, 13, 19, 20, 22, 23], "setup_env": [1, 12, 13, 22], "bash": [1, 12, 13, 22], "step": [1, 12, 13, 15, 22, 23], "fbgemm_gpu": [1, 7, 15, 17, 18, 19, 20, 21, 23], "follow": [1, 10, 12, 13, 15, 20, 21, 22, 23], "toolchain": [1, 12, 13], "run": [1, 12, 13, 22], "cpu": [1, 7, 8, 14, 22], "higher": 1, "In": [1, 10, 12, 13, 15, 19, 21, 23], "doe": [1, 2, 13, 21, 22, 23], "have": [1, 10, 11, 12, 15, 22], "ani": [1, 10, 12, 16, 19, 20, 22, 23], "intel": 1, "mkl": 1, "howev": [1, 12, 15, 20], "comparison": 1, "some": [1, 12, 15, 22], "benchmark": 1, "If": [1, 12, 13, 17, 19, 21, 22, 23], "found": [1, 12, 13, 22], "path": [1, 12, 21, 24], "through": [1, 19, 21, 23], "intel_mkl_dir": 1, "variabl": 1, "built": [1, 12, 13, 22, 25], "report": [1, 13], "otherwis": [1, 7, 13, 20], "subset": 1, "all": [1, 10, 11, 12, 13, 15, 17, 20, 22], "three": [1, 15], "git": [1, 12], "submodul": [1, 12], "custom": [1, 24], "version": [1, 13], "desir": [1, 12, 15, 16, 21], "thei": [1, 12, 22, 24], "asmjit_src_dir": 1, "cpuinfo_src_dir": 1, "googletest_source_dir": 1, "With": 1, "inner": [1, 15], "take": [1, 12], "one": [1, 3, 9, 10, 11, 16, 17, 21, 23], "doesn": 1, "fit": [1, 20], "approach": 1, "so": [1, 10, 12, 13, 15], "implement": [1, 3, 12, 15], "dynam": 1, "effici": [1, 25], "shape": [1, 3, 15, 17], "specif": [1, 10, 12, 17, 20], "vector": [1, 4, 5, 6, 7, 8, 16, 23], "code": [1, 12, 20, 22], "third": 1, "parti": 1, "call": [1, 7, 13], "detect": [1, 14], "runtim": [1, 12], "pytorch": [1, 15, 18, 22, 23, 25], "project": [1, 19], "dispatch": [1, 7], "optim": [1, 9, 17], "test": [1, 12, 13, 19, 25], "you": [1, 19, 21, 23], "don": [1, 10, 12, 22], "want": [1, 19], "togeth": [1, 21, 22], "default": [1, 10, 12, 13, 17], "turn": [1, 22], "off": [1, 13, 18], "simpli": [1, 12], "fbgemm_build_test": 1, "conda": [1, 14, 22], "For": [1, 14, 15, 18, 20, 21, 22, 23, 24], "platform": [1, 12, 20], "machin": [1, 12, 13, 14, 25], "microsoft": [1, 9], "visual": 1, "studio": 1, "2019": 1, "newer": [1, 12], "recommend": [1, 5, 12, 13, 15], "here": [1, 7, 12, 19, 21, 22, 23, 24], "necessari": [1, 12], "ninja": [1, 12], "etc": [1, 12, 17], "n": [1, 9, 12, 13, 24], "env_nam": [1, 12, 13], "y": [1, 5, 12, 13, 16, 22], "doxygen": [1, 21, 22], "make": [1, 11, 12, 19, 21, 22, 23], "openbla": 1, "packag": [1, 14, 22], "onli": [1, 3, 10, 11, 14, 15, 19, 21, 22, 24], "clone": [1, 12], "along": [1, 12, 13], "its": [1, 7, 10, 12, 17, 20, 22, 24], "insid": [1, 12, 13, 14, 22, 24], "recurs": [1, 12], "http": [1, 12, 13, 19, 21, 22, 23], "github": [1, 12, 19], "com": [1, 12, 19], "cd": [1, 12, 14, 22], "assum": [1, 10], "process": [1, 5, 13, 15, 19, 23], "straightforward": 1, "creat": [1, 7, 12, 15, 19, 21, 23, 24], "directori": [1, 12, 14, 19, 21, 22], "mkdir": 1, "argument": [1, 10, 21, 22, 23], "build_arg": 1, "duse_sanit": 1, "address": [1, 12], "dfbgemm_library_typ": 1, "share": [1, 7], "dpython_execut": 1, "which": [1, 10, 12, 13, 15, 17, 22], "python3": [1, 13], "option": [1, 2, 5, 7, 11, 12, 16, 17], "document": [1, 7, 19, 20], "dfbgemm_build_doc": 1, "ON": [1, 20], "j": [1, 15], "verbos": 1, "As": [1, 10, 12, 13, 15], "write": [1, 12, 13, 22, 23], "fail": [1, 13, 14, 21], "due": [1, 12], "known": [1, 12, 17], "regress": 1, "To": [1, 12, 14, 24], "work": [1, 12, 13, 15, 19], "around": 1, "append": [1, 12, 21, 23], "export": [1, 12, 14], "prior": [1, 12, 13, 20], "cflag": 1, "wno": 1, "error": [1, 9, 13, 21, 22, 23], "mayb": 1, "uniniti": 1, "restrict": 1, "cxxflag": 1, "pleas": [1, 19, 21, 23], "see": [1, 7, 12, 13, 15, 21, 23, 24], "77939": 1, "1094": 1, "1666": 1, "more": [1, 7, 12, 17, 21, 23, 24], "detail": [1, 13], "exactli": 1, "same": [1, 3, 7, 10, 12, 15, 16, 21, 22, 23], "extra": 1, "need": [1, 12, 13, 14, 15, 19, 21, 23, 24], "ad": [1, 19, 22], "invoc": [1, 12, 22], "llvm": [1, 12], "standard": [1, 12], "libc": 1, "openmp": [1, 12], "libomp": 1, "locat": [1, 7, 11, 12, 15], "cc_path": 1, "cxx_path": 1, "dcmake_c_compil": 1, "dcmake_cxx_compil": 1, "dcmake_c_flag": [1, 12], "fopenmp": 1, "stdlib": [1, 12], "conda_prefix": [1, 12], "includ": [1, 8, 12, 20, 21, 23], "dcmake_cxx_flag": [1, 12], "likewis": 1, "also": [1, 12, 17, 24], "veri": [1, 12, 21, 22, 23], "target": [1, 7, 9, 10, 12, 15, 21, 22, 23, 24], "architectur": [1, 12, 13], "bc": [1, 12], "x64": 1, "program": [1, 19], "file": [1, 12, 13, 18, 19, 21, 22, 23, 24], "x86": [1, 25], "enterpris": 1, "vc": 1, "auxiliari": 1, "vcvarsal": 1, "bat": 1, "build_dir": 1, "dfbgemm_build_benchmark": 1, "dcmake_build_typ": 1, "releas": [1, 13], "cl": 1, "ex": 1, "v": [1, 3, 5, 14, 16], "int_nbit_split_embedding_codegen_lookup_funct": 2, "dev_weight": [2, 11], "uvm_weight": [2, 11], "weights_plac": [2, 11], "weights_offset": [2, 11], "weights_ti": [2, 11], "d_offset": [2, 9, 11], "total_d": [2, 11, 17], "max_int2_d": 2, "max_int4_d": 2, "max_int8_d": 2, "max_float16_d": 2, "max_float32_d": 2, "indic": [2, 11, 15, 17], "offset": [2, 5, 10, 11, 16, 17], "pooling_mod": [2, 17], "c10": [2, 5, 7, 9, 11], "indice_weight": 2, "output_dtyp": [2, 9, 17], "lxu_cache_weight": [2, 11], "lxu_cache_loc": [2, 11], "row_align": [2, 11], "max_float8_d": 2, "fp8_exponent_bit": 2, "fp8_exponent_bia": 2, "int_nbit_split_embedding_uvm_caching_codegen_lookup_funct": 2, "cache_hash_size_cumsum": [2, 11], "total_cache_hash_s": [2, 11], "cache_index_table_map": [2, 11], "lxu_cache_st": [2, 11], "lxu_stat": 2, "simlar": 2, "uvm_cach": 2, "lookup": [2, 11], "pruned_hashmap_lookup_cuda": 2, "hash_tabl": 2, "hash_table_offset": 2, "pruned_array_lookup_cuda": 2, "index_remap": 2, "index_remappings_offset": 2, "bounds_check_indices_cuda": 2, "rows_per_t": 2, "bounds_check_mod": [2, 17], "warn": [2, 17, 21], "b_ofset": 2, "max_b": [2, 11], "int_nbit_split_embedding_codegen_lookup_function_cpu": 2, "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu": 2, "pruned_hashmap_insert_unweighted_cpu": 2, "dense_indic": 2, "pruned_hashmap_lookup_unweighted_cpu": 2, "pruned_array_lookup_cpu": 2, "tupl": [3, 4, 5, 10, 11, 17], "gqa_attn_splitk": 3, "xq": 3, "cache_k": 3, "cache_v": 3, "seq_posit": 3, "doubl": [3, 5, 9, 10], "qk_scale": 3, "num_split_k": 3, "num_int4_kv_group": 3, "use_tensor_cor": 3, "decod": 3, "queri": 3, "split": 3, "w": [3, 14], "bf16": [3, 9], "int4": 3, "kv": 3, "cuda": [3, 8, 17, 25], "gqa": 3, "cach": [3, 11, 12, 17], "It": [3, 12, 13, 15], "current": [3, 12, 13, 15, 17], "context": 3, "length": [3, 5, 10, 16, 17, 23], "16384": 3, "fix": [3, 10, 12], "head": 3, "128": 3, "an": [3, 7, 10, 13, 14, 15, 17, 21, 22, 23, 24], "arbitrari": 3, "b": [3, 10, 12, 15, 16, 17, 21, 22, 23, 24], "h_q": 3, "d": [3, 15, 16, 24], "where": [3, 5, 7, 10, 15, 16, 17], "batch": [3, 5, 10, 15, 16, 25], "num": 3, "max_t": 3, "h_kv": 3, "sequenc": 3, "posit": [3, 10, 17], "contain": [3, 7, 12, 15, 16, 17, 23], "actual": [3, 12], "token": [3, 15], "appli": [3, 10, 12, 15, 17], "after": [3, 10, 12, 13, 14, 15, 17, 22, 23, 24], "qk": 3, "control": 3, "amount": [3, 17], "parallel": 3, "wise": [3, 15, 17], "quantiz": [3, 25], "whether": [3, 7, 12, 20], "core": 3, "wmma": 3, "instruct": [3, 19, 21, 22, 23, 25], "fast": 3, "return": [3, 7, 9, 10, 16, 17, 21, 22, 23], "A": [3, 7, 9, 13, 15, 16, 17, 20, 21, 22, 23], "combin": [3, 25], "non": [3, 7, 17], "metadata": 3, "softmax": 3, "sum": [3, 10, 16, 17], "tbe_input_combine_cpu": 4, "indices_list": 4, "offsets_list": 4, "per_sample_weight": [4, 17], "include_last_offset": 4, "padding_fused_tbe_input_combine_cpu": 4, "batch_siz": 4, "solv": 5, "issu": [5, 7, 12, 13, 18], "when": [5, 10, 12, 14, 15, 17, 21, 22, 24], "differ": [5, 10, 15], "often": 5, "occur": [5, 21], "spars": [5, 15, 25], "featur": [5, 10, 15, 17, 18], "system": [5, 12, 13, 15], "well": [5, 10, 12, 21], "natur": [5, 15], "languag": [5, 15, 24], "jagged_to_padded_dense_forward": 5, "symintarrayref": 5, "max_length": [5, 16], "padding_valu": [5, 16], "jagged_dense_elementwise_add_jagged_output_cuda": 5, "x_valu": [5, 16], "x_offset": [5, 16, 23], "dens": [5, 16, 23], "jagged_to_padded_dens": [5, 16], "jagged_dense_elementwise_add": [5, 16], "jagged_dense_elementwise_mul": [5, 16], "batched_dense_vec_jagged_2d_mul": [5, 16], "a_valu": [5, 16], "a_offset": [5, 16], "dense_to_jag": [5, 16], "symint": 5, "total_l": [5, 16], "jagged_dense_elementwise_add_jagged_output": [5, 16], "jagged_1d_to_dens": [5, 16], "max_l": 5, "jagged_2d_to_dens": [5, 12, 13, 16, 22, 23], "max_sequence_length": [5, 16, 23], "recat_embedding_grad_output_cuda": 6, "grad_output": 6, "num_features_per_rank": 6, "recat_embedding_grad_output_mixed_d_cuda": 6, "dim_sum_per_rank": 6, "recat_embedding_grad_output_mixed_d_batch_cuda": 6, "cumsum_dim_sum_per_rank": 6, "recat_embedding_grad_output_mixed_d_cpu": 6, "new_managed_tensor": 7, "self": 7, "alloc": [7, 21], "unifi": 7, "manag": [7, 12, 13, 17], "uvm": [7, 14], "Then": 7, "set": [7, 11, 14, 15, 16, 17], "prefer": [7, 13], "storag": [7, 9, 11], "host": [7, 12], "establish": 7, "map": [7, 10, 11, 15, 17], "devic": [7, 8, 12, 14, 17], "new": [7, 9, 11, 21, 22, 23], "new_managed_tensor_meta": 7, "placehold": 7, "meta": [7, 20], "kei": 7, "empti": [7, 15, 16, 24], "new_host_mapped_tensor": 7, "new_unified_tensor": 7, "is_host_map": 7, "either": [7, 9, 10, 12, 13], "depend": [7, 9, 12, 13, 15], "new_vanilla_managed_tensor": 7, "allow": [7, 12], "automat": [7, 10, 14, 22], "uvm_storag": 7, "check": [7, 17], "gpu": [7, 12, 13, 14, 25], "true": [7, 17], "is_uvm_tensor": 7, "BUT": [7, 20], "uvm_to_cpu": 7, "effect": [7, 15], "move": 7, "from": [7, 9, 10, 11, 12, 13, 14, 15, 17, 19, 20, 21, 22, 23, 24], "uvm_to_devic": 7, "prototyp": 7, "whose": 7, "uvm_cuda_mem_advis": 7, "cuda_memory_advis": 7, "cudamemadvis": 7, "cudamemoryadvis": 7, "enum": [7, 9], "avail": [7, 12, 14, 22], "python": [7, 12, 14, 21, 22, 24], "side": [7, 21, 23, 25], "namespac": 7, "over": [7, 12], "valid": 7, "inform": [7, 15, 23, 24], "uvm_cuda_mem_prefetch_async": 7, "device_t": 7, "cudamemprefetchasync": 7, "prefetch": 7, "destin": 7, "uvm_mem_advice_dont_fork": 7, "madvis": 7, "madv_dontfork": 7, "workaround": 7, "driver": [7, 12], "un": 7, "page": [7, 19, 24, 25], "tabl": [7, 10, 15, 25], "fork": [7, 19], "caus": [7, 12, 13, 20, 22], "slowdown": 7, "next": [7, 15, 21, 23], "access": [7, 17], "uvm_to_cpu_clon": 7, "copi": 7, "contigu": [7, 10], "singl": [7, 9], "thread": 7, "memcpi": 7, "section": [8, 12, 23], "variou": 8, "all_to_one_devic": 8, "inputtensor": 8, "target_devic": 8, "permute_pooled_embs_split_gpu": 8, "pooled_emb": 8, "offset_dim_list": 8, "permute_list": 8, "inv_offset_dim_list": 8, "inv_permute_list": 8, "permute_pooled_embs_auto_grad_split_gpu": 8, "permute_pooled_embs_auto_grad_gpu": 8, "permute_pooled_embs_cpu_impl": 8, "allow_dupl": 8, "permute_pooled_embs_split_cpu": 8, "permute_pooled_embs_auto_grad_split_cpu": 8, "permute_pooled_embs_auto_grad": 8, "permute_pooled_embs_auto_grad_cpu": 8, "model": [9, 10], "techniqu": 9, "reduc": 9, "larg": [9, 12], "order": [9, 15, 19], "achiev": [9, 13], "better": [9, 21], "small": 9, "loss": [9, 20], "accuraci": 9, "_float_to_bfloat16_gpu": 9, "brain": 9, "bfloat16": 9, "_bfloat16_to_float_gpu": 9, "_float_to_fp8rowwise_gpu": 9, "forward": 9, "fp8": 9, "dtype": [9, 17], "sparsetyp": [9, 17], "throw": [9, 21], "_fp8rowwise_to_float_gpu": 9, "represent": [9, 15], "_float_to_fused8bitrowwise_gpu": 9, "_half_to_fused8bitrowwise_gpu": 9, "half": 9, "_single_or_half_precision_to_fused8bitrowwise_gpu": 9, "_fused8bitrowwise_to_float_gpu": 9, "_fused8bitrowwise_to_half_gpu": 9, "_fused8bitrowwise_to_single_or_half_precision_gpu": 9, "scale_bias_last": 9, "quant_padding_float_typ": 9, "_fused8bitrowwise_to_float_mixed_dim_gpu": 9, "kfloat": 9, "khalf": 9, "_float_to_fusednbitrowwise_gpu": 9, "_half_to_fusednbitrowwise_gpu": 9, "_single_or_half_precision_to_fusednbitrowwise_gpu": 9, "_fusednbitrowwise_to_float_gpu": 9, "_fusednbitrowwise_to_half_gpu": 9, "_fusednbitrowwise_to_single_or_half_precision_gpu": 9, "_float_to_hfp8_gpu": 9, "ebit": 9, "exponent_bia": 9, "max_po": 9, "hybrid": 9, "hfp8": 9, "_hfp8_to_float_gpu": 9, "_float_to_msfp_gpu": 9, "bounding_box_s": 9, "mbit": 9, "min_po": 9, "msfp": 9, "_msfp_to_float_gpu": 9, "_float_to_paddedfp8rowwise_gpu": 9, "row_dim": 9, "pad": [9, 15, 16, 23], "_paddedfp8rowwise_to_float_gpu": 9, "output_last_dim": 9, "_fused8bitrowwise_to_float_cpu_out": 9, "_float_to_fused8bitrowwise_cpu_out": 9, "float_to_fused8bitrowwise_cpu": 9, "half_to_fused8bitrowwise_cpu": 9, "float_or_half_to_fused8bitrowwise_cpu": 9, "fused8bitrowwise_to_float_cpu": 9, "fused8bitrowwise_to_half_cpu": 9, "fused8bitrowwise_to_float_or_half_cpu": 9, "float_to_fp8rowwise_cpu": 9, "fp8rowwise_to_float_cpu": 9, "fusednbitrowwise_to_float_cpu": 9, "fusednbitrowwise_to_half_cpu": 9, "fusednbitrowwise_to_float_or_half_cpu": 9, "floattofp8quantized_ref": 9, "nrow": 9, "ncol": 9, "fp8quantizedtofloat_ref": 9, "expand_into_jagged_permute_cuda": 10, "permut": 10, "input_offset": 10, "output_offset": 10, "output_s": 10, "expand_into_jagged_permut": 10, "expand": 10, "index": [10, 11, 12, 13, 15, 21, 23], "case": [10, 12, 13, 15, 19], "ha": [10, 13, 15, 19, 21, 22], "across": [10, 12], "rank": [10, 15], "level": 10, "exclus": 10, "op": [10, 13, 16, 23], "bag": [10, 17, 25], "sit": 10, "we": [10, 12, 15, 19], "deriv": [10, 15, 20], "arrai": [10, 16, 23], "comput": [10, 12, 13, 17], "formula": 10, "output_permut": 10, "table_offset": 10, "bag_offset": 10, "histogram_binning_calibration_cpu": 10, "logit": 10, "bin_num_exampl": 10, "bin_num_posit": 10, "positive_weight": 10, "lower_bound": 10, "upper_bound": 10, "bin_ctr_in_use_aft": 10, "bin_ctr_weight_valu": 10, "divid": [10, 15], "predict": 10, "rang": [10, 15], "e": [10, 12, 15, 21, 23, 24], "bin": [10, 12], "two": [10, 15, 16, 17, 22], "exampl": [10, 12, 13, 14, 16, 17, 21, 22, 23, 24], "fall": [10, 12, 13], "bucket": [10, 12], "basic": [10, 23], "histogram": 10, "result": [10, 12, 16], "statist": 10, "real": 10, "ctr": 10, "num_po": 10, "num_exampl": 10, "final": 10, "calibr": 10, "pre": [10, 12, 13], "cali": 10, "wai": [10, 20], "within": 10, "suffici": [10, 19, 22], "That": 10, "fine": 10, "grain": 10, "modul": [10, 13, 17, 23], "theoret": 10, "layer": 10, "uncalibr": 10, "befor": [10, 12, 17, 24], "sigmoid": 10, "calibart": 10, "pass": [10, 17, 19, 22], "lower": 10, "bound": [10, 15], "calibration_target": 10, "observ": 10, "statisct": 10, "final_calibrated_predict": 10, "bin_ctr_weight": 10, "bin_ctr": 10, "calibrated_predict": 10, "bin_id": 10, "generic_histogram_binning_calibration_by_feature_cpu": 10, "segment_valu": 10, "segment_length": 10, "num_seg": 10, "bin_boundari": 10, "extens": [10, 21, 22], "ectr": 10, "abov": [10, 13, 15, 20, 21, 23, 24], "accept": [10, 19], "sort": [10, 11, 12], "keyjaggedtensor": 10, "num_bin": 10, "longer": [10, 18, 21], "still": [10, 12], "parambin_ctr_weight_valu": 10, "get_unique_indices_cuda": 11, "linear_indic": 11, "max_indic": 11, "compute_count": 11, "dedupl": 11, "pair": [11, 24], "lru_cache_find_uncached_cuda": 11, "unique_indic": 11, "unique_indices_length": 11, "time_stamp": 11, "lru_stat": 11, "gather_cache_stat": 11, "uvm_cache_stat": 11, "lock_cache_lin": 11, "lxu_cache_locking_count": 11, "lru": [11, 17], "uncach": 11, "them": 11, "host_lxu_cache_slot": 11, "h_in": 11, "cache_set": [11, 17], "linearize_cache_indices_cuda": 11, "b_offset": 11, "linear": 11, "uniqu": [11, 24], "linearize_cache_indices_from_row_idx_cuda": 11, "update_table_indic": 11, "update_row_indic": 11, "format": [11, 22, 23], "inplac": 11, "updat": [11, 12, 13, 17, 19], "lru_cache_populate_cuda": 11, "hash_size_cumsum": 11, "linear_cache_indic": 11, "stochastic_round": [11, 17], "fetch": 11, "insert": [11, 24], "timestep": 11, "lru_cache_populate_byte_cuda": 11, "byte": 11, "element": [11, 15], "direct_mapped_lru_cache_populate_byte_cuda": 11, "lxu_cache_miss_timestamp": 11, "assoc": 11, "variant": [11, 12, 13, 22], "lfu_cache_populate_cuda": 11, "lfu_stat": 11, "lfu": [11, 17], "lfu_cache_populate_byte_cuda": 11, "lxu_cache_lookup_cuda": 11, "invalid_index": 11, "num_uniq_cache_indic": 11, "lxu_cache_locations_output": 11, "look": [11, 17], "up": [11, 17], "slot": 11, "sentinel": 11, "miss": [11, 12], "direct_mapped_lxu_cache_lookup_cuda": 11, "lxu_cache_flush_cuda": 11, "flush": 11, "reset_weight_momentum_cuda": 11, "momentum1_dev": 11, "momentum1_uvm": 11, "momentum1_plac": 11, "momentum1_offset": 11, "pruned_indic": 11, "pruned_indices_offset": 11, "logical_table_id": 11, "buffer_id": 11, "lxu_cache_locking_counter_decrement_cuda": 11, "decrement": 11, "counter": 11, "lxu_cache_locations_update_cuda": 11, "lxu_cache_locations_new": 11, "fbgemm": [12, 13, 16, 18, 19, 20, 22, 23], "reproduc": [12, 13, 19, 20], "platform_nam": 12, "unam": 12, "prefix": [12, 24], "miniconda_prefix": 12, "home": 12, "download": [12, 13], "wget": 12, "q": 12, "anaconda": 12, "miniconda3": 12, "latest": 12, "sh": 12, "o": [12, 13], "p": 12, "load": [12, 15, 23], "shortcut": 12, "bashrc": 12, "command": [12, 13, 21, 22], "against": [12, 14], "env": [12, 13], "name": [12, 13, 20, 21, 23], "python_vers": 12, "3": [12, 15, 16, 17, 20, 23], "12": [12, 15, 17], "upgrad": 12, "pyopenssl": 12, "22": [12, 15], "requir": [12, 13, 14, 15, 17, 22, 23], "recent": [12, 13], "nvcc": 12, "capabl": [12, 14], "5": [12, 15, 17], "done": [12, 13], "bare": 12, "metal": 12, "neither": [12, 20], "nor": [12, 20], "nvidia": 12, "present": [12, 23], "sinc": [12, 15], "setup": [12, 13], "pull": [12, 13, 22], "linux": [12, 13], "distribut": [12, 20], "ubuntu": 12, "04": 12, "11": [12, 13, 15], "entrypoint": 12, "devel": 12, "ubuntu22": 12, "rest": [12, 13], "mai": [12, 13, 15, 20], "construct": [12, 13, 15], "mechan": 12, "full": [12, 13, 24], "nvml": 12, "org": [12, 13, 23], "cuda_vers": 12, "label": 12, "verifi": [12, 13, 21, 23], "cuda_runtim": 12, "h": [12, 16, 21], "libnvidia": [12, 13], "ml": [12, 13], "printenv": 12, "extract": 12, "given": [12, 15, 16], "url": [12, 13], "builder": 12, "blob": 12, "main": [12, 19], "common": [12, 13, 15, 23], "install_cuda": 12, "cudnn_url": 12, "redist": 12, "x86_64": 12, "2": [12, 13, 15, 16, 17, 21, 23, 24], "26_cuda12": 12, "archiv": 12, "tar": 12, "xz": 12, "unpack": 12, "xvf": 12, "amd": [12, 13], "minim": 12, "6": [12, 13, 15], "termin": 12, "while": [12, 22], "come": 12, "reason": [12, 13, 22], "oper": [12, 13, 25], "guid": [12, 23], "disabl": 12, "apt": 12, "prompt": 12, "debian_frontend": 12, "noninteract": 12, "db": 12, "radeon": 12, "amdgpu": 12, "focal": 12, "install_5": 12, "50601": 12, "1_all": 12, "deb": 12, "usecas": 12, "hiplibsdk": 12, "dkm": 12, "hipifi": 12, "hip": 12, "dev": 12, "20": 12, "sysroot": 12, "avoid": 12, "glibcxx": 12, "fbgemm_cpu": 12, "10": [12, 13, 15], "keep": 12, "older": [12, 13], "gcc_version": 12, "15": 12, "7": [12, 13, 15, 16, 17], "forg": [12, 22], "gxx_linux": 12, "64": [12, 15], "sysroot_linux": 12, "17": 12, "binari": [12, 20], "cento": 12, "stream": 12, "becaus": [12, 15], "librari": [12, 22, 25], "refer": [12, 15, 22, 23], "libstdc": 12, "what": [12, 22], "libcxx_path": 12, "print": [12, 13, 17, 23], "objdump": 12, "tc": 12, "grep": 12, "glibc_": 12, "sed": 12, "vu": 12, "cat": 12, "glibcxx_": 12, "possibl": [12, 15, 19, 20], "just": 12, "do": [12, 13, 19], "llvm_version": 12, "libcxx": 12, "outdat": 12, "aarch64": [12, 13], "cannot": 12, "explicitli": 12, "clangxx": 12, "rt": 12, "lib": [12, 13], "ld_library_path": [12, 13], "config": 12, "var": 12, "nvcc_prepend_flag": 12, "correctli": [12, 13, 14, 21, 22], "xcompil": 12, "ccbin": 12, "clangxx_path": 12, "unsupport": 12, "even": [12, 20], "though": [12, 13], "libstd": 12, "being": [12, 22], "mean": [12, 15, 17], "regardless": 12, "scenario": 12, "first": [12, 21, 23, 24], "binpath": 12, "overrid": 12, "exist": [12, 21, 23], "ln": 12, "sf": 12, "path_to_either_gcc_or_clang": 12, "cc": 12, "These": 12, "later": 12, "configur": [12, 21], "stage": [12, 15], "cmake": 12, "click": 12, "hypothesi": [12, 13], "jinja2": 12, "ncurs": 12, "numpi": [12, 13], "scikit": [12, 13], "offici": 12, "homepag": 12, "authorit": [12, 13, 22], "how": [12, 13, 14, 23], "nightli": [12, 13], "rc": 12, "without": [12, 20], "alwai": 12, "reliabl": 12, "arriv": 12, "hour": 12, "than": [12, 15], "window": 12, "silent": 12, "both": [12, 18, 20, 22], "place": [12, 17], "artifact": 12, "select": 12, "dure": [12, 15, 17, 23], "thu": [12, 17], "import": [12, 13, 17, 23, 24], "much": [12, 21], "determinist": 12, "torch": [12, 13, 16, 17, 22, 23], "whl": [12, 13], "cu121": [12, 13], "rocm5": [12, 13], "ensur": [12, 13, 19], "properli": 12, "__version__": 12, "minimum": [12, 21, 22, 23], "cuda_cmake_macro": 12, "txt": [12, 14, 22, 24], "tag": [12, 21, 24], "fbgemm_vers": 12, "v0": 12, "fbgemm_": 12, "addit": [12, 14, 15, 16], "flow": 12, "state": 12, "becom": 12, "stale": 12, "problem": 12, "re": [12, 13], "attempt": 12, "failur": [12, 13], "clear": [12, 19], "py": [12, 13, 14, 22, 23], "clean": [12, 22], "must": [12, 13, 14, 15, 17, 20, 24], "package_nam": 12, "fbgemm_gpu_": 12, "convent": 12, "major": 12, "minor": 12, "py312": 12, "python_tag": 12, "determin": [12, 15], "processor": 12, "arch": 12, "python_plat_nam": 12, "manylinux2014_": 12, "maco": 12, "macosx_10_9_": 12, "arm64": 12, "macosx_11_0_": 12, "win_": 12, "cpu_onli": 12, "flag": [12, 22], "bdist_wheel": 12, "package_vari": 12, "plat": 12, "instead": [12, 22], "cxxprefix": 12, "presum": 12, "made": [12, 22], "presenc": 12, "similar": [12, 15, 17], "enabl": [12, 14], "been": [12, 21], "unabl": 12, "cudacxx": 12, "cuda_bin_path": 12, "cub": 12, "applic": [12, 17, 21, 23], "cub_dir": 12, "header": [12, 21, 24], "cudnn_include_dir": 12, "cudnn_librari": 12, "nvml_lib_path": 12, "sm70": [12, 13], "80": 12, "v100": [12, 13], "a100": [12, 13], "cuda_arch_list": 12, "unset": 12, "torch_cuda_arch_list": 12, "preced": 12, "dtorch_cuda_arch_list": 12, "rocm_path": 12, "pytorch_rocm_arch": 12, "gfx906": 12, "gfx908": 12, "gfx90a": 12, "wiki": 12, "gentoo": 12, "list": [12, 15, 16, 17, 20, 21, 23], "rocminfo": 12, "gfx": 12, "dhip_root_dir": 12, "dtorch_use_hip_dsa": 12, "complet": [12, 19, 22], "correct": 12, "lot": 12, "jinja": 12, "instanti": 12, "sure": [12, 19, 21, 23], "accident": 12, "cours": 12, "fbgemm_gpu_lib_path": 12, "fbgemm_gpu_pi": [12, 13], "defin": [12, 15, 21], "nm": 12, "gdcu": 12, "referenc": 12, "certain": 12, "gdc": 12, "merge_pooled_embed": [12, 13], "isol": [13, 22], "build": [13, 14, 21, 23, 25], "sm80": 13, "respect": 13, "other": [13, 15, 20, 21, 22, 23], "scratch": 13, "guarante": 13, "especi": 13, "displai": [13, 24], "smi": 13, "515": 13, "76": 13, "persist": 13, "bu": [13, 24], "id": 13, "disp": 13, "volatil": 13, "uncorr": 13, "ecc": 13, "fan": 13, "temp": 13, "perf": 13, "pwr": 13, "usag": [13, 22, 23], "cap": 13, "memori": [13, 17, 25], "util": [13, 25], "mig": 13, "a10g": 13, "00000000": 13, "00": 13, "1e": 13, "31c": 13, "p0": 13, "59w": 13, "300w": 13, "0mib": 13, "23028mib": 13, "gi": 13, "ci": 13, "pid": 13, "No": 13, "expos": 13, "onc": [13, 19], "imag": 13, "launch": 13, "alreadi": [13, 19, 21, 23], "toolkit": 13, "interfac": 13, "concis": 13, "info": [13, 21, 23], "dieedg": 13, "avgpwr": 13, "sclk": 13, "mclk": 13, "pwrcap": 13, "vram": 13, "33": 13, "0c": 13, "37": 13, "0w": 13, "300mhz": 13, "1200mhz": 13, "auto": [13, 22], "290": 13, "32": 13, "39": 13, "log": 13, "difficult": 13, "relev": [13, 21], "link": [13, 22], "encount": 13, "signatur": [13, 22], "traceback": 13, "last": 13, "root": [13, 19], "miniconda": 13, "mycondaenv": 13, "site": 13, "_op": [13, 22], "line": [13, 23, 24], "565": 13, "__getattr__": 13, "overload_nam": 13, "_c": 13, "_jit_get_oper": 13, "qualified_op_nam": 13, "runtimeerror": 13, "except": [13, 21, 23], "wa": 13, "string": [13, 24], "post47": 13, "py3": 13, "egg": 13, "__init__": [13, 23], "21": 13, "_fbgemm_gpu_doc": 13, "noqa": 13, "f401": 13, "e402": 13, "18": 13, "569": 13, "rais": [13, 23], "attributeerror": [13, 23], "_opnamespac": 13, "object": [13, 15], "attribut": [13, 23], "cli": 13, "main_run": 13, "execut": [13, 14], "47": 13, "_zn6fbgemm48floatorhalftofusednbitrowwisequantizedsbhalfavx2itli2eeevpkt_miph": 13, "appear": 13, "libtorch": 13, "visibl": 13, "incorrectli": [13, 22], "declar": [13, 21], "were": [13, 16], "pr": [13, 21, 22, 23], "1618": 13, "former": 13, "resolv": 13, "manual": [13, 21], "latter": 13, "seriou": 13, "tha": 13, "develop": [13, 22], "bench": 14, "good": [14, 20], "instal": [14, 22, 25], "pip": [14, 22], "pytest": 14, "rsx": 14, "ignor": [14, 17, 22], "pytestcollectionwarn": 14, "split_table_batched_embeddings_test": 14, "quantize_ops_test": 14, "sparse_ops_test": 14, "split_embedding_inference_converter_test": 14, "mode": [14, 17], "cuda_visible_devic": 14, "debug": 14, "cuda_launch_block": 14, "fbgemm_test_with_rocm": 14, "hip_launch_block": 14, "split_table_batched_embeddings_benchmark": 14, "purpos": [15, 16, 17, 20], "handl": 15, "consecut": 15, "nestedtensor": 15, "raggedtensor": 15, "tensorflow": 15, "notabl": 15, "sentenc": 15, "repres": 15, "maxlength": 15, "2d": [15, 16, 17, 23], "numel": 15, "greatest": 15, "divisor": 15, "smallest": 15, "sub": 15, "exclud": 15, "partit": 15, "impli": [15, 20], "denot": [15, 21, 23], "offest": 15, "outer": 15, "would": 15, "begin": 15, "maximum": [15, 16, 23], "between": [15, 21, 22, 24], "normal": 15, "densor": 15, "form": [15, 20], "figur": 15, "below": 15, "show": [15, 22], "accomod": 15, "logic": [15, 21], "At": [15, 21, 22, 23], "multipl": [15, 16, 17, 23, 25], "hadamard": 15, "product": [15, 20], "involv": 15, "bmatrix": 15, "rightarrow": 15, "16": 15, "25": 15, "36": 15, "49": 15, "81": 15, "50": 15, "operand": 15, "word": 15, "ax": 15, "properti": 15, "hold": 15, "elementwis": [15, 16], "equival": 15, "start": [15, 16, 23, 24], "dim": 15, "onto": 15, "part": 15, "everi": 15, "those": [15, 16, 19, 23], "converson": 15, "could": 15, "lead": 15, "read": [15, 17], "relat": 15, "smaller": 15, "expect": 15, "happen": 15, "give": 15, "situat": 15, "like": 15, "dense_tensor": 15, "jagged_tensor": 15, "break": 15, "exact": 15, "usual": 15, "1d": [16, 17, 23], "area": 16, "outsid": 16, "coverag": 16, "total": [16, 17], "identit": 16, "add": [16, 19, 21, 22, 23], "structur": 16, "jagged_dense_dense_elementwise_add_jagged_output": 16, "y_0": 16, "y_1": 16, "multipli": [16, 17], "max_n": 16, "matmul": 16, "stacked_jagged_1d_to_dens": 16, "arg": [16, 23], "kwarg": 16, "stacked_jagged_2d_to_dens": 16, "split_table_batched_embeddings_op": 17, "splittablebatchedembeddingbagscodegen": 17, "embedding_spec": 17, "feature_table_map": 17, "none": 17, "cache_algorithm": 17, "cachealgorithm": 17, "cache_load_factor": 17, "cache_reserved_memori": 17, "cache_precis": 17, "weights_precis": 17, "enforce_hbm": 17, "optimtyp": 17, "exact_sgd": 17, "record_cache_metr": 17, "gradient_clip": 17, "max_gradi": 17, "learning_r": 17, "01": 17, "ep": 17, "0e": 17, "momentum": 17, "weight_decai": 17, "weight_decay_mod": 17, "weightdecaymod": 17, "eta": 17, "001": 17, "beta1": 17, "beta2": 17, "999": 17, "poolingmod": 17, "boundscheckmod": 17, "sourc": [17, 19, 20, 21, 22, 23], "backward": 17, "embeddingloc": 17, "computedevic": 17, "spec": 17, "placement": 17, "lxu": 17, "algorithm": 17, "capac": 17, "reserv": [17, 20], "hbm": 17, "adam": 17, "exact_adagrad": 17, "exact_rowwise_adagrad": 17, "lamb": 17, "lars_sgd": 17, "partial_rowwise_adam": 17, "partial_rowwise_lamb": 17, "sgd": 17, "recordcachemetr": 17, "record": 17, "hit": 17, "request": [17, 18, 22], "record_cache_miss_count": 17, "metric": 17, "record_tablewise_cache_miss": 17, "stochast": 17, "round": 17, "gradient": 17, "clip": 17, "learn": 17, "rate": 17, "epsilon": 17, "adagrad": 17, "lar": 17, "decai": 17, "l2": 17, "decoupl": 17, "pool": [17, 25], "boundari": 17, "fatal": 17, "conatin": 17, "column": 17, "feature_requires_grad": 17, "split_table_batched_embeddings_ops_common": 17, "split_table_batched_embeddings_ops_train": 17, "init_embedding_weights_uniform": 17, "split_embedding_weight": 17, "9426": 17, "7046": 17, "4214": 17, "0419": 17, "1331": 17, "7856": 17, "8124": 17, "2021": 17, "5771": 17, "5911": 17, "7792": 17, "1068": 17, "6203": 17, "4813": 17, "1677": 17, "4790": 17, "5587": 17, "0941": 17, "5754": 17, "3475": 17, "8952": 17, "1964": 17, "0810": 17, "4174": 17, "2513": 17, "4039": 17, "3775": 17, "3273": 17, "5399": 17, "0229": 17, "1455": 17, "8770": 17, "9520": 17, "4593": 17, "7169": 17, "6307": 17, "1765": 17, "8757": 17, "8614": 17, "2051": 17, "0603": 17, "9980": 17, "7958": 17, "5826": 17, "long": 17, "13": 17, "5197": 17, "2957": 17, "3578": 17, "1487": 17, "4873": 17, "3044": 17, "9801": 17, "2769": 17, "7164": 17, "8528": 17, "7159": 17, "6719": 17, "0784": 17, "2016": 17, "2176": 17, "1988": 17, "3825": 17, "5008": 17, "8991": 17, "1405": 17, "2637": 17, "9427": 17, "8902": 17, "3754": 17, "5013": 17, "6105": 17, "9968": 17, "3057": 17, "7621": 17, "9821": 17, "7314": 17, "6195": 17, "grad_fn": 17, "cppnode": 17, "splitlookupfunction_sgd_op": 17, "question": 18, "concern": 18, "discuss": 18, "kick": 18, "regard": 18, "feel": 18, "free": 18, "reach": 18, "easi": 19, "transpar": 19, "describ": 19, "activ": 19, "welcom": [19, 25], "your": [19, 22, 23], "repositori": 19, "branch": 19, "ve": 19, "chang": [19, 21, 23], "api": [19, 21, 22, 23], "suit": 19, "lint": 19, "haven": 19, "submit": [19, 21, 23], "facebook": [19, 20, 25], "open": 19, "track": 19, "public": [19, 22], "bug": 19, "descript": [19, 21, 22, 23, 24], "abl": 19, "bounti": 19, "safe": 19, "disclosur": 19, "secur": 19, "go": 19, "outlin": 19, "By": 19, "agre": 19, "tree": 19, "claus": 20, "bsd": 20, "softwar": 20, "copyright": 20, "inc": 20, "affili": 20, "right": [20, 24], "redistribut": 20, "modif": 20, "permit": 20, "condit": 20, "met": 20, "retain": 20, "notic": 20, "disclaim": 20, "materi": 20, "contributor": 20, "endors": 20, "promot": 20, "written": 20, "permiss": 20, "BY": 20, "THE": 20, "holder": 20, "AND": 20, "AS": 20, "express": [20, 24], "OR": 20, "warranti": 20, "NOT": 20, "limit": [20, 22], "TO": 20, "OF": 20, "merchant": 20, "FOR": 20, "particular": 20, "IN": 20, "NO": 20, "event": 20, "shall": 20, "BE": 20, "liabl": 20, "indirect": 20, "incident": 20, "special": 20, "exemplari": 20, "consequenti": 20, "damag": 20, "procur": 20, "substitut": 20, "servic": 20, "profit": 20, "busi": 20, "interrupt": 20, "theori": 20, "liabil": 20, "contract": 20, "strict": 20, "tort": 20, "neglig": 20, "aris": 20, "IF": 20, "advis": 20, "SUCH": 20, "javadoc": 21, "style": [21, 23], "comment": [21, 22, 24], "sphinx": [21, 22, 23], "breath": 21, "kept": 21, "cpp": [21, 23, 24], "cu": 21, "cuh": 21, "everyth": 21, "ifndef": 21, "doxygen_this_will_be_skip": 21, "endif": 21, "hidden": 21, "html": [21, 22, 23], "descriptionss": 21, "publish": [21, 23], "docstr": [21, 22, 23], "method": [21, 22, 23], "organ": 21, "yet": 21, "top": [21, 25], "defgroup": 21, "directli": [21, 23], "behavior": [21, 23], "tparam": 21, "param": [21, 23], "thrown": [21, 23], "ingroup": 21, "brief": 21, "short": 21, "example_method": [21, 23], "def": [21, 23], "foo": [21, 23], "lst": [21, 23], "And": [21, 23], "verbatim": [21, 23], "text": [21, 23, 24], "diagram": [21, 23], "unpars": 21, "second": [21, 23], "prev": [21, 23], "usabl": [21, 23], "space": [21, 22, 23], "endcod": 21, "align": [21, 23], "param1": [21, 23], "param2": 21, "bad_alloc": 21, "logic_error": 21, "href": 21, "www": [21, 23], "nl": 21, "cmdlink": 21, "On": [21, 23], "doxygengroup": 21, "rst": [21, 23, 24], "content": [21, 24, 25], "toctre": [21, 23], "ini": 21, "taken": 21, "care": 21, "doc": [21, 22, 23, 24], "local": [21, 23], "netlifi": [21, 22, 23], "preview": [21, 23], "serv": 22, "accompani": 22, "put": 22, "yourself": 22, "shoe": 22, "who": 22, "understand": 22, "live": 22, "easier": 22, "leav": 22, "separ": 22, "task": 22, "pointer": 22, "tool": 22, "graphviz": [22, 24], "assembl": 22, "view": 22, "prepend": 22, "sphinx_lint": 22, "technic": 22, "why": 22, "invok": 22, "occasion": 22, "unresolv": 22, "might": 22, "opt": 22, "pycapsul": 22, "class": [22, 23], "neg": 22, "silenc": 22, "nitpick": 22, "conf": 22, "domain": 22, "deploi": 22, "app": 22, "googl": 23, "c_size_t": 23, "about": 23, "ret": 23, "emplace_back": 23, "item": 23, "valueerror": 23, "14": 23, "restructuredtext": 23, "en": 23, "master": 23, "__": 23, "pep": 23, "0287": 23, "42": 23, "autofunct": 23, "c_ulong": 23, "mani": 23, "attach": 23, "fact": 23, "helper": 23, "codebas": 23, "add_doc": 23, "jag": [23, 25], "forc": 23, "hoc": 23, "the_new_doc_modul": 23, "remain": 23, "render": [23, 24], "anchor": 24, "_doc": 24, "underscor": 24, "_": 24, "There": 24, "elsewher": 24, "ref": 24, "anoth": 24, "literalinclud": 24, "rel": 24, "enclos": 24, "bracket": 24, "skiplin": 24, "suppli": 24, "math": 24, "inlin": 24, "k_": 24, "k_n": 24, "expressino": 24, "int_a": 24, "frac": 24, "2v": 24, "dx": 24, "left": 24, "dv": 24, "_a": 24, "du": 24, "digraph": 24, "altern": 24, "extern": 24, "dot": 24, "examplegraph": 24, "low": 25, "precis": 25, "high": 25, "convolut": 25, "server": 25, "infer": 25, "backend": 25, "caffe2": 25, "collect": 25, "transform": 25, "contribut": 25, "contact": 25, "licens": 25, "experiment": 25, "tbe": 25}, "objects": {"": [[9, 0, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ebits"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::exponent_bias"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::input"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ncols"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::nrows"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::output"], [9, 0, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu"], [9, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::forward"], [9, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::input"], [9, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::output_dtype"], [0, 0, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::len"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::m"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::max"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::min"], [0, 0, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf"], [0, 2, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::InputType"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::bit_rate"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_columns"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_rows"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::output"], [9, 0, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ebits"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::exponent_bias"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::input"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::max_pos"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ncols"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::nrows"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::output"], [0, 0, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize"], [0, 2, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::T"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::dst"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::len"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::noise_ratio"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::num_threads"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::qparams"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::src"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::thread_id"], [0, 0, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::C"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::G"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::K"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::LAYOUT"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::T"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::X"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::dst"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::scales"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::src"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::zero_points"], [0, 0, 1, "_CPPv46Xor128v", "Xor128"], [9, 0, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu"], [9, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::forward"], [9, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::input"], [9, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::output_dtype"], [9, 0, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu"], [9, 1, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu::input"], [9, 0, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu"], [9, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::forward"], [9, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::input"], [9, 0, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu"], [9, 1, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu::input"], [9, 0, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out"], [9, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::input"], [9, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::output"], [9, 0, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu"], [9, 1, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu::input"], [9, 0, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu"], [9, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::bit_rate"], [9, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::input"], [9, 0, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu"], [9, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::ebits"], [9, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::exponent_bias"], [9, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::input"], [9, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::max_pos"], [9, 0, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bias"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bounding_box_size"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::ebits"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::input"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::max_pos"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::mbits"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::min_pos"], [9, 0, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu"], [9, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::forward"], [9, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::input"], [9, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::row_dim"], [9, 0, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out"], [9, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::input"], [9, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::output"], [9, 0, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu"], [9, 1, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu::input"], [9, 0, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu"], [9, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::D_offsets"], [9, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::input"], [9, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::output_dtype"], [9, 0, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu"], [9, 1, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu::input"], [9, 0, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu"], [9, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::input"], [9, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::output_dtype"], [9, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::quant_padding_float_type"], [9, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::scale_bias_last"], [9, 0, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu"], [9, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::bit_rate"], [9, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::input"], [9, 0, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu"], [9, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::bit_rate"], [9, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::input"], [9, 0, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu"], [9, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::bit_rate"], [9, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::input"], [9, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::output_dtype"], [9, 0, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu"], [9, 1, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu::input"], [9, 0, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu"], [9, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::bit_rate"], [9, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::input"], [9, 0, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu"], [9, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::ebits"], [9, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::exponent_bias"], [9, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::input"], [9, 0, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu"], [9, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::bias"], [9, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::ebits"], [9, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::input"], [9, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::mbits"], [9, 0, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::forward"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::input"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_dtype"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_last_dim"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::row_dim"], [9, 0, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu"], [9, 1, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu::input"], [9, 0, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu"], [9, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::bit_rate"], [9, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::input"], [8, 0, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device"], [8, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::inputTensors"], [8, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::target_device"], [5, 0, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul"], [5, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_offsets"], [5, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_values"], [5, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::v"], [2, 0, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::B_ofsets"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::bounds_check_mode"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::indices"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::max_B"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::offsets"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::rows_per_table"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::warning"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::weights"], [5, 0, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged"], [5, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::dense"], [5, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::offsets"], [5, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::total_L"], [11, 0, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::D_offsets"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::hash_size_cumsum"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lru_state"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_miss_timestamp"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::row_alignment"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::time_stamp"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::uvm_cache_stats"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_offsets"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_tys"], [11, 0, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::invalid_index"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::uvm_cache_stats"], [21, 0, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method"], [21, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::Alignment"], [21, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::T"], [21, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param1"], [21, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param2"], [10, 0, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda"], [10, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::input_offsets"], [10, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_offsets"], [10, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_size"], [10, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::permute"], [9, 0, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu"], [9, 1, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu::input"], [9, 0, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu"], [9, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::forward"], [9, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::input"], [9, 0, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu"], [9, 1, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu::input"], [9, 0, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu"], [9, 1, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu::input"], [9, 0, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu"], [9, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::input"], [9, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::output_dtype"], [9, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::quant_padding_float_type"], [9, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::scale_bias_last"], [9, 0, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu"], [9, 1, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu::input"], [9, 0, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu"], [9, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::bit_rate"], [9, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::input"], [9, 0, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu"], [9, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::bit_rate"], [9, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::input"], [9, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::output_dtype"], [9, 0, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu"], [9, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::bit_rate"], [9, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::input"], [10, 0, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_boundaries"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_in_use_after"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_weight_value"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_examples"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_positives"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::logit"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::num_segments"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::positive_weight"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_lengths"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_value"], [11, 0, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda"], [11, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::compute_count"], [11, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::linear_indices"], [11, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::max_indices"], [3, 0, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::XQ"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::cache_K"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::cache_V"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::num_int4_kv_groups"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::num_split_ks"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::qk_scale"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::seq_positions"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::use_tensor_cores"], [9, 0, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu"], [9, 1, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu::input"], [10, 0, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_in_use_after"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_weight_value"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_examples"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_positives"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::logit"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::lower_bound"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::positive_weight"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::upper_bound"], [11, 0, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot"], [11, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::C"], [11, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::h_in"], [2, 0, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::D_offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::dev_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bias"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bits"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indice_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indices"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_locations"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float16_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float32_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float8_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int2_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int4_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int8_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::output_dtype"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::pooling_mode"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::row_alignment"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::total_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::uvm_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_placements"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_tys"], [2, 0, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::D_offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::dev_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bias"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bits"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indice_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indices"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_locations"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float16_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float32_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float8_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int2_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int4_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int8_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::output_dtype"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::pooling_mode"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::row_alignment"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::total_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::uvm_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_placements"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_tys"], [2, 0, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::D_offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_hash_size_cumsum"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_index_table_map"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::dev_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bias"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bits"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indice_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indices"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_locations"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_state"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_state"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float16_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float32_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float8_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int2_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int4_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int8_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::output_dtype"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::pooling_mode"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::row_alignment"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_cache_hash_size"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::uvm_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_placements"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_tys"], [2, 0, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::D_offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_hash_size_cumsum"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_index_table_map"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::dev_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bias"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bits"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indice_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indices"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_locations"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_state"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_state"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float16_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float32_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float8_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int2_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int4_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int8_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::output_dtype"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::pooling_mode"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::row_alignment"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_cache_hash_size"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::uvm_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_placements"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_tys"], [7, 0, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor"], [7, 1, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor::self"], [5, 0, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense"], [5, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::max_L"], [5, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::offsets"], [5, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::padding_value"], [5, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::values"], [5, 0, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense"], [5, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::max_sequence_length"], [5, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::offsets"], [5, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::values"], [5, 0, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_offsets"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_values"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::y"], [5, 0, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output"], [5, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_offsets"], [5, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_values"], [5, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::y"], [5, 0, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda"], [5, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_offsets"], [5, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_values"], [5, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::y"], [5, 0, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_offsets"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_values"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::y"], [5, 0, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense"], [5, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::max_lengths"], [5, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::offsets"], [5, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::padding_value"], [5, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::values"], [5, 0, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward"], [5, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::max_lengths"], [5, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::offsets"], [5, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::padding_value"], [5, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::values"], [11, 0, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::D_offsets"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lfu_state"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::row_alignment"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_offsets"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_tys"], [11, 0, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::D_offsets"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lfu_state"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::stochastic_rounding"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights_offsets"], [11, 0, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::B_offsets"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::indices"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::max_B"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::offsets"], [11, 0, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda"], [11, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_row_indices"], [11, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_table_indices"], [11, 0, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lock_cache_line"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lru_state"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lxu_cache_locking_counter"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::max_indices"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::time_stamp"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::unique_indices"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::unique_indices_length"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::uvm_cache_stats"], [11, 0, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::D_offsets"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::hash_size_cumsum"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lru_state"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::row_alignment"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::time_stamp"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::uvm_cache_stats"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_offsets"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_tys"], [11, 0, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::D_offsets"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::hash_size_cumsum"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lock_cache_line"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lru_state"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_locking_counter"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::stochastic_rounding"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::time_stamp"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::uvm_cache_stats"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights_offsets"], [11, 0, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::D_offsets"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::stochastic_rounding"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::total_D"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::uvm_weights"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::weights_offsets"], [11, 0, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda"], [11, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations"], [11, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations_new"], [11, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::num_uniq_cache_indices"], [11, 0, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda"], [11, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locations"], [11, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locking_counter"], [11, 0, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::invalid_index"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_locations_output"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::num_uniq_cache_indices"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::uvm_cache_stats"], [7, 0, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor"], [7, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::self"], [7, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::sizes"], [7, 0, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor"], [7, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::self"], [7, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::sizes"], [7, 0, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta"], [7, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::self"], [7, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::sizes"], [7, 0, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor"], [7, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::is_host_mapped"], [7, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::self"], [7, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::sizes"], [7, 0, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor"], [7, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::self"], [7, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::sizes"], [4, 0, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu"], [4, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::batch_size"], [4, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::include_last_offsets"], [4, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::indices_list"], [4, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::offsets_list"], [4, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::per_sample_weights"], [8, 0, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::pooled_embs"], [8, 0, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_permute_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::offset_dim_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::permute_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::pooled_embs"], [8, 0, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_permute_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::offset_dim_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::permute_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::pooled_embs"], [8, 0, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_permute_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::offset_dim_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::permute_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::pooled_embs"], [8, 0, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_permute_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::offset_dim_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::permute_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::pooled_embs"], [8, 0, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::allow_duplicates"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_offset_dim_list"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_permute_list"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::offset_dim_list"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::permute_list"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::pooled_embs"], [8, 0, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::pooled_embs"], [8, 0, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::pooled_embs"], [2, 0, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings_offsets"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::indices"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::offsets"], [2, 0, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings_offsets"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::indices"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::offsets"], [2, 0, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::dense_indices"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table_offsets"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::indices"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::offsets"], [2, 0, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table_offsets"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::indices"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::offsets"], [2, 0, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table_offsets"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::indices"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::offsets"], [6, 0, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda"], [6, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::grad_output"], [6, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::num_features_per_rank"], [6, 0, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda"], [6, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::cumsum_dim_sum_per_rank"], [6, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::dim_sum_per_rank"], [6, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::grad_output"], [6, 0, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu"], [6, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::dim_sum_per_rank"], [6, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::grad_output"], [6, 0, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda"], [6, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::dim_sum_per_rank"], [6, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::grad_output"], [0, 0, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::DIRECT"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::Q_GRAN"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::block"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::inp"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_in"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::r"], [0, 0, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::C_PER_G"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::Q_GRAN"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::block"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::inp"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_in"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::r"], [11, 0, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::D_offsets"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::buffer_ids"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::dev_weights"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::logical_table_ids"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_dev"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_offsets"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_placements"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_uvm"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices_offsets"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::uvm_weights"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_offsets"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_placements"], [4, 0, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu"], [4, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::include_last_offsets"], [4, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::indices_list"], [4, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::offsets_list"], [4, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::per_sample_weights"], [7, 0, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise"], [7, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::cuda_memory_advise"], [7, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::self"], [7, 0, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async"], [7, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::device_t"], [7, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::self"], [7, 0, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork"], [7, 1, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork::self"], [7, 0, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage"], [7, 1, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage::self"], [7, 0, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu"], [7, 1, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu::self"], [7, 0, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone"], [7, 1, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone::self"], [7, 0, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device"], [7, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::prototype"], [7, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::self"], [17, 3, 0, "-", "fbgemm_gpu"]], "fbgemm_gpu.docs.examples": [[23, 4, 1, "", "example_method"]], "fbgemm_gpu.split_table_batched_embeddings_ops": [[17, 4, 1, "", "SplitTableBatchedEmbeddingBagsCodegen"]], "torch.ops.fbgemm": [[16, 4, 1, "", "batched_dense_vec_jagged_2d_mul"], [16, 4, 1, "", "dense_to_jagged"], [16, 4, 1, "", "jagged_1d_to_dense"], [16, 4, 1, "", "jagged_2d_to_dense"], [16, 4, 1, "", "jagged_dense_dense_elementwise_add_jagged_output"], [16, 4, 1, "", "jagged_dense_elementwise_add"], [16, 4, 1, "", "jagged_dense_elementwise_add_jagged_output"], [16, 4, 1, "", "jagged_dense_elementwise_mul"], [16, 4, 1, "", "jagged_to_padded_dense"], [16, 4, 1, "", "stacked_jagged_1d_to_dense"], [16, 4, 1, "", "stacked_jagged_2d_to_dense"]]}, "objtypes": {"0": "cpp:function", "1": "cpp:functionParam", "2": "cpp:templateParam", "3": "py:module", "4": "py:function"}, "objnames": {"0": ["cpp", "function", "C++ function"], "1": ["cpp", "functionParam", "C++ function parameter"], "2": ["cpp", "templateParam", "C++ template parameter"], "3": ["py", "module", "Python module"], "4": ["py", "function", "Python function"]}, "titleterms": {"quantiz": [0, 9], "util": 0, "refer": [0, 24], "implement": 0, "method": 0, "avx": 0, "2": 0, "512": 0, "build": [1, 12, 22], "instruct": [1, 12, 13, 14], "fbgemm": [1, 25], "requir": 1, "hardwar": 1, "softwar": 1, "depend": 1, "asmjit": 1, "cpuinfo": 1, "googletest": 1, "set": [1, 12, 13, 22], "up": [1, 12, 13, 22], "an": [1, 12], "isol": [1, 12], "environ": [1, 12, 13, 14, 22], "instal": [1, 12, 13], "tool": [1, 12], "c": [1, 12, 21, 25], "compil": [1, 12], "other": [1, 12, 24], "librari": [1, 13], "prepar": [1, 12], "linux": 1, "maco": 1, "cmake": 1, "gcc": [1, 12], "issu": [1, 19], "12": 1, "clang": [1, 12], "bazel": 1, "window": 1, "embed": [2, 8, 11, 17], "oper": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 16, 17], "cuda": [2, 5, 6, 7, 9, 10, 12, 13, 14], "cpu": [2, 5, 6, 9, 10, 12, 13], "experiment": 3, "attent": 3, "combin": [4, 15], "input": 4, "jag": [5, 15, 16], "tensor": [5, 15, 16], "layout": 6, "transform": 6, "memori": 7, "pool": 8, "merg": 8, "permut": 8, "spars": 10, "data": 10, "tabl": [11, 17], "batch": [11, 17], "miniconda": 12, "conda": [12, 13], "onli": [12, 13], "docker": [12, 13], "imag": 12, "cudnn": 12, "rocm": [12, 13, 14], "miopen": 12, "symlink": 12, "pytorch": [12, 13], "through": [12, 13], "pip": [12, 13], "post": [12, 13], "check": [12, 13], "fbgemm_gpu": [12, 13, 14, 22, 25], "packag": [12, 13], "The": 12, "process": 12, "wheel": 12, "variabl": 12, "For": 12, "develop": [12, 25], "undefin": [12, 13], "symbol": [12, 13], "glibc": 12, "version": 12, "compat": 12, "nvidia": 13, "driver": 13, "contain": 13, "runtim": 13, "amdgpu": 13, "python": [13, 23, 25], "public": 13, "pypi": 13, "test": 14, "setup": 14, "run": 14, "variant": 14, "benchmark": 14, "high": 15, "level": 15, "overview": [15, 25], "format": 15, "valu": 15, "offset": 15, "max": 15, "length": 15, "exampl": 15, "arithmet": 15, "convers": 15, "dens": 15, "tbe": 17, "contact": 18, "u": 18, "github": 18, "slack": 18, "contribut": 19, "code": [19, 21, 23, 24], "conduct": 19, "pull": 19, "request": 19, "contributor": 19, "licens": [19, 20], "agreement": 19, "cla": 19, "ad": [21, 23, 24], "document": [21, 22, 23, 24, 25], "gener": [22, 23, 25], "guidelin": 22, "specif": 22, "guid": 22, "toolchain": 22, "lint": 22, "deploy": 22, "preview": 22, "todo": 23, "auto": 23, "sphinx": 24, "pointer": 24, "section": 24, "referenc": 24, "sourc": 24, "latex": 24, "graph": 24, "homepag": 25, "info": 25, "api": 25}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Test Instructions": [[14, "test-instructions"]], "Setup the FBGEMM_GPU Test Environment": [[14, "setup-the-fbgemm-gpu-test-environment"]], "Running FBGEMM_GPU Tests": [[14, "running-fbgemm-gpu-tests"]], "Testing with the CUDA Variant": [[14, "testing-with-the-cuda-variant"]], "Testing with the ROCm Variant": [[14, "testing-with-the-rocm-variant"]], "Running FBGEMM_GPU Benchmarks": [[14, "running-fbgemm-gpu-benchmarks"]], "Installation Instructions": [[13, "installation-instructions"]], "Set Up CPU-Only Environment": [[13, "set-up-cpu-only-environment"]], "Set Up CUDA Environment": [[13, "set-up-cuda-environment"]], "Install NVIDIA Drivers": [[13, "install-nvidia-drivers"]], "Set Up the CUDA Docker Container and Conda Environment": [[13, "set-up-the-cuda-docker-container-and-conda-environment"]], "Install the CUDA Runtime": [[13, "install-the-cuda-runtime"]], "Set Up ROCm Environment": [[13, "set-up-rocm-environment"]], "Install AMDGPU Drivers": [[13, "install-amdgpu-drivers"]], "Set Up the ROCm Docker Container and Conda Environment": [[13, "set-up-the-rocm-docker-container-and-conda-environment"]], "Install Python Libraries": [[13, "install-python-libraries"]], "Install PyTorch": [[13, "install-pytorch"], [12, "install-pytorch"]], "Install the FBGEMM_GPU Package": [[13, "install-the-fbgemm-gpu-package"]], "Install through PyTorch PIP": [[13, "install-through-pytorch-pip"]], "Install through Public PyPI": [[13, "install-through-public-pypi"]], "Post-Installation Checks": [[13, "post-installation-checks"]], "Undefined Symbols": [[13, "undefined-symbols"]], "Build Instructions": [[12, "build-instructions"], [1, "build-instructions"]], "Set Up an Isolated Build Environment": [[12, "set-up-an-isolated-build-environment"], [1, "set-up-an-isolated-build-environment"]], "Install Miniconda": [[12, "install-miniconda"]], "Set Up the Conda Environment": [[12, "set-up-the-conda-environment"]], "Set Up for CPU-Only Build": [[12, "set-up-for-cpu-only-build"]], "Set Up for CUDA Build": [[12, "set-up-for-cuda-build"]], "CUDA Docker Image": [[12, "cuda-docker-image"]], "Install CUDA": [[12, "install-cuda"]], "Install cuDNN": [[12, "install-cudnn"]], "Set Up for ROCm Build": [[12, "set-up-for-rocm-build"]], "ROCm Docker Image": [[12, "rocm-docker-image"]], "Install ROCm": [[12, "install-rocm"]], "Install MIOpen": [[12, "install-miopen"]], "Install the Build Tools": [[12, "install-the-build-tools"], [1, "install-the-build-tools"]], "C/C++ Compiler (GCC)": [[12, "c-c-compiler-gcc"]], "C/C++ Compiler (Clang)": [[12, "c-c-compiler-clang"]], "Compiler Symlinks": [[12, "compiler-symlinks"]], "Other Build Tools": [[12, "other-build-tools"], [1, "other-build-tools"]], "Installation Through Conda": [[12, "installation-through-conda"]], "Installation Through PyTorch PIP": [[12, "installation-through-pytorch-pip"]], "Post-Install Checks": [[12, "post-install-checks"]], "Build the FBGEMM_GPU Package": [[12, "build-the-fbgemm-gpu-package"]], "Preparing the Build": [[12, "preparing-the-build"], [1, "preparing-the-build"]], "The Build Process": [[12, "the-build-process"]], "Set Wheel Build Variables": [[12, "set-wheel-build-variables"]], "CPU-Only Build": [[12, "cpu-only-build"]], "CUDA Build": [[12, "cuda-build"]], "ROCm Build": [[12, "rocm-build"]], "Post-Build Checks (For Developers)": [[12, "post-build-checks-for-developers"]], "Undefined Symbols Check": [[12, "undefined-symbols-check"]], "GLIBC Version Compatibility Check": [[12, "glibc-version-compatibility-check"]], "Contact Us": [[18, "contact-us"]], "GitHub": [[18, "github"]], "Slack": [[18, "slack"]], "Contributing": [[19, "contributing"]], "Code of Conduct": [[19, "code-of-conduct"]], "Pull Requests": [[19, "pull-requests"]], "Contributor License Agreement (\u201cCLA\u201d)": [[19, "contributor-license-agreement-cla"]], "Issues": [[19, "issues"]], "License": [[19, "license"], [20, "license"]], "Table Batched Embedding (TBE) Operators": [[17, "module-fbgemm_gpu"]], "Jagged Tensor Operators": [[15, "jagged-tensor-operators"], [16, "jagged-tensor-operators"], [5, "jagged-tensor-operators"]], "High Level Overview": [[15, "high-level-overview"]], "Jagged Tensor Format": [[15, "jagged-tensor-format"]], "Values": [[15, "values"]], "Offsets": [[15, "offsets"]], "Max Lengths": [[15, "max-lengths"]], "Jagged Tensor Example": [[15, "jagged-tensor-example"]], "Jagged Tensor Operations": [[15, "jagged-tensor-operations"]], "Arithmetic Operations": [[15, "arithmetic-operations"]], "Conversion Operations": [[15, "conversion-operations"]], "Jagged to Dense": [[15, "jagged-to-dense"]], "Dense to Jagged": [[15, "dense-to-jagged"]], "Combined Arithmetic + Conversion Operations": [[15, "combined-arithmetic-conversion-operations"]], "Sphinx Documentation Pointers": [[24, "sphinx-documentation-pointers"]], "References Other Sections of the Documentation": [[24, "references-other-sections-of-the-documentation"]], "Referencing the Source Code": [[24, "referencing-the-source-code"]], "Adding LaTeX": [[24, "adding-latex"]], "Adding Graphs": [[24, "adding-graphs"]], "FBGEMM and FBGEMM_GPU Documentation Homepage": [[25, "fbgemm-and-fbgemm-gpu-documentation-homepage"]], "General Info": [[25, null]], "FBGEMM Development": [[25, null]], "FBGEMM_GPU Development": [[25, null]], "FBGEMM_GPU Overview": [[25, null]], "FBGEMM C++ API": [[25, null]], "FBGEMM_GPU C++ API": [[25, null]], "FBGEMM_GPU Python API": [[25, null]], "Adding Documentation to Python Code": [[23, "adding-documentation-to-python-code"]], "Todo": [[23, "id1"]], "Adding Documentation to Auto-Generated Python Code": [[23, "adding-documentation-to-auto-generated-python-code"]], "Adding Documentation to C++ Code": [[21, "adding-documentation-to-c-code"]], "Documentation": [[22, "documentation"]], "General Documentation Guidelines": [[22, "general-documentation-guidelines"]], "Specific Documentation Guides": [[22, "specific-documentation-guides"]], "Building the Documentation": [[22, "building-the-documentation"]], "Set Up Build Environment": [[22, "set-up-build-environment"]], "Build FBGEMM_GPU": [[22, "build-fbgemm-gpu"]], "Set Up the Documentation Toolchain": [[22, "set-up-the-documentation-toolchain"]], "Build the Documentation": [[22, "build-the-documentation"]], "Linting the Documentation": [[22, "linting-the-documentation"]], "Deployment Preview": [[22, "deployment-preview"]], "Combine Input Operators": [[4, "combine-input-operators"]], "CUDA Operators": [[5, "cuda-operators"], [2, "cuda-operators"], [6, "cuda-operators"], [9, "cuda-operators"], [10, "cuda-operators"]], "CPU Operators": [[5, "cpu-operators"], [2, "cpu-operators"], [6, "cpu-operators"], [9, "cpu-operators"], [10, "cpu-operators"]], "Experimental Operators": [[3, "experimental-operators"]], "Attention Operators": [[3, "attention-operators"]], "FBGEMM Requirements": [[1, "fbgemm-requirements"]], "Hardware Requirements": [[1, "hardware-requirements"]], "Software Dependencies": [[1, "software-dependencies"]], "asmjit": [[1, "asmjit"]], "cpuinfo": [[1, "cpuinfo"]], "GoogleTest": [[1, "googletest"]], "C/C++ Compiler": [[1, "c-c-compiler"]], "Build the FBGEMM Library": [[1, "build-the-fbgemm-library"]], "Building on Linux and macOS (CMake + GCC)": [[1, "building-on-linux-and-macos-cmake-gcc"]], "Build Issues with GCC 12+": [[1, "build-issues-with-gcc-12"]], "Building on Linux and macOS (CMake + Clang)": [[1, "building-on-linux-and-macos-cmake-clang"]], "Building on Linux (Bazel)": [[1, "building-on-linux-bazel"]], "Building on Windows": [[1, "building-on-windows"]], "Quantization Utilities": [[0, "quantization-utilities"]], "Reference Implementation Methods": [[0, "reference-implementation-methods"]], "AVX-2 Implementation Methods": [[0, "avx-2-implementation-methods"]], "AVX-512 Implementation Methods": [[0, "avx-512-implementation-methods"]], "Embedding Operators": [[2, "embedding-operators"]], "CUDA Memory Operators": [[7, "cuda-memory-operators"]], "Pooled Embeddings Operators": [[8, "pooled-embeddings-operators"]], "Merge Operators": [[8, "merge-operators"]], "Permutation Operators": [[8, "permutation-operators"]], "Layout Transformation Operators": [[6, "layout-transformation-operators"]], "Quantization Operators": [[9, "quantization-operators"]], "Table Batched Embedding Operators": [[11, "table-batched-embedding-operators"]], "Sparse Data Operators": [[10, "sparse-data-operators"]]}, "indexentries": {"findminmax (c++ function)": [[0, "_CPPv410FindMinMaxPKfPfPf7int64_t"]], "floatorhalftofusednbitrowwisequantizedsbhalf (c++ function)": [[0, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE"]], "fusedquantizedequantize (c++ function)": [[0, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif"]], "quantizegroupwise (c++ function)": [[0, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T"]], "xor128 (c++ function)": [[0, "_CPPv46Xor128v"]], "requantizeoutputprocessingavx2 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "requantizeoutputprocessinggconvavx512 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "bounds_check_indices_cuda (c++ function)": [[2, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t"]], "int_nbit_split_embedding_codegen_lookup_function (c++ function)": [[2, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE"]], "int_nbit_split_embedding_codegen_lookup_function_cpu (c++ function)": [[2, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function (c++ function)": [[2, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu (c++ function)": [[2, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE"]], "pruned_array_lookup_cpu (c++ function)": [[2, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor"]], "pruned_array_lookup_cuda (c++ function)": [[2, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_insert_unweighted_cpu (c++ function)": [[2, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_cuda (c++ function)": [[2, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_unweighted_cpu (c++ function)": [[2, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor"]], "gqa_attn_splitk (c++ function)": [[3, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb"]], "padding_fused_tbe_input_combine_cpu (c++ function)": [[4, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t"]], "tbe_input_combine_cpu (c++ function)": [[4, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE"]], "batched_dense_vec_jagged_2d_mul (c++ function)": [[5, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor"]], "dense_to_jagged (c++ function)": [[5, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE"]], "jagged_1d_to_dense (c++ function)": [[5, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t"]], "jagged_2d_to_dense (c++ function)": [[5, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE"]], "jagged_dense_elementwise_add (c++ function)": [[5, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output (c++ function)": [[5, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output_cuda (c++ function)": [[5, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_mul (c++ function)": [[5, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_to_padded_dense (c++ function)": [[5, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd"]], "jagged_to_padded_dense_forward (c++ function)": [[5, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd"]], "recat_embedding_grad_output_cuda (c++ function)": [[6, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_batch_cuda (c++ function)": [[6, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor"]], "recat_embedding_grad_output_mixed_d_cpu (c++ function)": [[6, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_cuda (c++ function)": [[6, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE"]], "is_uvm_tensor (c++ function)": [[7, "_CPPv413is_uvm_tensorRK6Tensor"]], "new_host_mapped_tensor (c++ function)": [[7, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor (c++ function)": [[7, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor_meta (c++ function)": [[7, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_unified_tensor (c++ function)": [[7, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb"]], "new_vanilla_managed_tensor (c++ function)": [[7, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "uvm_cuda_mem_advise (c++ function)": [[7, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t"]], "uvm_cuda_mem_prefetch_async (c++ function)": [[7, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE"]], "uvm_mem_advice_dont_fork (c++ function)": [[7, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor"]], "uvm_storage (c++ function)": [[7, "_CPPv411uvm_storageRK6Tensor"]], "uvm_to_cpu (c++ function)": [[7, "_CPPv410uvm_to_cpuRK6Tensor"]], "uvm_to_cpu_clone (c++ function)": [[7, "_CPPv416uvm_to_cpu_cloneRK6Tensor"]], "uvm_to_device (c++ function)": [[7, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor"]], "all_to_one_device (c++ function)": [[8, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE"]], "permute_pooled_embs_auto_grad (c++ function)": [[8, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_cpu (c++ function)": [[8, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_gpu (c++ function)": [[8, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_split_cpu (c++ function)": [[8, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_auto_grad_split_gpu (c++ function)": [[8, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_cpu_impl (c++ function)": [[8, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb"]], "permute_pooled_embs_split_cpu (c++ function)": [[8, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_split_gpu (c++ function)": [[8, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "fp8quantizedtofloat_ref (c++ function)": [[9, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi"]], "fp8rowwise_to_float_cpu (c++ function)": [[9, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t"]], "floattofp8quantized_ref (c++ function)": [[9, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd"]], "_fp8rowwise_to_float_gpu (c++ function)": [[9, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t"]], "_bfloat16_to_float_gpu (c++ function)": [[9, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE"]], "_float_to_fp8rowwise_gpu (c++ function)": [[9, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb"]], "_float_to_bfloat16_gpu (c++ function)": [[9, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE"]], "_float_to_fused8bitrowwise_cpu_out (c++ function)": [[9, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor"]], "_float_to_fused8bitrowwise_gpu (c++ function)": [[9, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor"]], "_float_to_fusednbitrowwise_gpu (c++ function)": [[9, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "_float_to_hfp8_gpu (c++ function)": [[9, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd"]], "_float_to_msfp_gpu (c++ function)": [[9, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd"]], "_float_to_paddedfp8rowwise_gpu (c++ function)": [[9, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t"]], "_fused8bitrowwise_to_float_cpu_out (c++ function)": [[9, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor"]], "_fused8bitrowwise_to_float_gpu (c++ function)": [[9, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_float_mixed_dim_gpu (c++ function)": [[9, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t"]], "_fused8bitrowwise_to_half_gpu (c++ function)": [[9, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_single_or_half_precision_gpu (c++ function)": [[9, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb"]], "_fusednbitrowwise_to_float_gpu (c++ function)": [[9, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_half_gpu (c++ function)": [[9, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_single_or_half_precision_gpu (c++ function)": [[9, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_half_to_fused8bitrowwise_gpu (c++ function)": [[9, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor"]], "_half_to_fusednbitrowwise_gpu (c++ function)": [[9, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t"]], "_hfp8_to_float_gpu (c++ function)": [[9, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_msfp_to_float_gpu (c++ function)": [[9, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t"]], "_paddedfp8rowwise_to_float_gpu (c++ function)": [[9, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t"]], "_single_or_half_precision_to_fused8bitrowwise_gpu (c++ function)": [[9, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor"]], "_single_or_half_precision_to_fusednbitrowwise_gpu (c++ function)": [[9, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "float_or_half_to_fused8bitrowwise_cpu (c++ function)": [[9, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor"]], "float_to_fp8rowwise_cpu (c++ function)": [[9, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb"]], "float_to_fused8bitrowwise_cpu (c++ function)": [[9, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor"]], "fused8bitrowwise_to_float_cpu (c++ function)": [[9, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor"]], "fused8bitrowwise_to_float_or_half_cpu (c++ function)": [[9, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb"]], "fused8bitrowwise_to_half_cpu (c++ function)": [[9, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor"]], "fusednbitrowwise_to_float_cpu (c++ function)": [[9, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t"]], "fusednbitrowwise_to_float_or_half_cpu (c++ function)": [[9, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t"]], "fusednbitrowwise_to_half_cpu (c++ function)": [[9, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t"]], "half_to_fused8bitrowwise_cpu (c++ function)": [[9, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor"]], "expand_into_jagged_permute_cuda (c++ function)": [[10, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t"]], "generic_histogram_binning_calibration_by_feature_cpu (c++ function)": [[10, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td"]], "histogram_binning_calibration_cpu (c++ function)": [[10, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td"]], "direct_mapped_lru_cache_populate_byte_cuda (c++ function)": [[11, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "direct_mapped_lxu_cache_lookup_cuda (c++ function)": [[11, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "get_unique_indices_cuda (c++ function)": [[11, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb"]], "host_lxu_cache_slot (c++ function)": [[11, "_CPPv419host_lxu_cache_slot7int64_t7int64_t"]], "lfu_cache_populate_byte_cuda (c++ function)": [[11, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "lfu_cache_populate_cuda (c++ function)": [[11, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb"]], "linearize_cache_indices_cuda (c++ function)": [[11, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t"]], "linearize_cache_indices_from_row_idx_cuda (c++ function)": [[11, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE"]], "lru_cache_find_uncached_cuda (c++ function)": [[11, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE"]], "lru_cache_populate_byte_cuda (c++ function)": [[11, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "lru_cache_populate_cuda (c++ function)": [[11, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE"]], "lxu_cache_flush_cuda (c++ function)": [[11, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb"]], "lxu_cache_locations_update_cuda (c++ function)": [[11, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE"]], "lxu_cache_locking_counter_decrement_cuda (c++ function)": [[11, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE"]], "lxu_cache_lookup_cuda (c++ function)": [[11, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE"]], "reset_weight_momentum_cuda (c++ function)": [[11, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "batched_dense_vec_jagged_2d_mul() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.batched_dense_vec_jagged_2d_mul"]], "dense_to_jagged() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.dense_to_jagged"]], "jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_1d_to_dense"]], "jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_2d_to_dense"]], "jagged_dense_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_dense_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_add() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_dense_elementwise_add"]], "jagged_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_mul() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_dense_elementwise_mul"]], "jagged_to_padded_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_to_padded_dense"]], "stacked_jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.stacked_jagged_1d_to_dense"]], "stacked_jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.stacked_jagged_2d_to_dense"]], "splittablebatchedembeddingbagscodegen() (in module fbgemm_gpu.split_table_batched_embeddings_ops)": [[17, "fbgemm_gpu.split_table_batched_embeddings_ops.SplitTableBatchedEmbeddingBagsCodegen"]], "fbgemm_gpu": [[17, "module-fbgemm_gpu"]], "module": [[17, "module-fbgemm_gpu"]], "example_method (c++ function)": [[21, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf"]], "example_method() (in module fbgemm_gpu.docs.examples)": [[23, "fbgemm_gpu.docs.examples.example_method"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["fbgemm-cpp-api/QuantUtils", "fbgemm-development/BuildInstructions", "fbgemm_gpu-cpp-api/embedding_ops", "fbgemm_gpu-cpp-api/experimental_ops", "fbgemm_gpu-cpp-api/input_combine", "fbgemm_gpu-cpp-api/jagged_tensor_ops", "fbgemm_gpu-cpp-api/layout_transform_ops", "fbgemm_gpu-cpp-api/memory_utils", "fbgemm_gpu-cpp-api/merge_pooled_embeddings", "fbgemm_gpu-cpp-api/quantize_ops", "fbgemm_gpu-cpp-api/sparse_ops", "fbgemm_gpu-cpp-api/split_table_batched_embeddings", "fbgemm_gpu-development/BuildInstructions", "fbgemm_gpu-development/InstallationInstructions", "fbgemm_gpu-development/TestInstructions", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps", "fbgemm_gpu-python-api/jagged_tensor_ops", "fbgemm_gpu-python-api/table_batched_embedding_ops", "general/ContactUs", "general/Contributing", "general/License", "general/documentation/Cpp", "general/documentation/Overview", "general/documentation/Python", "general/documentation/Sphinx", "index"], "filenames": ["fbgemm-cpp-api/QuantUtils.rst", "fbgemm-development/BuildInstructions.rst", "fbgemm_gpu-cpp-api/embedding_ops.rst", "fbgemm_gpu-cpp-api/experimental_ops.rst", "fbgemm_gpu-cpp-api/input_combine.rst", "fbgemm_gpu-cpp-api/jagged_tensor_ops.rst", "fbgemm_gpu-cpp-api/layout_transform_ops.rst", "fbgemm_gpu-cpp-api/memory_utils.rst", "fbgemm_gpu-cpp-api/merge_pooled_embeddings.rst", "fbgemm_gpu-cpp-api/quantize_ops.rst", "fbgemm_gpu-cpp-api/sparse_ops.rst", "fbgemm_gpu-cpp-api/split_table_batched_embeddings.rst", "fbgemm_gpu-development/BuildInstructions.rst", "fbgemm_gpu-development/InstallationInstructions.rst", "fbgemm_gpu-development/TestInstructions.rst", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "general/ContactUs.rst", "general/Contributing.rst", "general/License.rst", "general/documentation/Cpp.rst", "general/documentation/Overview.rst", "general/documentation/Python.rst", "general/documentation/Sphinx.rst", "index.rst"], "titles": ["Quantization Utilities", "Build Instructions", "Embedding Operators", "Experimental Operators", "Combine Input Operators", "Jagged Tensor Operators", "Layout Transformation Operators", "CUDA Memory Operators", "Pooled Embeddings Operators", "Quantization Operators", "Sparse Data Operators", "Table Batched Embedding Operators", "Build Instructions", "Installation Instructions", "Test Instructions", "Jagged Tensor Operators", "Jagged Tensor Operators", "Table Batched Embedding (TBE) Operators", "Contact Us", "Contributing", "License", "Adding Documentation to C++ Code", "Documentation", "Adding Documentation to Python Code", "Sphinx Documentation Pointers", "FBGEMM and FBGEMM_GPU Documentation Homepage"], "terms": {"templat": [0, 12, 21], "typenam": [0, 21], "t": [0, 1, 3, 7, 10, 12, 17, 19, 21, 22], "layout_t": 0, "layout": [0, 25], "kcx": 0, "void": [0, 2, 7, 9, 11], "quantizegroupwis": 0, "const": [0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 23], "float": [0, 9, 16, 17, 21, 23], "src": 0, "int": [0, 9, 16, 17, 21, 23], "k": [0, 3], "c": [0, 11, 13, 15, 20, 22, 23, 24], "x": [0, 5, 15, 21, 23], "g": [0, 1, 10, 12, 21, 23], "scale": [0, 3], "std": [0, 3, 4, 5, 6, 7, 8, 10, 11, 12, 21, 23], "int32_t": [0, 21, 23], "zero_point": 0, "dst": 0, "point": [0, 9, 16, 21, 23], "data": [0, 7, 15, 17, 20, 25], "type": [0, 1, 9, 13, 15, 16, 17, 21], "paramet": [0, 3, 7, 9, 10, 16, 17, 21, 22, 23], "output": [0, 3, 5, 9, 10, 16, 17, 21, 23], "int8_t": 0, "uint8_t": [0, 9, 11], "ar": [0, 1, 5, 11, 12, 13, 15, 16, 17, 20, 21, 22, 23], "support": [0, 1, 3, 12, 13, 15, 23, 25], "input": [0, 3, 5, 7, 9, 10, 15, 16, 17, 21, 25], "tensor": [0, 2, 3, 4, 6, 7, 8, 9, 10, 11, 17, 22, 23, 25], "kxc": 0, "correspond": [0, 10, 11, 15, 21, 23], "kcr": 0, "kctr": 0, "weight": [0, 2, 10, 11, 17], "time": [0, 1, 12, 13, 15], "dimens": [0, 3, 5, 7, 10, 15, 16, 17, 23], "krsc": 0, "ktrsc": 0, "channel": [0, 12, 13, 18], "number": [0, 1, 3, 10, 12, 15, 16, 17, 22], "r": [0, 14, 22], "": [0, 1, 7, 12, 14, 15, 19, 21, 22, 23], "group": [0, 3, 15, 21], "function": [0, 1, 12, 21, 23], "perform": [0, 1, 9, 10, 15, 25], "channelwis": 0, "1": [0, 1, 3, 10, 11, 12, 13, 14, 15, 16, 17, 22, 23, 24], "groupwis": 0, "per": [0, 15], "size": [0, 1, 3, 7, 9, 10, 15, 16, 17], "should": [0, 10, 11, 12, 13, 15, 19, 21, 22, 23], "equal": [0, 15, 23], "zero": [0, 16, 23], "reprsent": 0, "fusedquantizedequant": 0, "int64_t": [0, 2, 3, 4, 5, 6, 7, 9, 10, 11], "len": [0, 15], "tensorquantizationparam": 0, "qparam": 0, "thread_id": 0, "0": [0, 1, 9, 10, 11, 12, 13, 15, 16, 17, 23], "num_thread": 0, "noise_ratio": 0, "0f": 0, "fuse": [0, 9, 17], "integ": [0, 7, 9, 15], "dequant": 0, "kernel": [0, 1, 7, 14, 25], "acceler": 0, "awar": 0, "train": [0, 17, 25], "fp32": [0, 9, 17], "valu": [0, 5, 7, 9, 10, 11, 16, 17, 21, 22, 23], "u": [0, 12, 24, 25], "int8": [0, 17], "us": [0, 1, 3, 7, 10, 12, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25], "provid": [0, 1, 12, 13, 14, 20, 21, 22, 23, 25], "back": [0, 7, 11, 12, 13], "inputtyp": 0, "floatorhalftofusednbitrowwisequantizedsbhalf": 0, "bit_rat": [0, 9], "size_t": [0, 9, 21], "input_row": 0, "input_column": 0, "convert": [0, 7, 9, 15, 16, 23], "fp16": [0, 9, 17], "rowwis": [0, 9, 17], "bitrat": 0, "specifi": [0, 1, 9, 10, 12, 16, 17], "bit": [0, 9], "bia": [0, 3, 9], "each": [0, 3, 10, 12, 15, 16, 17, 23], "row": [0, 5, 11, 15, 16, 17, 23], "store": [0, 10, 11], "itself": [0, 15, 22], "end": [0, 13, 15, 24], "can": [0, 1, 9, 10, 12, 13, 15, 21, 22, 23, 24], "4": [0, 12, 13, 15, 16, 17, 23], "8": [0, 9, 12, 15, 17], "uint32_t": 0, "xor128": 0, "random": 0, "gener": [0, 1, 10, 12, 13, 21, 24], "9": [0, 12, 15, 17], "base": [0, 1, 10, 11, 12, 15], "thi": [0, 1, 5, 7, 8, 10, 12, 13, 15, 18, 19, 20, 21, 23, 24, 25], "paper": 0, "findminmax": 0, "m": [0, 12, 13, 14], "min": 0, "max": [0, 3, 17], "find": [0, 11, 12], "matrix": [0, 1, 16, 25], "bool": [0, 3, 7, 8, 9, 11, 17], "a_symmetr": 0, "b_symmetr": 0, "quantizationgranular": 0, "q_gran": 0, "has_bia": 0, "fuse_relu": 0, "bias_typ": 0, "direct": [0, 11, 13, 20, 21, 23, 24], "fals": [0, 7, 17, 22], "requantizeoutputprocessingavx2": 0, "out": [0, 12, 18, 20, 22], "inp": 0, "block_type_t": 0, "block": [0, 21, 23, 24], "ld_out": 0, "ld_in": 0, "requantizationparams_t": 0, "requant": 0, "avx2": [0, 1], "i": [0, 1, 3, 5, 7, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25], "c_per_g": 0, "requantizeoutputprocessinggconvavx512": 0, "avx512": 0, "note": [1, 11, 12, 13, 21, 22, 23, 24], "The": [1, 3, 7, 9, 10, 13, 14, 15, 16, 17, 19, 21, 22, 23, 24], "most": [1, 12, 13, 15, 22], "date": [1, 12, 13, 22], "embed": [1, 12, 13, 22, 25], "script": [1, 12, 13, 22], "bundl": [1, 12, 13, 22], "repo": [1, 12, 13, 22, 23], "under": [1, 12, 13, 19, 20, 22, 23], "setup_env": [1, 12, 13, 22], "bash": [1, 12, 13, 22], "step": [1, 12, 13, 15, 22, 23], "fbgemm_gpu": [1, 7, 15, 17, 18, 19, 20, 21, 23], "follow": [1, 10, 12, 13, 15, 20, 21, 22, 23], "toolchain": [1, 12, 13], "run": [1, 12, 13, 22], "cpu": [1, 7, 8, 14, 22], "higher": 1, "In": [1, 10, 12, 13, 15, 19, 21, 23], "doe": [1, 2, 13, 21, 22, 23], "have": [1, 10, 11, 12, 15, 22], "ani": [1, 10, 12, 16, 19, 20, 22, 23], "intel": 1, "mkl": 1, "howev": [1, 12, 15, 20], "comparison": 1, "some": [1, 12, 15, 22], "benchmark": 1, "If": [1, 12, 13, 17, 19, 21, 22, 23], "found": [1, 12, 13, 22], "path": [1, 12, 21, 24], "through": [1, 19, 21, 23], "intel_mkl_dir": 1, "variabl": 1, "built": [1, 12, 13, 22, 25], "report": [1, 13], "otherwis": [1, 7, 13, 20], "subset": 1, "all": [1, 10, 11, 12, 13, 15, 17, 20, 22], "three": [1, 15], "git": [1, 12], "submodul": [1, 12], "custom": [1, 24], "version": [1, 13], "desir": [1, 12, 15, 16, 21], "thei": [1, 12, 22, 24], "asmjit_src_dir": 1, "cpuinfo_src_dir": 1, "googletest_source_dir": 1, "With": 1, "inner": [1, 15], "take": [1, 12], "one": [1, 3, 9, 10, 11, 16, 17, 21, 23], "doesn": 1, "fit": [1, 20], "approach": 1, "so": [1, 10, 12, 13, 15], "implement": [1, 3, 12, 15], "dynam": 1, "effici": [1, 25], "shape": [1, 3, 15, 17], "specif": [1, 10, 12, 17, 20], "vector": [1, 4, 5, 6, 7, 8, 16, 23], "code": [1, 12, 20, 22], "third": 1, "parti": 1, "call": [1, 7, 13], "detect": [1, 14], "runtim": [1, 12], "pytorch": [1, 15, 18, 22, 23, 25], "project": [1, 19], "dispatch": [1, 7], "optim": [1, 9, 17], "test": [1, 12, 13, 19, 25], "you": [1, 19, 21, 23], "don": [1, 10, 12, 22], "want": [1, 19], "togeth": [1, 21, 22], "default": [1, 10, 12, 13, 17], "turn": [1, 22], "off": [1, 13, 18], "simpli": [1, 12], "fbgemm_build_test": 1, "conda": [1, 14, 22], "For": [1, 14, 15, 18, 20, 21, 22, 23, 24], "platform": [1, 12, 20], "machin": [1, 12, 13, 14, 25], "microsoft": [1, 9], "visual": 1, "studio": 1, "2019": 1, "newer": [1, 12], "recommend": [1, 5, 12, 13, 15], "here": [1, 7, 12, 19, 21, 22, 23, 24], "necessari": [1, 12], "ninja": [1, 12], "etc": [1, 12, 17], "n": [1, 9, 12, 13, 24], "env_nam": [1, 12, 13], "y": [1, 5, 12, 13, 16, 22], "doxygen": [1, 21, 22], "make": [1, 11, 12, 19, 21, 22, 23], "openbla": 1, "packag": [1, 14, 22], "onli": [1, 3, 10, 11, 14, 15, 19, 21, 22, 24], "clone": [1, 12], "along": [1, 12, 13], "its": [1, 7, 10, 12, 17, 20, 22, 24], "insid": [1, 12, 13, 14, 22, 24], "recurs": [1, 12], "http": [1, 12, 13, 19, 21, 22, 23], "github": [1, 12, 19], "com": [1, 12, 19], "cd": [1, 12, 14, 22], "assum": [1, 10], "process": [1, 5, 13, 15, 19, 23], "straightforward": 1, "creat": [1, 7, 12, 15, 19, 21, 23, 24], "directori": [1, 12, 14, 19, 21, 22], "mkdir": 1, "argument": [1, 10, 21, 22, 23], "build_arg": 1, "duse_sanit": 1, "address": [1, 12], "dfbgemm_library_typ": 1, "share": [1, 7], "dpython_execut": 1, "which": [1, 10, 12, 13, 15, 17, 22], "python3": [1, 13], "option": [1, 2, 5, 7, 11, 12, 16, 17], "document": [1, 7, 19, 20], "dfbgemm_build_doc": 1, "ON": [1, 20], "j": [1, 15], "verbos": 1, "As": [1, 10, 12, 13, 15], "write": [1, 12, 13, 22, 23], "fail": [1, 13, 14, 21], "due": [1, 12], "known": [1, 12, 17], "regress": 1, "To": [1, 12, 14, 24], "work": [1, 12, 13, 15, 19], "around": 1, "append": [1, 12, 21, 23], "export": [1, 12, 14], "prior": [1, 12, 13, 20], "cflag": 1, "wno": 1, "error": [1, 9, 13, 21, 22, 23], "mayb": 1, "uniniti": 1, "restrict": 1, "cxxflag": 1, "pleas": [1, 19, 21, 23], "see": [1, 7, 12, 13, 15, 21, 23, 24], "77939": 1, "1094": 1, "1666": 1, "more": [1, 7, 12, 17, 21, 23, 24], "detail": [1, 13], "exactli": 1, "same": [1, 3, 7, 10, 12, 15, 16, 21, 22, 23], "extra": 1, "need": [1, 12, 13, 14, 15, 19, 21, 23, 24], "ad": [1, 19, 22], "invoc": [1, 12, 22], "llvm": [1, 12], "standard": [1, 12], "libc": 1, "openmp": [1, 12], "libomp": 1, "locat": [1, 7, 11, 12, 15], "cc_path": 1, "cxx_path": 1, "dcmake_c_compil": 1, "dcmake_cxx_compil": 1, "dcmake_c_flag": [1, 12], "fopenmp": 1, "stdlib": [1, 12], "conda_prefix": [1, 12], "includ": [1, 8, 12, 20, 21, 23], "dcmake_cxx_flag": [1, 12], "likewis": 1, "also": [1, 12, 17, 24], "veri": [1, 12, 21, 22, 23], "target": [1, 7, 9, 10, 12, 15, 21, 22, 23, 24], "architectur": [1, 12, 13], "bc": [1, 12], "x64": 1, "program": [1, 19], "file": [1, 12, 13, 18, 19, 21, 22, 23, 24], "x86": [1, 25], "enterpris": 1, "vc": 1, "auxiliari": 1, "vcvarsal": 1, "bat": 1, "build_dir": 1, "dfbgemm_build_benchmark": 1, "dcmake_build_typ": 1, "releas": [1, 13], "cl": 1, "ex": 1, "v": [1, 3, 5, 14, 16], "int_nbit_split_embedding_codegen_lookup_funct": 2, "dev_weight": [2, 11], "uvm_weight": [2, 11], "weights_plac": [2, 11], "weights_offset": [2, 11], "weights_ti": [2, 11], "d_offset": [2, 9, 11], "total_d": [2, 11, 17], "max_int2_d": 2, "max_int4_d": 2, "max_int8_d": 2, "max_float16_d": 2, "max_float32_d": 2, "indic": [2, 11, 15, 17], "offset": [2, 5, 10, 11, 16, 17], "pooling_mod": [2, 17], "c10": [2, 5, 7, 9, 11], "indice_weight": 2, "output_dtyp": [2, 9, 17], "lxu_cache_weight": [2, 11], "lxu_cache_loc": [2, 11], "row_align": [2, 11], "max_float8_d": 2, "fp8_exponent_bit": 2, "fp8_exponent_bia": 2, "int_nbit_split_embedding_uvm_caching_codegen_lookup_funct": 2, "cache_hash_size_cumsum": [2, 11], "total_cache_hash_s": [2, 11], "cache_index_table_map": [2, 11], "lxu_cache_st": [2, 11], "lxu_stat": 2, "simlar": 2, "uvm_cach": 2, "lookup": [2, 11], "pruned_hashmap_lookup_cuda": 2, "hash_tabl": 2, "hash_table_offset": 2, "pruned_array_lookup_cuda": 2, "index_remap": 2, "index_remappings_offset": 2, "bounds_check_indices_cuda": 2, "rows_per_t": 2, "bounds_check_mod": [2, 17], "warn": [2, 17, 21], "b_ofset": 2, "max_b": [2, 11], "int_nbit_split_embedding_codegen_lookup_function_cpu": 2, "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu": 2, "pruned_hashmap_insert_unweighted_cpu": 2, "dense_indic": 2, "pruned_hashmap_lookup_unweighted_cpu": 2, "pruned_array_lookup_cpu": 2, "tupl": [3, 4, 5, 10, 11, 17], "gqa_attn_splitk": 3, "xq": 3, "cache_k": 3, "cache_v": 3, "seq_posit": 3, "doubl": [3, 5, 9, 10], "qk_scale": 3, "num_split_k": 3, "num_int4_kv_group": 3, "use_tensor_cor": 3, "decod": 3, "queri": 3, "split": 3, "w": [3, 14], "bf16": [3, 9], "int4": 3, "kv": 3, "cuda": [3, 8, 17, 25], "gqa": 3, "cach": [3, 11, 12, 17], "It": [3, 12, 13, 15], "current": [3, 12, 13, 15, 17], "context": 3, "length": [3, 5, 10, 16, 17, 23], "16384": 3, "fix": [3, 10, 12], "head": 3, "128": 3, "an": [3, 7, 10, 13, 14, 15, 17, 21, 22, 23, 24], "arbitrari": 3, "b": [3, 10, 12, 15, 16, 17, 21, 22, 23, 24], "h_q": 3, "d": [3, 15, 16, 24], "where": [3, 5, 7, 10, 15, 16, 17], "batch": [3, 5, 10, 15, 16, 25], "num": 3, "max_t": 3, "h_kv": 3, "sequenc": 3, "posit": [3, 10, 17], "contain": [3, 7, 12, 15, 16, 17, 23], "actual": [3, 12], "token": [3, 15], "appli": [3, 10, 12, 15, 17], "after": [3, 10, 12, 13, 14, 15, 17, 22, 23, 24], "qk": 3, "control": 3, "amount": [3, 17], "parallel": 3, "wise": [3, 15, 17], "quantiz": [3, 25], "whether": [3, 7, 12, 20], "core": 3, "wmma": 3, "instruct": [3, 19, 21, 22, 23, 25], "fast": 3, "return": [3, 7, 9, 10, 16, 17, 21, 22, 23], "A": [3, 7, 9, 13, 15, 16, 17, 20, 21, 22, 23], "combin": [3, 25], "non": [3, 7, 17], "metadata": 3, "softmax": 3, "sum": [3, 10, 16, 17], "tbe_input_combine_cpu": 4, "indices_list": 4, "offsets_list": 4, "per_sample_weight": [4, 17], "include_last_offset": 4, "padding_fused_tbe_input_combine_cpu": 4, "batch_siz": 4, "solv": 5, "issu": [5, 7, 12, 13, 18], "when": [5, 10, 12, 14, 15, 17, 21, 22, 24], "differ": [5, 10, 15], "often": 5, "occur": [5, 21], "spars": [5, 15, 25], "featur": [5, 10, 15, 17, 18], "system": [5, 12, 13, 15], "well": [5, 10, 12, 21], "natur": [5, 15], "languag": [5, 15, 24], "jagged_to_padded_dense_forward": 5, "symintarrayref": 5, "max_length": [5, 16], "padding_valu": [5, 16], "jagged_dense_elementwise_add_jagged_output_cuda": 5, "x_valu": [5, 16], "x_offset": [5, 16, 23], "dens": [5, 16, 23], "jagged_to_padded_dens": [5, 16], "jagged_dense_elementwise_add": [5, 16], "jagged_dense_elementwise_mul": [5, 16], "batched_dense_vec_jagged_2d_mul": [5, 16], "a_valu": [5, 16], "a_offset": [5, 16], "dense_to_jag": [5, 16], "symint": 5, "total_l": [5, 16], "jagged_dense_elementwise_add_jagged_output": [5, 16], "jagged_1d_to_dens": [5, 16], "max_l": 5, "jagged_2d_to_dens": [5, 12, 13, 16, 22, 23], "max_sequence_length": [5, 16, 23], "recat_embedding_grad_output_cuda": 6, "grad_output": 6, "num_features_per_rank": 6, "recat_embedding_grad_output_mixed_d_cuda": 6, "dim_sum_per_rank": 6, "recat_embedding_grad_output_mixed_d_batch_cuda": 6, "cumsum_dim_sum_per_rank": 6, "recat_embedding_grad_output_mixed_d_cpu": 6, "new_managed_tensor": 7, "self": 7, "alloc": [7, 21], "unifi": 7, "manag": [7, 12, 13, 17], "uvm": [7, 14], "Then": 7, "set": [7, 11, 14, 15, 16, 17], "prefer": [7, 13], "storag": [7, 9, 11], "host": [7, 12], "establish": 7, "map": [7, 10, 11, 15, 17], "devic": [7, 8, 12, 14, 17], "new": [7, 9, 11, 21, 22, 23], "new_managed_tensor_meta": 7, "placehold": 7, "meta": [7, 20], "kei": 7, "empti": [7, 15, 16, 24], "new_host_mapped_tensor": 7, "new_unified_tensor": 7, "is_host_map": 7, "either": [7, 9, 10, 12, 13], "depend": [7, 9, 12, 13, 15], "new_vanilla_managed_tensor": 7, "allow": [7, 12], "automat": [7, 10, 14, 22], "uvm_storag": 7, "check": [7, 17], "gpu": [7, 12, 13, 14, 25], "true": [7, 17], "is_uvm_tensor": 7, "BUT": [7, 20], "uvm_to_cpu": 7, "effect": [7, 15], "move": 7, "from": [7, 9, 10, 11, 12, 13, 14, 15, 17, 19, 20, 21, 22, 23, 24], "uvm_to_devic": 7, "prototyp": 7, "whose": 7, "uvm_cuda_mem_advis": 7, "cuda_memory_advis": 7, "cudamemadvis": 7, "cudamemoryadvis": 7, "enum": [7, 9], "avail": [7, 12, 14, 22], "python": [7, 12, 14, 21, 22, 24], "side": [7, 21, 23, 25], "namespac": 7, "over": [7, 12], "valid": 7, "inform": [7, 15, 23, 24], "uvm_cuda_mem_prefetch_async": 7, "device_t": 7, "cudamemprefetchasync": 7, "prefetch": 7, "destin": 7, "uvm_mem_advice_dont_fork": 7, "madvis": 7, "madv_dontfork": 7, "workaround": 7, "driver": [7, 12], "un": 7, "page": [7, 19, 24, 25], "tabl": [7, 10, 15, 25], "fork": [7, 19], "caus": [7, 12, 13, 20, 22], "slowdown": 7, "next": [7, 15, 21, 23], "access": [7, 17], "uvm_to_cpu_clon": 7, "copi": 7, "contigu": [7, 10], "singl": [7, 9], "thread": 7, "memcpi": 7, "section": [8, 12, 23], "variou": 8, "all_to_one_devic": 8, "inputtensor": 8, "target_devic": 8, "permute_pooled_embs_split_gpu": 8, "pooled_emb": 8, "offset_dim_list": 8, "permute_list": 8, "inv_offset_dim_list": 8, "inv_permute_list": 8, "permute_pooled_embs_auto_grad_split_gpu": 8, "permute_pooled_embs_auto_grad_gpu": 8, "permute_pooled_embs_cpu_impl": 8, "allow_dupl": 8, "permute_pooled_embs_split_cpu": 8, "permute_pooled_embs_auto_grad_split_cpu": 8, "permute_pooled_embs_auto_grad": 8, "permute_pooled_embs_auto_grad_cpu": 8, "model": [9, 10], "techniqu": 9, "reduc": 9, "larg": [9, 12], "order": [9, 15, 19], "achiev": [9, 13], "better": [9, 21], "small": 9, "loss": [9, 20], "accuraci": 9, "_float_to_bfloat16_gpu": 9, "brain": 9, "bfloat16": 9, "_bfloat16_to_float_gpu": 9, "_float_to_fp8rowwise_gpu": 9, "forward": 9, "fp8": 9, "dtype": [9, 17], "sparsetyp": [9, 17], "throw": [9, 21], "_fp8rowwise_to_float_gpu": 9, "represent": [9, 15], "_float_to_fused8bitrowwise_gpu": 9, "_half_to_fused8bitrowwise_gpu": 9, "half": 9, "_single_or_half_precision_to_fused8bitrowwise_gpu": 9, "_fused8bitrowwise_to_float_gpu": 9, "_fused8bitrowwise_to_half_gpu": 9, "_fused8bitrowwise_to_single_or_half_precision_gpu": 9, "scale_bias_last": 9, "quant_padding_float_typ": 9, "_fused8bitrowwise_to_float_mixed_dim_gpu": 9, "kfloat": 9, "khalf": 9, "_float_to_fusednbitrowwise_gpu": 9, "_half_to_fusednbitrowwise_gpu": 9, "_single_or_half_precision_to_fusednbitrowwise_gpu": 9, "_fusednbitrowwise_to_float_gpu": 9, "_fusednbitrowwise_to_half_gpu": 9, "_fusednbitrowwise_to_single_or_half_precision_gpu": 9, "_float_to_hfp8_gpu": 9, "ebit": 9, "exponent_bia": 9, "max_po": 9, "hybrid": 9, "hfp8": 9, "_hfp8_to_float_gpu": 9, "_float_to_msfp_gpu": 9, "bounding_box_s": 9, "mbit": 9, "min_po": 9, "msfp": 9, "_msfp_to_float_gpu": 9, "_float_to_paddedfp8rowwise_gpu": 9, "row_dim": 9, "pad": [9, 15, 16, 23], "_paddedfp8rowwise_to_float_gpu": 9, "output_last_dim": 9, "_fused8bitrowwise_to_float_cpu_out": 9, "_float_to_fused8bitrowwise_cpu_out": 9, "float_to_fused8bitrowwise_cpu": 9, "half_to_fused8bitrowwise_cpu": 9, "float_or_half_to_fused8bitrowwise_cpu": 9, "fused8bitrowwise_to_float_cpu": 9, "fused8bitrowwise_to_half_cpu": 9, "fused8bitrowwise_to_float_or_half_cpu": 9, "float_to_fp8rowwise_cpu": 9, "fp8rowwise_to_float_cpu": 9, "fusednbitrowwise_to_float_cpu": 9, "fusednbitrowwise_to_half_cpu": 9, "fusednbitrowwise_to_float_or_half_cpu": 9, "floattofp8quantized_ref": 9, "nrow": 9, "ncol": 9, "fp8quantizedtofloat_ref": 9, "expand_into_jagged_permute_cuda": 10, "permut": 10, "input_offset": 10, "output_offset": 10, "output_s": 10, "expand_into_jagged_permut": 10, "expand": 10, "index": [10, 11, 12, 13, 15, 21, 23], "case": [10, 12, 13, 15, 19], "ha": [10, 13, 15, 19, 21, 22], "across": [10, 12], "rank": [10, 15], "level": 10, "exclus": 10, "op": [10, 13, 16, 23], "bag": [10, 17, 25], "sit": 10, "we": [10, 12, 15, 19], "deriv": [10, 15, 20], "arrai": [10, 16, 23], "comput": [10, 12, 13, 17], "formula": 10, "output_permut": 10, "table_offset": 10, "bag_offset": 10, "histogram_binning_calibration_cpu": 10, "logit": 10, "bin_num_exampl": 10, "bin_num_posit": 10, "positive_weight": 10, "lower_bound": 10, "upper_bound": 10, "bin_ctr_in_use_aft": 10, "bin_ctr_weight_valu": 10, "divid": [10, 15], "predict": 10, "rang": [10, 15], "e": [10, 12, 15, 21, 23, 24], "bin": [10, 12], "two": [10, 15, 16, 17, 22], "exampl": [10, 12, 13, 14, 16, 17, 21, 22, 23, 24], "fall": [10, 12, 13], "bucket": [10, 12], "basic": [10, 23], "histogram": 10, "result": [10, 12, 16], "statist": 10, "real": 10, "ctr": 10, "num_po": 10, "num_exampl": 10, "final": 10, "calibr": 10, "pre": [10, 12, 13], "cali": 10, "wai": [10, 20], "within": 10, "suffici": [10, 19, 22], "That": 10, "fine": 10, "grain": 10, "modul": [10, 13, 17, 23], "theoret": 10, "layer": 10, "uncalibr": 10, "befor": [10, 12, 17, 24], "sigmoid": 10, "calibart": 10, "pass": [10, 17, 19, 22], "lower": 10, "bound": [10, 15], "calibration_target": 10, "observ": 10, "statisct": 10, "final_calibrated_predict": 10, "bin_ctr_weight": 10, "bin_ctr": 10, "calibrated_predict": 10, "bin_id": 10, "generic_histogram_binning_calibration_by_feature_cpu": 10, "segment_valu": 10, "segment_length": 10, "num_seg": 10, "bin_boundari": 10, "extens": [10, 21, 22], "ectr": 10, "abov": [10, 13, 15, 20, 21, 23, 24], "accept": [10, 19], "sort": [10, 11, 12], "keyjaggedtensor": 10, "num_bin": 10, "longer": [10, 18, 21], "still": [10, 12], "parambin_ctr_weight_valu": 10, "get_unique_indices_cuda": 11, "linear_indic": 11, "max_indic": 11, "compute_count": 11, "dedupl": 11, "pair": [11, 24], "lru_cache_find_uncached_cuda": 11, "unique_indic": 11, "unique_indices_length": 11, "time_stamp": 11, "lru_stat": 11, "gather_cache_stat": 11, "uvm_cache_stat": 11, "lock_cache_lin": 11, "lxu_cache_locking_count": 11, "lru": [11, 17], "uncach": 11, "them": 11, "host_lxu_cache_slot": 11, "h_in": 11, "cache_set": [11, 17], "linearize_cache_indices_cuda": 11, "b_offset": 11, "linear": 11, "uniqu": [11, 24], "linearize_cache_indices_from_row_idx_cuda": 11, "update_table_indic": 11, "update_row_indic": 11, "format": [11, 22, 23], "inplac": 11, "updat": [11, 12, 13, 17, 19], "lru_cache_populate_cuda": 11, "hash_size_cumsum": 11, "linear_cache_indic": 11, "stochastic_round": [11, 17], "fetch": 11, "insert": [11, 24], "timestep": 11, "lru_cache_populate_byte_cuda": 11, "byte": 11, "element": [11, 15], "direct_mapped_lru_cache_populate_byte_cuda": 11, "lxu_cache_miss_timestamp": 11, "assoc": 11, "variant": [11, 12, 13, 22], "lfu_cache_populate_cuda": 11, "lfu_stat": 11, "lfu": [11, 17], "lfu_cache_populate_byte_cuda": 11, "lxu_cache_lookup_cuda": 11, "invalid_index": 11, "num_uniq_cache_indic": 11, "lxu_cache_locations_output": 11, "look": [11, 17], "up": [11, 17], "slot": 11, "sentinel": 11, "miss": [11, 12], "direct_mapped_lxu_cache_lookup_cuda": 11, "lxu_cache_flush_cuda": 11, "flush": 11, "reset_weight_momentum_cuda": 11, "momentum1_dev": 11, "momentum1_uvm": 11, "momentum1_plac": 11, "momentum1_offset": 11, "pruned_indic": 11, "pruned_indices_offset": 11, "logical_table_id": 11, "buffer_id": 11, "lxu_cache_locking_counter_decrement_cuda": 11, "decrement": 11, "counter": 11, "lxu_cache_locations_update_cuda": 11, "lxu_cache_locations_new": 11, "fbgemm": [12, 13, 16, 18, 19, 20, 22, 23], "reproduc": [12, 13, 19, 20], "platform_nam": 12, "unam": 12, "prefix": [12, 24], "miniconda_prefix": 12, "home": 12, "download": [12, 13], "wget": 12, "q": 12, "anaconda": 12, "miniconda3": 12, "latest": 12, "sh": 12, "o": [12, 13], "p": 12, "load": [12, 15, 23], "shortcut": 12, "bashrc": 12, "command": [12, 13, 21, 22], "against": [12, 14], "env": [12, 13], "name": [12, 13, 20, 21, 23], "python_vers": 12, "3": [12, 15, 16, 17, 20, 23], "12": [12, 15, 17], "upgrad": 12, "pyopenssl": 12, "22": [12, 15], "requir": [12, 13, 14, 15, 17, 22, 23], "recent": [12, 13], "nvcc": 12, "capabl": [12, 14], "5": [12, 15, 17], "done": [12, 13], "bare": 12, "metal": 12, "neither": [12, 20], "nor": [12, 20], "nvidia": 12, "present": [12, 23], "sinc": [12, 15], "setup": [12, 13], "pull": [12, 13, 22], "linux": [12, 13], "distribut": [12, 20], "ubuntu": 12, "04": 12, "11": [12, 13, 15], "entrypoint": 12, "devel": 12, "ubuntu22": 12, "rest": [12, 13], "mai": [12, 13, 15, 20], "construct": [12, 13, 15], "mechan": 12, "full": [12, 13, 24], "nvml": 12, "org": [12, 13, 23], "cuda_vers": 12, "label": 12, "verifi": [12, 13, 21, 23], "cuda_runtim": 12, "h": [12, 16, 21], "libnvidia": [12, 13], "ml": [12, 13], "printenv": 12, "extract": 12, "given": [12, 15, 16], "url": [12, 13], "builder": 12, "blob": 12, "main": [12, 19], "common": [12, 13, 15, 23], "install_cuda": 12, "cudnn_url": 12, "redist": 12, "x86_64": 12, "2": [12, 13, 15, 16, 17, 21, 23, 24], "26_cuda12": 12, "archiv": 12, "tar": 12, "xz": 12, "unpack": 12, "xvf": 12, "amd": [12, 13], "minim": 12, "6": [12, 13, 15], "termin": 12, "while": [12, 22], "come": 12, "reason": [12, 13, 22], "oper": [12, 13, 25], "guid": [12, 23], "disabl": 12, "apt": 12, "prompt": 12, "debian_frontend": 12, "noninteract": 12, "db": 12, "radeon": 12, "amdgpu": 12, "focal": 12, "install_5": 12, "50601": 12, "1_all": 12, "deb": 12, "usecas": 12, "hiplibsdk": 12, "dkm": 12, "hipifi": 12, "hip": 12, "dev": 12, "20": 12, "sysroot": 12, "avoid": 12, "glibcxx": 12, "fbgemm_cpu": 12, "10": [12, 13, 15], "keep": 12, "older": [12, 13], "gcc_version": 12, "15": 12, "7": [12, 13, 15, 16, 17], "forg": [12, 22], "gxx_linux": 12, "64": [12, 15], "sysroot_linux": 12, "17": 12, "binari": [12, 20], "cento": 12, "stream": 12, "becaus": [12, 15], "librari": [12, 22, 25], "refer": [12, 15, 22, 23], "libstdc": 12, "what": [12, 22], "libcxx_path": 12, "print": [12, 13, 17, 23], "objdump": 12, "tc": 12, "grep": 12, "glibc_": 12, "sed": 12, "vu": 12, "cat": 12, "glibcxx_": 12, "possibl": [12, 15, 19, 20], "just": 12, "do": [12, 13, 19], "llvm_version": 12, "libcxx": 12, "outdat": 12, "aarch64": [12, 13], "cannot": 12, "explicitli": 12, "clangxx": 12, "rt": 12, "lib": [12, 13], "ld_library_path": [12, 13], "config": 12, "var": 12, "nvcc_prepend_flag": 12, "correctli": [12, 13, 14, 21, 22], "xcompil": 12, "ccbin": 12, "clangxx_path": 12, "unsupport": 12, "even": [12, 20], "though": [12, 13], "libstd": 12, "being": [12, 22], "mean": [12, 15, 17], "regardless": 12, "scenario": 12, "first": [12, 21, 23, 24], "binpath": 12, "overrid": 12, "exist": [12, 21, 23], "ln": 12, "sf": 12, "path_to_either_gcc_or_clang": 12, "cc": 12, "These": 12, "later": 12, "configur": [12, 21], "stage": [12, 15], "cmake": 12, "click": 12, "hypothesi": [12, 13], "jinja2": 12, "ncurs": 12, "numpi": [12, 13], "scikit": [12, 13], "offici": 12, "homepag": 12, "authorit": [12, 13, 22], "how": [12, 13, 14, 23], "nightli": [12, 13], "rc": 12, "without": [12, 20], "alwai": 12, "reliabl": 12, "arriv": 12, "hour": 12, "than": [12, 15], "window": 12, "silent": 12, "both": [12, 18, 20, 22], "place": [12, 17], "artifact": 12, "select": 12, "dure": [12, 15, 17, 23], "thu": [12, 17], "import": [12, 13, 17, 23, 24], "much": [12, 21], "determinist": 12, "torch": [12, 13, 16, 17, 22, 23], "whl": [12, 13], "cu121": [12, 13], "rocm5": [12, 13], "ensur": [12, 13, 19], "properli": 12, "__version__": 12, "minimum": [12, 21, 22, 23], "cuda_cmake_macro": 12, "txt": [12, 14, 22, 24], "tag": [12, 21, 24], "fbgemm_vers": 12, "v0": 12, "fbgemm_": 12, "addit": [12, 14, 15, 16], "flow": 12, "state": 12, "becom": 12, "stale": 12, "problem": 12, "re": [12, 13], "attempt": 12, "failur": [12, 13], "clear": [12, 19], "py": [12, 13, 14, 22, 23], "clean": [12, 22], "must": [12, 13, 14, 15, 17, 20, 24], "package_nam": 12, "fbgemm_gpu_": 12, "convent": 12, "major": 12, "minor": 12, "py312": 12, "python_tag": 12, "determin": [12, 15], "processor": 12, "arch": 12, "python_plat_nam": 12, "manylinux2014_": 12, "maco": 12, "macosx_10_9_": 12, "arm64": 12, "macosx_11_0_": 12, "win_": 12, "cpu_onli": 12, "flag": [12, 22], "bdist_wheel": 12, "package_vari": 12, "plat": 12, "instead": [12, 22], "cxxprefix": 12, "presum": 12, "made": [12, 22], "presenc": 12, "similar": [12, 15, 17], "enabl": [12, 14], "been": [12, 21], "unabl": 12, "cudacxx": 12, "cuda_bin_path": 12, "cub": 12, "applic": [12, 17, 21, 23], "cub_dir": 12, "header": [12, 21, 24], "cudnn_include_dir": 12, "cudnn_librari": 12, "nvml_lib_path": 12, "sm70": [12, 13], "80": 12, "v100": [12, 13], "a100": [12, 13], "cuda_arch_list": 12, "unset": 12, "torch_cuda_arch_list": 12, "preced": 12, "dtorch_cuda_arch_list": 12, "rocm_path": 12, "pytorch_rocm_arch": 12, "gfx906": 12, "gfx908": 12, "gfx90a": 12, "wiki": 12, "gentoo": 12, "list": [12, 15, 16, 17, 20, 21, 23], "rocminfo": 12, "gfx": 12, "dhip_root_dir": 12, "dtorch_use_hip_dsa": 12, "complet": [12, 19, 22], "correct": 12, "lot": 12, "jinja": 12, "instanti": 12, "sure": [12, 19, 21, 23], "accident": 12, "cours": 12, "fbgemm_gpu_lib_path": 12, "fbgemm_gpu_pi": [12, 13], "defin": [12, 15, 21], "nm": 12, "gdcu": 12, "referenc": 12, "certain": 12, "gdc": 12, "merge_pooled_embed": [12, 13], "isol": [13, 22], "build": [13, 14, 21, 23, 25], "sm80": 13, "respect": 13, "other": [13, 15, 20, 21, 22, 23], "scratch": 13, "guarante": 13, "especi": 13, "displai": [13, 24], "smi": 13, "515": 13, "76": 13, "persist": 13, "bu": [13, 24], "id": 13, "disp": 13, "volatil": 13, "uncorr": 13, "ecc": 13, "fan": 13, "temp": 13, "perf": 13, "pwr": 13, "usag": [13, 22, 23], "cap": 13, "memori": [13, 17, 25], "util": [13, 25], "mig": 13, "a10g": 13, "00000000": 13, "00": 13, "1e": 13, "31c": 13, "p0": 13, "59w": 13, "300w": 13, "0mib": 13, "23028mib": 13, "gi": 13, "ci": 13, "pid": 13, "No": 13, "expos": 13, "onc": [13, 19], "imag": 13, "launch": 13, "alreadi": [13, 19, 21, 23], "toolkit": 13, "interfac": 13, "concis": 13, "info": [13, 21, 23], "dieedg": 13, "avgpwr": 13, "sclk": 13, "mclk": 13, "pwrcap": 13, "vram": 13, "33": 13, "0c": 13, "37": 13, "0w": 13, "300mhz": 13, "1200mhz": 13, "auto": [13, 22], "290": 13, "32": 13, "39": 13, "log": 13, "difficult": 13, "relev": [13, 21], "link": [13, 22], "encount": 13, "signatur": [13, 22], "traceback": 13, "last": 13, "root": [13, 19], "miniconda": 13, "mycondaenv": 13, "site": 13, "_op": [13, 22], "line": [13, 23, 24], "565": 13, "__getattr__": 13, "overload_nam": 13, "_c": 13, "_jit_get_oper": 13, "qualified_op_nam": 13, "runtimeerror": 13, "except": [13, 21, 23], "wa": 13, "string": [13, 24], "post47": 13, "py3": 13, "egg": 13, "__init__": [13, 23], "21": 13, "_fbgemm_gpu_doc": 13, "noqa": 13, "f401": 13, "e402": 13, "18": 13, "569": 13, "rais": [13, 23], "attributeerror": [13, 23], "_opnamespac": 13, "object": [13, 15], "attribut": [13, 23], "cli": 13, "main_run": 13, "execut": [13, 14], "47": 13, "_zn6fbgemm48floatorhalftofusednbitrowwisequantizedsbhalfavx2itli2eeevpkt_miph": 13, "appear": 13, "libtorch": 13, "visibl": 13, "incorrectli": [13, 22], "declar": [13, 21], "were": [13, 16], "pr": [13, 21, 22, 23], "1618": 13, "former": 13, "resolv": 13, "manual": [13, 21], "latter": 13, "seriou": 13, "tha": 13, "develop": [13, 22], "bench": 14, "good": [14, 20], "instal": [14, 22, 25], "pip": [14, 22], "pytest": 14, "rsx": 14, "ignor": [14, 17, 22], "pytestcollectionwarn": 14, "split_table_batched_embeddings_test": 14, "quantize_ops_test": 14, "sparse_ops_test": 14, "split_embedding_inference_converter_test": 14, "mode": [14, 17], "cuda_visible_devic": 14, "debug": 14, "cuda_launch_block": 14, "fbgemm_test_with_rocm": 14, "hip_launch_block": 14, "split_table_batched_embeddings_benchmark": 14, "purpos": [15, 16, 17, 20], "handl": 15, "consecut": 15, "nestedtensor": 15, "raggedtensor": 15, "tensorflow": 15, "notabl": 15, "sentenc": 15, "repres": 15, "maxlength": 15, "2d": [15, 16, 17, 23], "numel": 15, "greatest": 15, "divisor": 15, "smallest": 15, "sub": 15, "exclud": 15, "partit": 15, "impli": [15, 20], "denot": [15, 21, 23], "offest": 15, "outer": 15, "would": 15, "begin": 15, "maximum": [15, 16, 23], "between": [15, 21, 22, 24], "normal": 15, "densor": 15, "form": [15, 20], "figur": 15, "below": 15, "show": [15, 22], "accomod": 15, "logic": [15, 21], "At": [15, 21, 22, 23], "multipl": [15, 16, 17, 23, 25], "hadamard": 15, "product": [15, 20], "involv": 15, "bmatrix": 15, "rightarrow": 15, "16": 15, "25": 15, "36": 15, "49": 15, "81": 15, "50": 15, "operand": 15, "word": 15, "ax": 15, "properti": 15, "hold": 15, "elementwis": [15, 16], "equival": 15, "start": [15, 16, 23, 24], "dim": 15, "onto": 15, "part": 15, "everi": 15, "those": [15, 16, 19, 23], "converson": 15, "could": 15, "lead": 15, "read": [15, 17], "relat": 15, "smaller": 15, "expect": 15, "happen": 15, "give": 15, "situat": 15, "like": 15, "dense_tensor": 15, "jagged_tensor": 15, "break": 15, "exact": 15, "usual": 15, "1d": [16, 17, 23], "area": 16, "outsid": 16, "coverag": 16, "total": [16, 17], "identit": 16, "add": [16, 19, 21, 22, 23], "structur": 16, "jagged_dense_dense_elementwise_add_jagged_output": 16, "y_0": 16, "y_1": 16, "multipli": [16, 17], "max_n": 16, "matmul": 16, "stacked_jagged_1d_to_dens": 16, "arg": [16, 23], "kwarg": 16, "stacked_jagged_2d_to_dens": 16, "split_table_batched_embeddings_op": 17, "splittablebatchedembeddingbagscodegen": 17, "embedding_spec": 17, "feature_table_map": 17, "none": 17, "cache_algorithm": 17, "cachealgorithm": 17, "cache_load_factor": 17, "cache_reserved_memori": 17, "cache_precis": 17, "weights_precis": 17, "enforce_hbm": 17, "optimtyp": 17, "exact_sgd": 17, "record_cache_metr": 17, "gradient_clip": 17, "max_gradi": 17, "learning_r": 17, "01": 17, "ep": 17, "0e": 17, "momentum": 17, "weight_decai": 17, "weight_decay_mod": 17, "weightdecaymod": 17, "eta": 17, "001": 17, "beta1": 17, "beta2": 17, "999": 17, "poolingmod": 17, "boundscheckmod": 17, "sourc": [17, 19, 20, 21, 22, 23], "backward": 17, "embeddingloc": 17, "computedevic": 17, "spec": 17, "placement": 17, "lxu": 17, "algorithm": 17, "capac": 17, "reserv": [17, 20], "hbm": 17, "adam": 17, "exact_adagrad": 17, "exact_rowwise_adagrad": 17, "lamb": 17, "lars_sgd": 17, "partial_rowwise_adam": 17, "partial_rowwise_lamb": 17, "sgd": 17, "recordcachemetr": 17, "record": 17, "hit": 17, "request": [17, 18, 22], "record_cache_miss_count": 17, "metric": 17, "record_tablewise_cache_miss": 17, "stochast": 17, "round": 17, "gradient": 17, "clip": 17, "learn": 17, "rate": 17, "epsilon": 17, "adagrad": 17, "lar": 17, "decai": 17, "l2": 17, "decoupl": 17, "pool": [17, 25], "boundari": 17, "fatal": 17, "conatin": 17, "column": 17, "feature_requires_grad": 17, "split_table_batched_embeddings_ops_common": 17, "split_table_batched_embeddings_ops_train": 17, "init_embedding_weights_uniform": 17, "split_embedding_weight": 17, "9426": 17, "7046": 17, "4214": 17, "0419": 17, "1331": 17, "7856": 17, "8124": 17, "2021": 17, "5771": 17, "5911": 17, "7792": 17, "1068": 17, "6203": 17, "4813": 17, "1677": 17, "4790": 17, "5587": 17, "0941": 17, "5754": 17, "3475": 17, "8952": 17, "1964": 17, "0810": 17, "4174": 17, "2513": 17, "4039": 17, "3775": 17, "3273": 17, "5399": 17, "0229": 17, "1455": 17, "8770": 17, "9520": 17, "4593": 17, "7169": 17, "6307": 17, "1765": 17, "8757": 17, "8614": 17, "2051": 17, "0603": 17, "9980": 17, "7958": 17, "5826": 17, "long": 17, "13": 17, "5197": 17, "2957": 17, "3578": 17, "1487": 17, "4873": 17, "3044": 17, "9801": 17, "2769": 17, "7164": 17, "8528": 17, "7159": 17, "6719": 17, "0784": 17, "2016": 17, "2176": 17, "1988": 17, "3825": 17, "5008": 17, "8991": 17, "1405": 17, "2637": 17, "9427": 17, "8902": 17, "3754": 17, "5013": 17, "6105": 17, "9968": 17, "3057": 17, "7621": 17, "9821": 17, "7314": 17, "6195": 17, "grad_fn": 17, "cppnode": 17, "splitlookupfunction_sgd_op": 17, "question": 18, "concern": 18, "discuss": 18, "kick": 18, "regard": 18, "feel": 18, "free": 18, "reach": 18, "easi": 19, "transpar": 19, "describ": 19, "activ": 19, "welcom": [19, 25], "your": [19, 22, 23], "repositori": 19, "branch": 19, "ve": 19, "chang": [19, 21, 23], "api": [19, 21, 22, 23], "suit": 19, "lint": 19, "haven": 19, "submit": [19, 21, 23], "facebook": [19, 20, 25], "open": 19, "track": 19, "public": [19, 22], "bug": 19, "descript": [19, 21, 22, 23, 24], "abl": 19, "bounti": 19, "safe": 19, "disclosur": 19, "secur": 19, "go": 19, "outlin": 19, "By": 19, "agre": 19, "tree": 19, "claus": 20, "bsd": 20, "softwar": 20, "copyright": 20, "inc": 20, "affili": 20, "right": [20, 24], "redistribut": 20, "modif": 20, "permit": 20, "condit": 20, "met": 20, "retain": 20, "notic": 20, "disclaim": 20, "materi": 20, "contributor": 20, "endors": 20, "promot": 20, "written": 20, "permiss": 20, "BY": 20, "THE": 20, "holder": 20, "AND": 20, "AS": 20, "express": [20, 24], "OR": 20, "warranti": 20, "NOT": 20, "limit": [20, 22], "TO": 20, "OF": 20, "merchant": 20, "FOR": 20, "particular": 20, "IN": 20, "NO": 20, "event": 20, "shall": 20, "BE": 20, "liabl": 20, "indirect": 20, "incident": 20, "special": 20, "exemplari": 20, "consequenti": 20, "damag": 20, "procur": 20, "substitut": 20, "servic": 20, "profit": 20, "busi": 20, "interrupt": 20, "theori": 20, "liabil": 20, "contract": 20, "strict": 20, "tort": 20, "neglig": 20, "aris": 20, "IF": 20, "advis": 20, "SUCH": 20, "javadoc": 21, "style": [21, 23], "comment": [21, 22, 24], "sphinx": [21, 22, 23], "breath": 21, "kept": 21, "cpp": [21, 23, 24], "cu": 21, "cuh": 21, "everyth": 21, "ifndef": 21, "doxygen_this_will_be_skip": 21, "endif": 21, "hidden": 21, "html": [21, 22, 23], "descriptionss": 21, "publish": [21, 23], "docstr": [21, 22, 23], "method": [21, 22, 23], "organ": 21, "yet": 21, "top": [21, 25], "defgroup": 21, "directli": [21, 23], "behavior": [21, 23], "tparam": 21, "param": [21, 23], "thrown": [21, 23], "ingroup": 21, "brief": 21, "short": 21, "example_method": [21, 23], "def": [21, 23], "foo": [21, 23], "lst": [21, 23], "And": [21, 23], "verbatim": [21, 23], "text": [21, 23, 24], "diagram": [21, 23], "unpars": 21, "second": [21, 23], "prev": [21, 23], "usabl": [21, 23], "space": [21, 22, 23], "endcod": 21, "align": [21, 23], "param1": [21, 23], "param2": 21, "bad_alloc": 21, "logic_error": 21, "href": 21, "www": [21, 23], "nl": 21, "cmdlink": 21, "On": [21, 23], "doxygengroup": 21, "rst": [21, 23, 24], "content": [21, 24, 25], "toctre": [21, 23], "ini": 21, "taken": 21, "care": 21, "doc": [21, 22, 23, 24], "local": [21, 23], "netlifi": [21, 22, 23], "preview": [21, 23], "serv": 22, "accompani": 22, "put": 22, "yourself": 22, "shoe": 22, "who": 22, "understand": 22, "live": 22, "easier": 22, "leav": 22, "separ": 22, "task": 22, "pointer": 22, "tool": 22, "graphviz": [22, 24], "assembl": 22, "view": 22, "prepend": 22, "sphinx_lint": 22, "technic": 22, "why": 22, "invok": 22, "occasion": 22, "unresolv": 22, "might": 22, "opt": 22, "pycapsul": 22, "class": [22, 23], "neg": 22, "silenc": 22, "nitpick": 22, "conf": 22, "domain": 22, "deploi": 22, "app": 22, "googl": 23, "c_size_t": 23, "about": 23, "ret": 23, "emplace_back": 23, "item": 23, "valueerror": 23, "14": 23, "restructuredtext": 23, "en": 23, "master": 23, "__": 23, "pep": 23, "0287": 23, "42": 23, "autofunct": 23, "c_ulong": 23, "mani": 23, "attach": 23, "fact": 23, "helper": 23, "codebas": 23, "add_doc": 23, "jag": [23, 25], "forc": 23, "hoc": 23, "the_new_doc_modul": 23, "remain": 23, "render": [23, 24], "anchor": 24, "_doc": 24, "underscor": 24, "_": 24, "There": 24, "elsewher": 24, "ref": 24, "anoth": 24, "literalinclud": 24, "rel": 24, "enclos": 24, "bracket": 24, "skiplin": 24, "suppli": 24, "math": 24, "inlin": 24, "k_": 24, "k_n": 24, "expressino": 24, "int_a": 24, "frac": 24, "2v": 24, "dx": 24, "left": 24, "dv": 24, "_a": 24, "du": 24, "digraph": 24, "altern": 24, "extern": 24, "dot": 24, "examplegraph": 24, "low": 25, "precis": 25, "high": 25, "convolut": 25, "server": 25, "infer": 25, "backend": 25, "caffe2": 25, "collect": 25, "transform": 25, "contribut": 25, "contact": 25, "licens": 25, "experiment": 25, "tbe": 25}, "objects": {"": [[9, 0, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ebits"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::exponent_bias"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::input"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ncols"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::nrows"], [9, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::output"], [9, 0, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu"], [9, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::forward"], [9, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::input"], [9, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::output_dtype"], [0, 0, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::len"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::m"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::max"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::min"], [0, 0, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf"], [0, 2, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::InputType"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::bit_rate"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_columns"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_rows"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::output"], [9, 0, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ebits"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::exponent_bias"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::input"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::max_pos"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ncols"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::nrows"], [9, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::output"], [0, 0, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize"], [0, 2, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::T"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::dst"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::len"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::noise_ratio"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::num_threads"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::qparams"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::src"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::thread_id"], [0, 0, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::C"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::G"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::K"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::LAYOUT"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::T"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::X"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::dst"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::scales"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::src"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::zero_points"], [0, 0, 1, "_CPPv46Xor128v", "Xor128"], [9, 0, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu"], [9, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::forward"], [9, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::input"], [9, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::output_dtype"], [9, 0, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu"], [9, 1, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu::input"], [9, 0, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu"], [9, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::forward"], [9, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::input"], [9, 0, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu"], [9, 1, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu::input"], [9, 0, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out"], [9, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::input"], [9, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::output"], [9, 0, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu"], [9, 1, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu::input"], [9, 0, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu"], [9, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::bit_rate"], [9, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::input"], [9, 0, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu"], [9, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::ebits"], [9, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::exponent_bias"], [9, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::input"], [9, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::max_pos"], [9, 0, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bias"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bounding_box_size"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::ebits"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::input"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::max_pos"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::mbits"], [9, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::min_pos"], [9, 0, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu"], [9, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::forward"], [9, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::input"], [9, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::row_dim"], [9, 0, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out"], [9, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::input"], [9, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::output"], [9, 0, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu"], [9, 1, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu::input"], [9, 0, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu"], [9, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::D_offsets"], [9, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::input"], [9, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::output_dtype"], [9, 0, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu"], [9, 1, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu::input"], [9, 0, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu"], [9, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::input"], [9, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::output_dtype"], [9, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::quant_padding_float_type"], [9, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::scale_bias_last"], [9, 0, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu"], [9, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::bit_rate"], [9, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::input"], [9, 0, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu"], [9, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::bit_rate"], [9, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::input"], [9, 0, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu"], [9, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::bit_rate"], [9, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::input"], [9, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::output_dtype"], [9, 0, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu"], [9, 1, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu::input"], [9, 0, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu"], [9, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::bit_rate"], [9, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::input"], [9, 0, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu"], [9, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::ebits"], [9, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::exponent_bias"], [9, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::input"], [9, 0, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu"], [9, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::bias"], [9, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::ebits"], [9, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::input"], [9, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::mbits"], [9, 0, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::forward"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::input"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_dtype"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_last_dim"], [9, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::row_dim"], [9, 0, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu"], [9, 1, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu::input"], [9, 0, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu"], [9, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::bit_rate"], [9, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::input"], [8, 0, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device"], [8, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::inputTensors"], [8, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::target_device"], [5, 0, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul"], [5, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_offsets"], [5, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_values"], [5, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::v"], [2, 0, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::B_ofsets"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::bounds_check_mode"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::indices"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::max_B"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::offsets"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::rows_per_table"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::warning"], [2, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::weights"], [5, 0, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged"], [5, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::dense"], [5, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::offsets"], [5, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE", "dense_to_jagged::total_L"], [11, 0, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::D_offsets"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::hash_size_cumsum"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lru_state"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_miss_timestamp"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::row_alignment"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::time_stamp"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::uvm_cache_stats"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_offsets"], [11, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_tys"], [11, 0, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::invalid_index"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::uvm_cache_stats"], [21, 0, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method"], [21, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::Alignment"], [21, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::T"], [21, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param1"], [21, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param2"], [10, 0, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda"], [10, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::input_offsets"], [10, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_offsets"], [10, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_size"], [10, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::permute"], [9, 0, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu"], [9, 1, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu::input"], [9, 0, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu"], [9, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::forward"], [9, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::input"], [9, 0, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu"], [9, 1, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu::input"], [9, 0, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu"], [9, 1, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu::input"], [9, 0, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu"], [9, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::input"], [9, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::output_dtype"], [9, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::quant_padding_float_type"], [9, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::scale_bias_last"], [9, 0, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu"], [9, 1, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu::input"], [9, 0, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu"], [9, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::bit_rate"], [9, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::input"], [9, 0, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu"], [9, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::bit_rate"], [9, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::input"], [9, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::output_dtype"], [9, 0, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu"], [9, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::bit_rate"], [9, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::input"], [10, 0, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_boundaries"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_in_use_after"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_weight_value"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_examples"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_positives"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::logit"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::num_segments"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::positive_weight"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_lengths"], [10, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_value"], [11, 0, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda"], [11, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::compute_count"], [11, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::linear_indices"], [11, 1, 1, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb", "get_unique_indices_cuda::max_indices"], [3, 0, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::XQ"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::cache_K"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::cache_V"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::num_int4_kv_groups"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::num_split_ks"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::qk_scale"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::seq_positions"], [3, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb", "gqa_attn_splitk::use_tensor_cores"], [9, 0, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu"], [9, 1, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu::input"], [10, 0, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_in_use_after"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_weight_value"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_examples"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_positives"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::logit"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::lower_bound"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::positive_weight"], [10, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::upper_bound"], [11, 0, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot"], [11, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::C"], [11, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::h_in"], [2, 0, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::D_offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::dev_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bias"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bits"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indice_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indices"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_locations"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float16_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float32_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float8_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int2_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int4_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int8_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::output_dtype"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::pooling_mode"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::row_alignment"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::total_D"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::uvm_weights"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_offsets"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_placements"], [2, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_tys"], [2, 0, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::D_offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::dev_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bias"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bits"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indice_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indices"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_locations"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float16_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float32_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float8_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int2_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int4_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int8_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::output_dtype"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::pooling_mode"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::row_alignment"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::total_D"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::uvm_weights"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_offsets"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_placements"], [2, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_tys"], [2, 0, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::D_offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_hash_size_cumsum"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_index_table_map"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::dev_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bias"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bits"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indice_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indices"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_locations"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_state"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_state"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float16_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float32_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float8_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int2_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int4_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int8_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::output_dtype"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::pooling_mode"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::row_alignment"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_D"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_cache_hash_size"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::uvm_weights"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_offsets"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_placements"], [2, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_tys"], [2, 0, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::D_offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_hash_size_cumsum"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_index_table_map"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::dev_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bias"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bits"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indice_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indices"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_locations"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_state"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_state"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float16_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float32_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float8_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int2_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int4_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int8_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::output_dtype"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::pooling_mode"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::row_alignment"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_D"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_cache_hash_size"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::uvm_weights"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_offsets"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_placements"], [2, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_tys"], [7, 0, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor"], [7, 1, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor::self"], [5, 0, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense"], [5, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::max_L"], [5, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::offsets"], [5, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::padding_value"], [5, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::values"], [5, 0, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense"], [5, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::max_sequence_length"], [5, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::offsets"], [5, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::values"], [5, 0, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_offsets"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_values"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::y"], [5, 0, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output"], [5, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_offsets"], [5, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_values"], [5, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::y"], [5, 0, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda"], [5, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_offsets"], [5, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_values"], [5, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::y"], [5, 0, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_offsets"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_values"], [5, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::y"], [5, 0, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense"], [5, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::max_lengths"], [5, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::offsets"], [5, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::padding_value"], [5, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::values"], [5, 0, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward"], [5, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::max_lengths"], [5, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::offsets"], [5, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::padding_value"], [5, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::values"], [11, 0, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::D_offsets"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lfu_state"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::row_alignment"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_offsets"], [11, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_tys"], [11, 0, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::D_offsets"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lfu_state"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::stochastic_rounding"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights"], [11, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights_offsets"], [11, 0, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::B_offsets"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::indices"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::max_B"], [11, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t", "linearize_cache_indices_cuda::offsets"], [11, 0, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda"], [11, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_row_indices"], [11, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_table_indices"], [11, 0, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lock_cache_line"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lru_state"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lxu_cache_locking_counter"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::max_indices"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::time_stamp"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::unique_indices"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::unique_indices_length"], [11, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE", "lru_cache_find_uncached_cuda::uvm_cache_stats"], [11, 0, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::D_offsets"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::hash_size_cumsum"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lru_state"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::row_alignment"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::time_stamp"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::uvm_cache_stats"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_offsets"], [11, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_tys"], [11, 0, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::D_offsets"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::hash_size_cumsum"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lock_cache_line"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lru_state"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_locking_counter"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::stochastic_rounding"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::time_stamp"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::uvm_cache_stats"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights"], [11, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights_offsets"], [11, 0, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::D_offsets"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_index_table_map"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::stochastic_rounding"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::total_D"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::uvm_weights"], [11, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::weights_offsets"], [11, 0, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda"], [11, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations"], [11, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations_new"], [11, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::num_uniq_cache_indices"], [11, 0, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda"], [11, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locations"], [11, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locking_counter"], [11, 0, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::gather_cache_stats"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::invalid_index"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::linear_cache_indices"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_locations_output"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::num_uniq_cache_indices"], [11, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::uvm_cache_stats"], [7, 0, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor"], [7, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::self"], [7, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::sizes"], [7, 0, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor"], [7, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::self"], [7, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::sizes"], [7, 0, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta"], [7, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::self"], [7, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::sizes"], [7, 0, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor"], [7, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::is_host_mapped"], [7, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::self"], [7, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::sizes"], [7, 0, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor"], [7, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::self"], [7, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::sizes"], [4, 0, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu"], [4, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::batch_size"], [4, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::include_last_offsets"], [4, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::indices_list"], [4, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::offsets_list"], [4, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::per_sample_weights"], [8, 0, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::pooled_embs"], [8, 0, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_permute_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::offset_dim_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::permute_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::pooled_embs"], [8, 0, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_permute_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::offset_dim_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::permute_list"], [8, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::pooled_embs"], [8, 0, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_permute_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::offset_dim_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::permute_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::pooled_embs"], [8, 0, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_permute_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::offset_dim_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::permute_list"], [8, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::pooled_embs"], [8, 0, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::allow_duplicates"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_offset_dim_list"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_permute_list"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::offset_dim_list"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::permute_list"], [8, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::pooled_embs"], [8, 0, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::pooled_embs"], [8, 0, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::offset_dim_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::permute_list"], [8, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::pooled_embs"], [2, 0, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings_offsets"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::indices"], [2, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::offsets"], [2, 0, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings_offsets"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::indices"], [2, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::offsets"], [2, 0, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::dense_indices"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table_offsets"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::indices"], [2, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::offsets"], [2, 0, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table_offsets"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::indices"], [2, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::offsets"], [2, 0, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table_offsets"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::indices"], [2, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::offsets"], [6, 0, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda"], [6, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::grad_output"], [6, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::num_features_per_rank"], [6, 0, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda"], [6, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::cumsum_dim_sum_per_rank"], [6, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::dim_sum_per_rank"], [6, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::grad_output"], [6, 0, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu"], [6, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::dim_sum_per_rank"], [6, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::grad_output"], [6, 0, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda"], [6, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::dim_sum_per_rank"], [6, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::grad_output"], [0, 0, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::DIRECT"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::Q_GRAN"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::block"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::inp"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_in"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::r"], [0, 0, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::C_PER_G"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::Q_GRAN"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::block"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::inp"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_in"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::out"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::r"], [11, 0, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::D_offsets"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::buffer_ids"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::cache_hash_size_cumsum"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::dev_weights"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::logical_table_ids"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_state"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_weights"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_dev"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_offsets"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_placements"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_uvm"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices_offsets"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::total_cache_hash_size"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::uvm_weights"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_offsets"], [11, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_placements"], [4, 0, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu"], [4, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::include_last_offsets"], [4, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::indices_list"], [4, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::offsets_list"], [4, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::per_sample_weights"], [7, 0, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise"], [7, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::cuda_memory_advise"], [7, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::self"], [7, 0, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async"], [7, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::device_t"], [7, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::self"], [7, 0, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork"], [7, 1, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork::self"], [7, 0, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage"], [7, 1, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage::self"], [7, 0, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu"], [7, 1, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu::self"], [7, 0, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone"], [7, 1, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone::self"], [7, 0, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device"], [7, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::prototype"], [7, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::self"], [17, 3, 0, "-", "fbgemm_gpu"]], "fbgemm_gpu.docs.examples": [[23, 4, 1, "", "example_method"]], "fbgemm_gpu.split_table_batched_embeddings_ops": [[17, 4, 1, "", "SplitTableBatchedEmbeddingBagsCodegen"]], "torch.ops.fbgemm": [[16, 4, 1, "", "batched_dense_vec_jagged_2d_mul"], [16, 4, 1, "", "dense_to_jagged"], [16, 4, 1, "", "jagged_1d_to_dense"], [16, 4, 1, "", "jagged_2d_to_dense"], [16, 4, 1, "", "jagged_dense_dense_elementwise_add_jagged_output"], [16, 4, 1, "", "jagged_dense_elementwise_add"], [16, 4, 1, "", "jagged_dense_elementwise_add_jagged_output"], [16, 4, 1, "", "jagged_dense_elementwise_mul"], [16, 4, 1, "", "jagged_to_padded_dense"], [16, 4, 1, "", "stacked_jagged_1d_to_dense"], [16, 4, 1, "", "stacked_jagged_2d_to_dense"]]}, "objtypes": {"0": "cpp:function", "1": "cpp:functionParam", "2": "cpp:templateParam", "3": "py:module", "4": "py:function"}, "objnames": {"0": ["cpp", "function", "C++ function"], "1": ["cpp", "functionParam", "C++ function parameter"], "2": ["cpp", "templateParam", "C++ template parameter"], "3": ["py", "module", "Python module"], "4": ["py", "function", "Python function"]}, "titleterms": {"quantiz": [0, 9], "util": 0, "refer": [0, 24], "implement": 0, "method": 0, "avx": 0, "2": 0, "512": 0, "build": [1, 12, 22], "instruct": [1, 12, 13, 14], "fbgemm": [1, 25], "requir": 1, "hardwar": 1, "softwar": 1, "depend": 1, "asmjit": 1, "cpuinfo": 1, "googletest": 1, "set": [1, 12, 13, 22], "up": [1, 12, 13, 22], "an": [1, 12], "isol": [1, 12], "environ": [1, 12, 13, 14, 22], "instal": [1, 12, 13], "tool": [1, 12], "c": [1, 12, 21, 25], "compil": [1, 12], "other": [1, 12, 24], "librari": [1, 13], "prepar": [1, 12], "linux": 1, "maco": 1, "cmake": 1, "gcc": [1, 12], "issu": [1, 19], "12": 1, "clang": [1, 12], "bazel": 1, "window": 1, "embed": [2, 8, 11, 17], "oper": [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 16, 17], "cuda": [2, 5, 6, 7, 9, 10, 12, 13, 14], "cpu": [2, 5, 6, 9, 10, 12, 13], "experiment": 3, "attent": 3, "combin": [4, 15], "input": 4, "jag": [5, 15, 16], "tensor": [5, 15, 16], "layout": 6, "transform": 6, "memori": 7, "pool": 8, "merg": 8, "permut": 8, "spars": 10, "data": 10, "tabl": [11, 17], "batch": [11, 17], "miniconda": 12, "conda": [12, 13], "onli": [12, 13], "docker": [12, 13], "imag": 12, "cudnn": 12, "rocm": [12, 13, 14], "miopen": 12, "symlink": 12, "pytorch": [12, 13], "through": [12, 13], "pip": [12, 13], "post": [12, 13], "check": [12, 13], "fbgemm_gpu": [12, 13, 14, 22, 25], "packag": [12, 13], "The": 12, "process": 12, "wheel": 12, "variabl": 12, "For": 12, "develop": [12, 25], "undefin": [12, 13], "symbol": [12, 13], "glibc": 12, "version": 12, "compat": 12, "nvidia": 13, "driver": 13, "contain": 13, "runtim": 13, "amdgpu": 13, "python": [13, 23, 25], "public": 13, "pypi": 13, "test": 14, "setup": 14, "run": 14, "variant": 14, "benchmark": 14, "high": 15, "level": 15, "overview": [15, 25], "format": 15, "valu": 15, "offset": 15, "max": 15, "length": 15, "exampl": 15, "arithmet": 15, "convers": 15, "dens": 15, "tbe": 17, "contact": 18, "u": 18, "github": 18, "slack": 18, "contribut": 19, "code": [19, 21, 23, 24], "conduct": 19, "pull": 19, "request": 19, "contributor": 19, "licens": [19, 20], "agreement": 19, "cla": 19, "ad": [21, 23, 24], "document": [21, 22, 23, 24, 25], "gener": [22, 23, 25], "guidelin": 22, "specif": 22, "guid": 22, "toolchain": 22, "lint": 22, "deploy": 22, "preview": 22, "todo": 23, "auto": 23, "sphinx": 24, "pointer": 24, "section": 24, "referenc": 24, "sourc": 24, "latex": 24, "graph": 24, "homepag": 25, "info": 25, "api": 25}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Test Instructions": [[14, "test-instructions"]], "Setup the FBGEMM_GPU Test Environment": [[14, "setup-the-fbgemm-gpu-test-environment"]], "Running FBGEMM_GPU Tests": [[14, "running-fbgemm-gpu-tests"]], "Testing with the CUDA Variant": [[14, "testing-with-the-cuda-variant"]], "Testing with the ROCm Variant": [[14, "testing-with-the-rocm-variant"]], "Running FBGEMM_GPU Benchmarks": [[14, "running-fbgemm-gpu-benchmarks"]], "Installation Instructions": [[13, "installation-instructions"]], "Set Up CPU-Only Environment": [[13, "set-up-cpu-only-environment"]], "Set Up CUDA Environment": [[13, "set-up-cuda-environment"]], "Install NVIDIA Drivers": [[13, "install-nvidia-drivers"]], "Set Up the CUDA Docker Container and Conda Environment": [[13, "set-up-the-cuda-docker-container-and-conda-environment"]], "Install the CUDA Runtime": [[13, "install-the-cuda-runtime"]], "Set Up ROCm Environment": [[13, "set-up-rocm-environment"]], "Install AMDGPU Drivers": [[13, "install-amdgpu-drivers"]], "Set Up the ROCm Docker Container and Conda Environment": [[13, "set-up-the-rocm-docker-container-and-conda-environment"]], "Install Python Libraries": [[13, "install-python-libraries"]], "Install PyTorch": [[13, "install-pytorch"], [12, "install-pytorch"]], "Install the FBGEMM_GPU Package": [[13, "install-the-fbgemm-gpu-package"]], "Install through PyTorch PIP": [[13, "install-through-pytorch-pip"]], "Install through Public PyPI": [[13, "install-through-public-pypi"]], "Post-Installation Checks": [[13, "post-installation-checks"]], "Undefined Symbols": [[13, "undefined-symbols"]], "Build Instructions": [[12, "build-instructions"], [1, "build-instructions"]], "Set Up an Isolated Build Environment": [[12, "set-up-an-isolated-build-environment"], [1, "set-up-an-isolated-build-environment"]], "Install Miniconda": [[12, "install-miniconda"]], "Set Up the Conda Environment": [[12, "set-up-the-conda-environment"]], "Set Up for CPU-Only Build": [[12, "set-up-for-cpu-only-build"]], "Set Up for CUDA Build": [[12, "set-up-for-cuda-build"]], "CUDA Docker Image": [[12, "cuda-docker-image"]], "Install CUDA": [[12, "install-cuda"]], "Install cuDNN": [[12, "install-cudnn"]], "Set Up for ROCm Build": [[12, "set-up-for-rocm-build"]], "ROCm Docker Image": [[12, "rocm-docker-image"]], "Install ROCm": [[12, "install-rocm"]], "Install MIOpen": [[12, "install-miopen"]], "Install the Build Tools": [[12, "install-the-build-tools"], [1, "install-the-build-tools"]], "C/C++ Compiler (GCC)": [[12, "c-c-compiler-gcc"]], "C/C++ Compiler (Clang)": [[12, "c-c-compiler-clang"]], "Compiler Symlinks": [[12, "compiler-symlinks"]], "Other Build Tools": [[12, "other-build-tools"], [1, "other-build-tools"]], "Installation Through Conda": [[12, "installation-through-conda"]], "Installation Through PyTorch PIP": [[12, "installation-through-pytorch-pip"]], "Post-Install Checks": [[12, "post-install-checks"]], "Build the FBGEMM_GPU Package": [[12, "build-the-fbgemm-gpu-package"]], "Preparing the Build": [[12, "preparing-the-build"], [1, "preparing-the-build"]], "The Build Process": [[12, "the-build-process"]], "Set Wheel Build Variables": [[12, "set-wheel-build-variables"]], "CPU-Only Build": [[12, "cpu-only-build"]], "CUDA Build": [[12, "cuda-build"]], "ROCm Build": [[12, "rocm-build"]], "Post-Build Checks (For Developers)": [[12, "post-build-checks-for-developers"]], "Undefined Symbols Check": [[12, "undefined-symbols-check"]], "GLIBC Version Compatibility Check": [[12, "glibc-version-compatibility-check"]], "Contributing": [[19, "contributing"]], "Code of Conduct": [[19, "code-of-conduct"]], "Pull Requests": [[19, "pull-requests"]], "Contributor License Agreement (\u201cCLA\u201d)": [[19, "contributor-license-agreement-cla"]], "Issues": [[19, "issues"]], "License": [[19, "license"], [20, "license"]], "Contact Us": [[18, "contact-us"]], "GitHub": [[18, "github"]], "Slack": [[18, "slack"]], "Table Batched Embedding (TBE) Operators": [[17, "module-fbgemm_gpu"]], "Jagged Tensor Operators": [[16, "jagged-tensor-operators"], [15, "jagged-tensor-operators"], [5, "jagged-tensor-operators"]], "High Level Overview": [[15, "high-level-overview"]], "Jagged Tensor Format": [[15, "jagged-tensor-format"]], "Values": [[15, "values"]], "Offsets": [[15, "offsets"]], "Max Lengths": [[15, "max-lengths"]], "Jagged Tensor Example": [[15, "jagged-tensor-example"]], "Jagged Tensor Operations": [[15, "jagged-tensor-operations"]], "Arithmetic Operations": [[15, "arithmetic-operations"]], "Conversion Operations": [[15, "conversion-operations"]], "Jagged to Dense": [[15, "jagged-to-dense"]], "Dense to Jagged": [[15, "dense-to-jagged"]], "Combined Arithmetic + Conversion Operations": [[15, "combined-arithmetic-conversion-operations"]], "FBGEMM and FBGEMM_GPU Documentation Homepage": [[25, "fbgemm-and-fbgemm-gpu-documentation-homepage"]], "General Info": [[25, null]], "FBGEMM Development": [[25, null]], "FBGEMM_GPU Development": [[25, null]], "FBGEMM_GPU Overview": [[25, null]], "FBGEMM C++ API": [[25, null]], "FBGEMM_GPU C++ API": [[25, null]], "FBGEMM_GPU Python API": [[25, null]], "Sphinx Documentation Pointers": [[24, "sphinx-documentation-pointers"]], "References Other Sections of the Documentation": [[24, "references-other-sections-of-the-documentation"]], "Referencing the Source Code": [[24, "referencing-the-source-code"]], "Adding LaTeX": [[24, "adding-latex"]], "Adding Graphs": [[24, "adding-graphs"]], "Adding Documentation to C++ Code": [[21, "adding-documentation-to-c-code"]], "Adding Documentation to Python Code": [[23, "adding-documentation-to-python-code"]], "Todo": [[23, "id1"]], "Adding Documentation to Auto-Generated Python Code": [[23, "adding-documentation-to-auto-generated-python-code"]], "Documentation": [[22, "documentation"]], "General Documentation Guidelines": [[22, "general-documentation-guidelines"]], "Specific Documentation Guides": [[22, "specific-documentation-guides"]], "Building the Documentation": [[22, "building-the-documentation"]], "Set Up Build Environment": [[22, "set-up-build-environment"]], "Build FBGEMM_GPU": [[22, "build-fbgemm-gpu"]], "Set Up the Documentation Toolchain": [[22, "set-up-the-documentation-toolchain"]], "Build the Documentation": [[22, "build-the-documentation"]], "Linting the Documentation": [[22, "linting-the-documentation"]], "Deployment Preview": [[22, "deployment-preview"]], "Experimental Operators": [[3, "experimental-operators"]], "Attention Operators": [[3, "attention-operators"]], "Combine Input Operators": [[4, "combine-input-operators"]], "CUDA Operators": [[5, "cuda-operators"], [2, "cuda-operators"], [6, "cuda-operators"], [9, "cuda-operators"], [10, "cuda-operators"]], "CPU Operators": [[5, "cpu-operators"], [2, "cpu-operators"], [6, "cpu-operators"], [9, "cpu-operators"], [10, "cpu-operators"]], "Quantization Utilities": [[0, "quantization-utilities"]], "Reference Implementation Methods": [[0, "reference-implementation-methods"]], "AVX-2 Implementation Methods": [[0, "avx-2-implementation-methods"]], "AVX-512 Implementation Methods": [[0, "avx-512-implementation-methods"]], "FBGEMM Requirements": [[1, "fbgemm-requirements"]], "Hardware Requirements": [[1, "hardware-requirements"]], "Software Dependencies": [[1, "software-dependencies"]], "asmjit": [[1, "asmjit"]], "cpuinfo": [[1, "cpuinfo"]], "GoogleTest": [[1, "googletest"]], "C/C++ Compiler": [[1, "c-c-compiler"]], "Build the FBGEMM Library": [[1, "build-the-fbgemm-library"]], "Building on Linux and macOS (CMake + GCC)": [[1, "building-on-linux-and-macos-cmake-gcc"]], "Build Issues with GCC 12+": [[1, "build-issues-with-gcc-12"]], "Building on Linux and macOS (CMake + Clang)": [[1, "building-on-linux-and-macos-cmake-clang"]], "Building on Linux (Bazel)": [[1, "building-on-linux-bazel"]], "Building on Windows": [[1, "building-on-windows"]], "Embedding Operators": [[2, "embedding-operators"]], "CUDA Memory Operators": [[7, "cuda-memory-operators"]], "Pooled Embeddings Operators": [[8, "pooled-embeddings-operators"]], "Merge Operators": [[8, "merge-operators"]], "Permutation Operators": [[8, "permutation-operators"]], "Layout Transformation Operators": [[6, "layout-transformation-operators"]], "Table Batched Embedding Operators": [[11, "table-batched-embedding-operators"]], "Quantization Operators": [[9, "quantization-operators"]], "Sparse Data Operators": [[10, "sparse-data-operators"]]}, "indexentries": {"findminmax (c++ function)": [[0, "_CPPv410FindMinMaxPKfPfPf7int64_t"]], "floatorhalftofusednbitrowwisequantizedsbhalf (c++ function)": [[0, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE"]], "fusedquantizedequantize (c++ function)": [[0, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif"]], "quantizegroupwise (c++ function)": [[0, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T"]], "xor128 (c++ function)": [[0, "_CPPv46Xor128v"]], "requantizeoutputprocessingavx2 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "requantizeoutputprocessinggconvavx512 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "bounds_check_indices_cuda (c++ function)": [[2, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKN3c108optionalI6TensorEERKN3c108optionalI6TensorEEK7int64_t"]], "int_nbit_split_embedding_codegen_lookup_function (c++ function)": [[2, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE"]], "int_nbit_split_embedding_codegen_lookup_function_cpu (c++ function)": [[2, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function (c++ function)": [[2, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu (c++ function)": [[2, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tN3c108optionalI6TensorEE7int64_tN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI7int64_tEEN3c108optionalI6TensorEEN3c108optionalI6TensorEEN3c108optionalI6TensorEE"]], "pruned_array_lookup_cpu (c++ function)": [[2, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor"]], "pruned_array_lookup_cuda (c++ function)": [[2, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_insert_unweighted_cpu (c++ function)": [[2, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_cuda (c++ function)": [[2, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_unweighted_cpu (c++ function)": [[2, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor"]], "gqa_attn_splitk (c++ function)": [[3, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKb"]], "padding_fused_tbe_input_combine_cpu (c++ function)": [[4, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t"]], "tbe_input_combine_cpu (c++ function)": [[4, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE"]], "batched_dense_vec_jagged_2d_mul (c++ function)": [[5, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor"]], "dense_to_jagged (c++ function)": [[5, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEEN3c108optionalIN2at6SymIntEEE"]], "jagged_1d_to_dense (c++ function)": [[5, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t"]], "jagged_2d_to_dense (c++ function)": [[5, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE"]], "jagged_dense_elementwise_add (c++ function)": [[5, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output (c++ function)": [[5, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output_cuda (c++ function)": [[5, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_mul (c++ function)": [[5, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_to_padded_dense (c++ function)": [[5, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd"]], "jagged_to_padded_dense_forward (c++ function)": [[5, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd"]], "recat_embedding_grad_output_cuda (c++ function)": [[6, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_batch_cuda (c++ function)": [[6, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor"]], "recat_embedding_grad_output_mixed_d_cpu (c++ function)": [[6, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_cuda (c++ function)": [[6, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE"]], "is_uvm_tensor (c++ function)": [[7, "_CPPv413is_uvm_tensorRK6Tensor"]], "new_host_mapped_tensor (c++ function)": [[7, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor (c++ function)": [[7, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor_meta (c++ function)": [[7, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_unified_tensor (c++ function)": [[7, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb"]], "new_vanilla_managed_tensor (c++ function)": [[7, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "uvm_cuda_mem_advise (c++ function)": [[7, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t"]], "uvm_cuda_mem_prefetch_async (c++ function)": [[7, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorN3c108optionalI6TensorEE"]], "uvm_mem_advice_dont_fork (c++ function)": [[7, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor"]], "uvm_storage (c++ function)": [[7, "_CPPv411uvm_storageRK6Tensor"]], "uvm_to_cpu (c++ function)": [[7, "_CPPv410uvm_to_cpuRK6Tensor"]], "uvm_to_cpu_clone (c++ function)": [[7, "_CPPv416uvm_to_cpu_cloneRK6Tensor"]], "uvm_to_device (c++ function)": [[7, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor"]], "all_to_one_device (c++ function)": [[8, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE"]], "permute_pooled_embs_auto_grad (c++ function)": [[8, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_cpu (c++ function)": [[8, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_gpu (c++ function)": [[8, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_split_cpu (c++ function)": [[8, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_auto_grad_split_gpu (c++ function)": [[8, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_cpu_impl (c++ function)": [[8, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb"]], "permute_pooled_embs_split_cpu (c++ function)": [[8, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_split_gpu (c++ function)": [[8, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "fp8quantizedtofloat_ref (c++ function)": [[9, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi"]], "fp8rowwise_to_float_cpu (c++ function)": [[9, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t"]], "floattofp8quantized_ref (c++ function)": [[9, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd"]], "_fp8rowwise_to_float_gpu (c++ function)": [[9, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t"]], "_bfloat16_to_float_gpu (c++ function)": [[9, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE"]], "_float_to_fp8rowwise_gpu (c++ function)": [[9, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb"]], "_float_to_bfloat16_gpu (c++ function)": [[9, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE"]], "_float_to_fused8bitrowwise_cpu_out (c++ function)": [[9, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor"]], "_float_to_fused8bitrowwise_gpu (c++ function)": [[9, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor"]], "_float_to_fusednbitrowwise_gpu (c++ function)": [[9, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "_float_to_hfp8_gpu (c++ function)": [[9, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd"]], "_float_to_msfp_gpu (c++ function)": [[9, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd"]], "_float_to_paddedfp8rowwise_gpu (c++ function)": [[9, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t"]], "_fused8bitrowwise_to_float_cpu_out (c++ function)": [[9, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor"]], "_fused8bitrowwise_to_float_gpu (c++ function)": [[9, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_float_mixed_dim_gpu (c++ function)": [[9, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t"]], "_fused8bitrowwise_to_half_gpu (c++ function)": [[9, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_single_or_half_precision_gpu (c++ function)": [[9, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb"]], "_fusednbitrowwise_to_float_gpu (c++ function)": [[9, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_half_gpu (c++ function)": [[9, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_single_or_half_precision_gpu (c++ function)": [[9, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_half_to_fused8bitrowwise_gpu (c++ function)": [[9, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor"]], "_half_to_fusednbitrowwise_gpu (c++ function)": [[9, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t"]], "_hfp8_to_float_gpu (c++ function)": [[9, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_msfp_to_float_gpu (c++ function)": [[9, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t"]], "_paddedfp8rowwise_to_float_gpu (c++ function)": [[9, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t"]], "_single_or_half_precision_to_fused8bitrowwise_gpu (c++ function)": [[9, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor"]], "_single_or_half_precision_to_fusednbitrowwise_gpu (c++ function)": [[9, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "float_or_half_to_fused8bitrowwise_cpu (c++ function)": [[9, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor"]], "float_to_fp8rowwise_cpu (c++ function)": [[9, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb"]], "float_to_fused8bitrowwise_cpu (c++ function)": [[9, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor"]], "fused8bitrowwise_to_float_cpu (c++ function)": [[9, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor"]], "fused8bitrowwise_to_float_or_half_cpu (c++ function)": [[9, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb"]], "fused8bitrowwise_to_half_cpu (c++ function)": [[9, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor"]], "fusednbitrowwise_to_float_cpu (c++ function)": [[9, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t"]], "fusednbitrowwise_to_float_or_half_cpu (c++ function)": [[9, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t"]], "fusednbitrowwise_to_half_cpu (c++ function)": [[9, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t"]], "half_to_fused8bitrowwise_cpu (c++ function)": [[9, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor"]], "expand_into_jagged_permute_cuda (c++ function)": [[10, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t"]], "generic_histogram_binning_calibration_by_feature_cpu (c++ function)": [[10, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td"]], "histogram_binning_calibration_cpu (c++ function)": [[10, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td"]], "direct_mapped_lru_cache_populate_byte_cuda (c++ function)": [[11, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "direct_mapped_lxu_cache_lookup_cuda (c++ function)": [[11, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "get_unique_indices_cuda (c++ function)": [[11, "_CPPv423get_unique_indices_cudaN2at6TensorE7int64_tb"]], "host_lxu_cache_slot (c++ function)": [[11, "_CPPv419host_lxu_cache_slot7int64_t7int64_t"]], "lfu_cache_populate_byte_cuda (c++ function)": [[11, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "lfu_cache_populate_cuda (c++ function)": [[11, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb"]], "linearize_cache_indices_cuda (c++ function)": [[11, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN3c108optionalIN2at6TensorEEEK7int64_t"]], "linearize_cache_indices_from_row_idx_cuda (c++ function)": [[11, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE"]], "lru_cache_find_uncached_cuda (c++ function)": [[11, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorE"]], "lru_cache_populate_byte_cuda (c++ function)": [[11, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEE"]], "lru_cache_populate_cuda (c++ function)": [[11, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbN3c108optionalIN2at6TensorEEEbN3c108optionalIN2at6TensorEEE"]], "lxu_cache_flush_cuda (c++ function)": [[11, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb"]], "lxu_cache_locations_update_cuda (c++ function)": [[11, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorEN3c108optionalIN2at6TensorEEE"]], "lxu_cache_locking_counter_decrement_cuda (c++ function)": [[11, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE"]], "lxu_cache_lookup_cuda (c++ function)": [[11, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEEN3c108optionalIN2at6TensorEEE"]], "reset_weight_momentum_cuda (c++ function)": [[11, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "batched_dense_vec_jagged_2d_mul() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.batched_dense_vec_jagged_2d_mul"]], "dense_to_jagged() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.dense_to_jagged"]], "jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_1d_to_dense"]], "jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_2d_to_dense"]], "jagged_dense_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_dense_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_add() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_dense_elementwise_add"]], "jagged_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_mul() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_dense_elementwise_mul"]], "jagged_to_padded_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.jagged_to_padded_dense"]], "stacked_jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.stacked_jagged_1d_to_dense"]], "stacked_jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[16, "torch.ops.fbgemm.stacked_jagged_2d_to_dense"]], "splittablebatchedembeddingbagscodegen() (in module fbgemm_gpu.split_table_batched_embeddings_ops)": [[17, "fbgemm_gpu.split_table_batched_embeddings_ops.SplitTableBatchedEmbeddingBagsCodegen"]], "fbgemm_gpu": [[17, "module-fbgemm_gpu"]], "module": [[17, "module-fbgemm_gpu"]], "example_method (c++ function)": [[21, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf"]], "example_method() (in module fbgemm_gpu.docs.examples)": [[23, "fbgemm_gpu.docs.examples.example_method"]]}}) \ No newline at end of file