diff --git a/output.json b/output.json index 5865cc4d1..97b7d3d07 100644 --- a/output.json +++ b/output.json @@ -13,61 +13,61 @@ {"filename": "general/documentation/Cpp.rst", "lineno": 60, "status": "unchecked", "code": 0, "uri": "#fbgemm-gpu-toc-api-cpp", "info": ""} {"filename": "general/documentation/Python.rst", "lineno": 43, "status": "unchecked", "code": 0, "uri": "#fbgemm-gpu-toc-api-python", "info": ""} {"filename": "general/documentation/Cpp.rst", "lineno": 68, "status": "unchecked", "code": 0, "uri": "#general-docs-build", "info": ""} +{"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 55, "status": "working", "code": 0, "uri": "https://github.com/google/googletest", "info": ""} {"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/blob/main/.github/scripts/setup_env.bash", "info": ""} -{"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 48, "status": "working", "code": 0, "uri": "https://github.com/pytorch/cpuinfo", "info": ""} {"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 41, "status": "working", "code": 0, "uri": "https://github.com/asmjit/asmjit", "info": ""} -{"filename": "general/Contributing.rst", "lineno": 27, "status": "working", "code": 0, "uri": "https://code.facebook.com/cla", "info": ""} +{"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 48, "status": "working", "code": 0, "uri": "https://github.com/pytorch/cpuinfo", "info": ""} {"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 139, "status": "working", "code": 0, "uri": "https://developer.nvidia.com/cudnn", "info": ""} {"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 114, "status": "redirected", "code": 301, "uri": "https://developer.nvidia.com/nvidia-management-library-nvml", "info": "https://developer.nvidia.com/management-library-nvml"} -{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 31, "status": "working", "code": 0, "uri": "https://docs.conda.io/en/latest/miniconda.html", "info": ""} -{"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 55, "status": "working", "code": 0, "uri": "https://github.com/google/googletest", "info": ""} -{"filename": "general/ContactUs.rst", "lineno": 17, "status": "redirected", "code": 301, "uri": "https://bit.ly/ptslack", "info": "https://docs.google.com/forms/d/e/1FAIpQLSeADnUNW36fjKjYzyHDOzEB_abKQE9b6gqqW9NXse6O0MWh0A/viewform"} +{"filename": "general/Contributing.rst", "lineno": 27, "status": "working", "code": 0, "uri": "https://code.facebook.com/cla", "info": ""} {"filename": "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/constants.html#None", "info": ""} +{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 31, "status": "working", "code": 0, "uri": "https://docs.conda.io/en/latest/miniconda.html", "info": ""} {"filename": "general/documentation/Python.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/exceptions.html#AttributeError", "info": ""} -{"filename": "general/documentation/Python.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/exceptions.html#ValueError", "info": ""} +{"filename": "general/ContactUs.rst", "lineno": 17, "status": "redirected", "code": 301, "uri": "https://bit.ly/ptslack", "info": "https://docs.google.com/forms/d/e/1FAIpQLSeADnUNW36fjKjYzyHDOzEB_abKQE9b6gqqW9NXse6O0MWh0A/viewform"} {"filename": "general/documentation/Python.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/ctypes.html#ctypes.c_ulong", "info": ""} -{"filename": "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/functions.html#bool", "info": ""} -{"filename": "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "lineno": 172, "status": "working", "code": 0, "uri": "https://en.wikipedia.org/wiki/Hadamard_product_(matrices)", "info": ""} {"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 110, "status": "working", "code": 0, "uri": "https://docs.amd.com/bundle/ROCm-Installation-Guide-v5.5/page/How_to_Install_ROCm.html", "info": ""} +{"filename": "general/documentation/Python.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/exceptions.html#ValueError", "info": ""} +{"filename": "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/functions.html#bool", "info": ""} {"filename": "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/functions.html#float", "info": ""} -{"filename": "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/functions.html#int", "info": ""} {"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 40, "status": "redirected", "code": 301, "uri": "https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes/index.html", "info": "https://docs.nvidia.com/cuda/cuda-installation-guide-linux/"} +{"filename": "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://docs.python.org/3/library/functions.html#int", "info": ""} +{"filename": "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "lineno": 172, "status": "working", "code": 0, "uri": "https://en.wikipedia.org/wiki/Hadamard_product_(matrices)", "info": ""} {"filename": "general/ContactUs.rst", "lineno": 11, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/discussions", "info": ""} {"filename": "general/ContactUs.rst", "lineno": 7, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/issues", "info": ""} {"filename": "general/Contributing.rst", "lineno": 8, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/blob/main/CODE_OF_CONDUCT.md", "info": ""} {"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 221, "status": "redirected", "code": 301, "uri": "https://github.com/ROCmSoftwarePlatform/MIOpen", "info": "https://github.com/ROCm/MIOpen"} {"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 171, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/issues/1094", "info": ""} -{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 188, "status": "working", "code": 0, "uri": "https://github.com/pytorch/pytorch/blob/main/RELEASE.md", "info": ""} +{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 294, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/issues/1618", "info": ""} {"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 171, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/issues/1666", "info": ""} -{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 40, "status": "working", "code": 0, "uri": "https://github.com/pytorch/test-infra/blob/main/.github/actions/setup-nvidia/action.yml", "info": ""} -{"filename": "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "lineno": 7, "status": "working", "code": 0, "uri": "https://github.com/pytorch/pytorch/issues/25032", "info": ""} -{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 96, "status": "working", "code": 0, "uri": "https://hub.docker.com/r/nvidia/cuda", "info": ""} +{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 188, "status": "working", "code": 0, "uri": "https://github.com/pytorch/pytorch/blob/main/RELEASE.md", "info": ""} {"filename": "general/documentation/Sphinx.rst", "lineno": 149, "status": "working", "code": 0, "uri": "https://graphviz.org/documentation/", "info": ""} -{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 183, "status": "working", "code": 0, "uri": "https://hub.docker.com/r/rocm/dev-ubuntu-20.04", "info": ""} -{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 294, "status": "working", "code": 0, "uri": "https://github.com/pytorch/FBGEMM/issues/1618", "info": ""} +{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 96, "status": "working", "code": 0, "uri": "https://hub.docker.com/r/nvidia/cuda", "info": ""} {"filename": "fbgemm-development/BuildInstructions.rst", "lineno": 171, "status": "working", "code": 0, "uri": "https://github.com/pytorch/pytorch/issues/77939", "info": ""} -{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 174, "status": "working", "code": 0, "uri": "https://hub.docker.com/r/rocm/rocm-terminal", "info": ""} {"filename": "index.rst", "lineno": 7, "status": "redirected", "code": 302, "uri": "https://github.com/pytorch/pytorch/tree/master/aten/src/ATen/native/quantized/cpu", "info": "https://github.com/pytorch/pytorch/tree/main/aten/src/ATen/native/quantized/cpu"} +{"filename": "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "lineno": 7, "status": "working", "code": 0, "uri": "https://github.com/pytorch/pytorch/issues/25032", "info": ""} +{"filename": "fbgemm_gpu-development/InstallationInstructions.rst", "lineno": 40, "status": "working", "code": 0, "uri": "https://github.com/pytorch/test-infra/blob/main/.github/actions/setup-nvidia/action.yml", "info": ""} +{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 183, "status": "working", "code": 0, "uri": "https://hub.docker.com/r/rocm/dev-ubuntu-20.04", "info": ""} +{"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 174, "status": "working", "code": 0, "uri": "https://hub.docker.com/r/rocm/rocm-terminal", "info": ""} {"filename": "general/documentation/Python.rst", "lineno": 55, "status": "working", "code": 0, "uri": "https://peps.python.org/pep-0287/", "info": ""} {"filename": "fbgemm_gpu-cpp-api/experimental_ops.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://pytorch.org/cppdocs/api/classat_1_1_tensor.html#_CPPv4N2at6TensorE", "info": ""} {"filename": "fbgemm_gpu-cpp-api/quantize_ops.rst", "lineno": 11, "status": "working", "code": 0, "uri": "https://pytorch.org/cppdocs/api/classc10_1_1_error.html#_CPPv4N3c105ErrorE", "info": ""} {"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 352, "status": "working", "code": 0, "uri": "https://pytorch.org/get-started/locally/", "info": ""} -{"filename": "fbgemm_gpu-cpp-api/memory_utils.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://nvidia.github.io/cuda-python/module/cudart.html#cuda.cudart.cudaMemAdvise", "info": ""} -{"filename": "fbgemm_gpu-cpp-api/memory_utils.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://nvidia.github.io/cuda-python/module/cudart.html#cuda.cudart.cudaMemPrefetchAsync", "info": ""} {"filename": "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "lineno": 1, "status": "working", "code": 0, "uri": "https://pytorch.org/docs/main/tensor_attributes.html#torch.device", "info": ""} -{"filename": "fbgemm_gpu-cpp-api/memory_utils.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://man7.org/linux/man-pages/man2/madvise.2.html", "info": ""} {"filename": "fbgemm_gpu-development/BuildInstructions.rst", "lineno": 197, "status": "working", "code": 0, "uri": "https://rocm.docs.amd.com/en/latest/", "info": ""} +{"filename": "fbgemm_gpu-cpp-api/memory_utils.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://man7.org/linux/man-pages/man2/madvise.2.html", "info": ""} +{"filename": "fbgemm_gpu-cpp-api/memory_utils.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://nvidia.github.io/cuda-python/module/cudart.html#cuda.cudart.cudaMemPrefetchAsync", "info": ""} +{"filename": "fbgemm_gpu-cpp-api/memory_utils.rst", "lineno": 4, "status": "working", "code": 0, "uri": "https://nvidia.github.io/cuda-python/module/cudart.html#cuda.cudart.cudaMemAdvise", "info": ""} {"filename": "general/documentation/Cpp.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://www.breathe-doc.org/", "info": ""} {"filename": "general/documentation/Overview.rst", "lineno": 142, "status": "working", "code": 0, "uri": "https://www.netlify.com/", "info": ""} -{"filename": "general/documentation/Cpp.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://www.doxygen.nl/", "info": ""} -{"filename": "general/documentation/Cpp.rst", "lineno": 75, "status": "working", "code": 0, "uri": "https://www.doxygen.nl/manual/commands.html#cmdlink", "info": ""} {"filename": "general/documentation/Cpp.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://www.oracle.com/java/technologies/javase/javadoc-tool.html", "info": ""} +{"filename": "general/documentation/Cpp.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://www.doxygen.nl/", "info": ""} +{"filename": "general/documentation/Python.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/extensions/example_google.html", "info": ""} {"filename": "general/documentation/Python.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/", "info": ""} -{"filename": "general/Contributing.rst", "lineno": 34, "status": "redirected", "code": 301, "uri": "https://www.facebook.com/whitehat/", "info": "https://bugbounty.meta.com/?utm_source=facebook.com&utm_medium=redirect"} {"filename": "general/documentation/Sphinx.rst", "lineno": 149, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/extensions/graphviz.html", "info": ""} -{"filename": "general/documentation/Python.rst", "lineno": 6, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/extensions/example_google.html", "info": ""} -{"filename": "general/documentation/Python.rst", "lineno": 55, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html", "info": ""} {"filename": "general/documentation/Sphinx.rst", "lineno": 115, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/extensions/math.html#module-sphinx.ext.mathjax", "info": ""} +{"filename": "general/documentation/Cpp.rst", "lineno": 75, "status": "working", "code": 0, "uri": "https://www.doxygen.nl/manual/commands.html#cmdlink", "info": ""} +{"filename": "general/documentation/Python.rst", "lineno": 55, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html", "info": ""} +{"filename": "general/Contributing.rst", "lineno": 34, "status": "redirected", "code": 301, "uri": "https://www.facebook.com/whitehat/", "info": "https://bugbounty.meta.com/?utm_source=facebook.com&utm_medium=redirect"} {"filename": "general/documentation/Sphinx.rst", "lineno": 115, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#math", "info": ""} {"filename": "general/documentation/Sphinx.rst", "lineno": 82, "status": "working", "code": 0, "uri": "https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-literalinclude", "info": ""} {"filename": "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "lineno": 7, "status": "working", "code": 0, "uri": "https://www.tensorflow.org/guide/ragged_tensor", "info": ""} diff --git a/searchindex.js b/searchindex.js index 381ae0f4a..39ebe2dc2 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["fbgemm-cpp-api/QuantUtils", "fbgemm-cpp-api/tbe_cpu_autovec", "fbgemm-development/BuildInstructions", "fbgemm_gpu-cpp-api/embedding_ops", "fbgemm_gpu-cpp-api/experimental_ops", "fbgemm_gpu-cpp-api/input_combine", "fbgemm_gpu-cpp-api/jagged_tensor_ops", "fbgemm_gpu-cpp-api/layout_transform_ops", "fbgemm_gpu-cpp-api/memory_utils", "fbgemm_gpu-cpp-api/merge_pooled_embeddings", "fbgemm_gpu-cpp-api/quantize_ops", "fbgemm_gpu-cpp-api/sparse_ops", "fbgemm_gpu-cpp-api/split_table_batched_embeddings", "fbgemm_gpu-cpp-api/ssd_embedding_ops", "fbgemm_gpu-development/BuildInstructions", "fbgemm_gpu-development/InstallationInstructions", "fbgemm_gpu-development/TestInstructions", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps", "fbgemm_gpu-python-api/jagged_tensor_ops", "fbgemm_gpu-python-api/table_batched_embedding_ops", "general/ContactUs", "general/Contributing", "general/License", "general/documentation/Cpp", "general/documentation/Overview", "general/documentation/Python", "general/documentation/Sphinx", "index"], "filenames": ["fbgemm-cpp-api/QuantUtils.rst", "fbgemm-cpp-api/tbe_cpu_autovec.rst", "fbgemm-development/BuildInstructions.rst", "fbgemm_gpu-cpp-api/embedding_ops.rst", "fbgemm_gpu-cpp-api/experimental_ops.rst", "fbgemm_gpu-cpp-api/input_combine.rst", "fbgemm_gpu-cpp-api/jagged_tensor_ops.rst", "fbgemm_gpu-cpp-api/layout_transform_ops.rst", "fbgemm_gpu-cpp-api/memory_utils.rst", "fbgemm_gpu-cpp-api/merge_pooled_embeddings.rst", "fbgemm_gpu-cpp-api/quantize_ops.rst", "fbgemm_gpu-cpp-api/sparse_ops.rst", "fbgemm_gpu-cpp-api/split_table_batched_embeddings.rst", "fbgemm_gpu-cpp-api/ssd_embedding_ops.rst", "fbgemm_gpu-development/BuildInstructions.rst", "fbgemm_gpu-development/InstallationInstructions.rst", "fbgemm_gpu-development/TestInstructions.rst", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "general/ContactUs.rst", "general/Contributing.rst", "general/License.rst", "general/documentation/Cpp.rst", "general/documentation/Overview.rst", "general/documentation/Python.rst", "general/documentation/Sphinx.rst", "index.rst"], "titles": ["Quantization Utilities", "TBE CPU Autovectorization", "Build Instructions", "Embedding Operators", "Experimental Operators", "Combine Input Operators", "Jagged Tensor Operators", "Layout Transformation Operators", "CUDA Memory Operators", "Pooled Embeddings Operators", "Quantization Operators", "Sparse Data Operators", "Table Batched Embedding Operators", "SSD Embedding Operators", "Build Instructions", "Installation Instructions", "Test Instructions", "Jagged Tensor Operators", "Jagged Tensor Operators", "Table Batched Embedding (TBE) Operators", "Contact Us", "Contributing", "License", "Adding Documentation to C++ Code", "Documentation", "Adding Documentation to Python Code", "Sphinx Documentation Pointers", "FBGEMM and FBGEMM_GPU Documentation Homepage"], "terms": {"templat": [0, 1, 14, 23], "typenam": [0, 1, 23], "t": [0, 2, 4, 8, 11, 14, 19, 21, 23, 24], "layout_t": 0, "layout": [0, 27], "kcx": 0, "void": [0, 3, 8, 10, 12, 13], "quantizegroupwis": 0, "const": [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 25], "float": [0, 1, 10, 18, 19, 23, 25], "src": 0, "int": [0, 1, 10, 18, 19, 23, 25], "k": [0, 4], "c": [0, 12, 15, 17, 22, 24, 25, 26], "x": [0, 6, 13, 17, 23, 25], "g": [0, 2, 11, 13, 14, 23, 25], "scale": [0, 1, 4, 10], "std": [0, 1, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 23, 25], "int32_t": [0, 1, 23, 25], "zero_point": 0, "dst": 0, "point": [0, 10, 18, 23, 25], "data": [0, 1, 4, 8, 13, 17, 19, 22, 27], "type": [0, 1, 2, 4, 10, 15, 17, 18, 19, 23], "paramet": [0, 1, 4, 8, 10, 11, 13, 18, 19, 23, 24, 25], "output": [0, 1, 4, 6, 10, 11, 13, 18, 19, 23, 25], "int8_t": 0, "uint8_t": [0, 1, 10, 12], "ar": [0, 2, 6, 12, 13, 14, 15, 17, 18, 19, 22, 23, 24, 25], "support": [0, 2, 4, 13, 14, 15, 17, 25, 27], "input": [0, 1, 4, 6, 8, 10, 11, 13, 17, 18, 19, 23, 27], "tensor": [0, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 19, 24, 25, 27], "kxc": 0, "correspond": [0, 11, 12, 13, 17, 23, 25], "kcr": 0, "kctr": 0, "weight": [0, 1, 3, 11, 12, 13, 19], "time": [0, 2, 14, 15, 17], "dimens": [0, 4, 6, 8, 11, 17, 18, 19, 25], "krsc": 0, "ktrsc": 0, "channel": [0, 14, 15, 20], "number": [0, 1, 2, 4, 10, 11, 13, 14, 17, 18, 19, 24], "r": [0, 16, 24], "": [0, 2, 8, 14, 16, 17, 21, 23, 24, 25], "group": [0, 4, 17, 23], "function": [0, 2, 13, 14, 23, 25], "perform": [0, 2, 10, 11, 13, 17, 27], "channelwis": 0, "1": [0, 1, 2, 4, 11, 12, 13, 14, 15, 16, 17, 18, 19, 24, 25, 26], "groupwis": 0, "per": [0, 17], "size": [0, 2, 4, 8, 10, 11, 17, 18, 19], "should": [0, 10, 11, 12, 14, 15, 17, 21, 23, 24, 25], "equal": [0, 17, 25], "zero": [0, 18, 25], "reprsent": 0, "fusedquantizedequant": 0, "int64_t": [0, 1, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13], "len": [0, 17], "tensorquantizationparam": 0, "qparam": 0, "thread_id": 0, "0": [0, 2, 4, 10, 11, 12, 13, 14, 15, 17, 18, 19, 25], "num_thread": 0, "noise_ratio": 0, "0f": 0, "fuse": [0, 10, 19], "integ": [0, 8, 10, 17], "dequant": [0, 10], "kernel": [0, 2, 8, 10, 13, 16, 27], "acceler": 0, "awar": 0, "train": [0, 13, 19, 27], "fp32": [0, 1, 10, 19], "valu": [0, 6, 8, 10, 11, 12, 13, 18, 19, 23, 24, 25], "u": [0, 14, 26, 27], "int8": [0, 19], "us": [0, 1, 2, 4, 8, 11, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27], "provid": [0, 2, 13, 14, 15, 16, 22, 23, 24, 25, 27], "back": [0, 8, 12, 14, 15], "inputtyp": 0, "floatorhalftofusednbitrowwisequantizedsbhalf": 0, "bit_rat": [0, 10], "size_t": [0, 10, 13, 23], "input_row": 0, "input_column": 0, "convert": [0, 8, 10, 13, 17, 18, 25], "fp16": [0, 10, 19], "rowwis": [0, 10, 19], "bitrat": 0, "specifi": [0, 2, 4, 10, 11, 13, 14, 18, 19], "bit": [0, 1, 10], "bia": [0, 1, 4, 10], "each": [0, 1, 4, 10, 11, 13, 14, 17, 18, 19, 25], "row": [0, 1, 6, 10, 12, 13, 17, 18, 19, 25], "store": [0, 10, 11, 12, 13], "itself": [0, 17, 24], "end": [0, 1, 15, 17, 26], "can": [0, 1, 2, 10, 11, 13, 14, 15, 17, 23, 24, 25, 26], "4": [0, 10, 14, 15, 17, 18, 19, 25], "8": [0, 10, 14, 17, 19], "uint32_t": 0, "xor128": 0, "random": 0, "gener": [0, 2, 11, 13, 14, 15, 23, 26], "9": [0, 13, 14, 17, 19], "base": [0, 2, 11, 12, 13, 14, 17], "thi": [0, 2, 6, 8, 9, 10, 11, 13, 14, 15, 17, 20, 21, 22, 23, 25, 26, 27], "paper": 0, "findminmax": 0, "m": [0, 14, 15, 16], "min": 0, "max": [0, 4, 19], "find": [0, 12, 14], "matrix": [0, 2, 18, 27], "bool": [0, 1, 4, 8, 9, 10, 12, 13, 19], "a_symmetr": 0, "b_symmetr": 0, "quantizationgranular": 0, "q_gran": 0, "has_bia": 0, "fuse_relu": 0, "bias_typ": 0, "direct": [0, 12, 15, 22, 23, 25, 26], "fals": [0, 1, 8, 13, 19, 24], "requantizeoutputprocessingavx2": 0, "out": [0, 1, 14, 20, 22, 24], "inp": 0, "block_type_t": 0, "block": [0, 1, 23, 25, 26], "ld_out": 0, "ld_in": 0, "requantizationparams_t": 0, "requant": 0, "avx2": [0, 2], "i": [0, 1, 2, 4, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27], "c_per_g": 0, "requantizeoutputprocessinggconvavx512": 0, "avx512": 0, "intyp": 1, "indextyp": 1, "offsettyp": 1, "outtyp": 1, "embeddingspmdm_autovec": 1, "block_siz": 1, "output_s": [1, 11], "index_s": 1, "data_s": 1, "indic": [1, 3, 12, 13, 17, 19], "offsets_or_length": 1, "normalize_by_length": 1, "is_weight_posit": 1, "use_offset": 1, "true": [1, 8, 13, 19], "output_strid": 1, "input_strid": 1, "scale_bias_last": [1, 10], "no_bag": 1, "is_bf16_out": 1, "is_bf16_in": 1, "version": [1, 2, 15], "embeddingspmdm_ref": 1, "index": [1, 11, 12, 13, 14, 15, 17, 23, 25], "offset": [1, 3, 6, 11, 12, 18, 19], "element": [1, 10, 12, 13, 17], "address": [1, 2, 13, 14], "sum": [1, 4, 11, 13, 18, 19], "option": [1, 2, 3, 6, 8, 12, 14, 18, 19], "null": 1, "non": [1, 4, 8, 19], "whether": [1, 4, 8, 13, 14, 22], "normal": [1, 17], "length": [1, 4, 6, 11, 13, 18, 19, 25], "If": [1, 2, 13, 14, 15, 19, 21, 23, 24, 25], "posit": [1, 4, 11, 13, 19], "set": [1, 8, 12, 13, 16, 17, 18, 19], "instead": [1, 14, 24], "same": [1, 2, 4, 8, 11, 14, 17, 18, 23, 24, 25], "appear": [1, 15], "embed": [1, 2, 14, 15, 24, 27], "bag": [1, 11, 19, 27], "bfloat16": [1, 10], "embeddingspmdmfp8_autovec": 1, "exponent_bit": 1, "exponent_bia": [1, 10], "expon": 1, "note": [2, 12, 14, 15, 23, 24, 25, 26], "The": [2, 4, 8, 10, 11, 13, 15, 16, 17, 18, 19, 21, 23, 24, 25, 26], "most": [2, 14, 15, 17, 24], "date": [2, 14, 15, 24], "script": [2, 14, 15, 24], "bundl": [2, 14, 15, 24], "repo": [2, 14, 15, 24, 25], "under": [2, 14, 15, 21, 22, 24, 25], "setup_env": [2, 14, 15, 24], "bash": [2, 14, 15, 24], "step": [2, 13, 14, 15, 17, 24, 25], "fbgemm_gpu": [2, 8, 14, 17, 19, 20, 21, 22, 23, 25], "follow": [2, 11, 14, 15, 17, 22, 23, 24, 25], "toolchain": [2, 14, 15], "run": [2, 14, 15, 24], "cpu": [2, 8, 9, 16, 24, 27], "higher": 2, "In": [2, 11, 13, 14, 15, 17, 21, 23, 25], "doe": [2, 3, 15, 23, 24, 25], "have": [2, 10, 11, 12, 14, 17, 24], "ani": [2, 11, 14, 18, 21, 22, 24, 25], "intel": 2, "mkl": 2, "howev": [2, 14, 17, 22], "comparison": 2, "some": [2, 14, 17, 24], "benchmark": 2, "found": [2, 14, 15, 24], "path": [2, 13, 14, 16, 23, 26], "through": [2, 21, 23, 25], "intel_mkl_dir": 2, "variabl": 2, "built": [2, 14, 15, 24, 27], "report": [2, 15], "otherwis": [2, 8, 13, 15, 22], "subset": 2, "all": [2, 11, 12, 13, 14, 15, 17, 19, 22, 24], "three": [2, 17], "git": [2, 14], "submodul": [2, 14], "custom": [2, 26], "desir": [2, 14, 17, 18, 23], "thei": [2, 14, 24, 26], "asmjit_src_dir": 2, "cpuinfo_src_dir": 2, "googletest_source_dir": 2, "With": 2, "inner": [2, 17], "take": [2, 14], "one": [2, 4, 10, 11, 12, 14, 18, 19, 23, 25], "doesn": 2, "fit": [2, 22], "approach": 2, "so": [2, 11, 14, 15, 16, 17], "implement": [2, 4, 10, 13, 14, 17], "dynam": 2, "effici": [2, 27], "shape": [2, 4, 17, 19], "specif": [2, 11, 13, 14, 19, 22], "vector": [2, 5, 6, 7, 8, 9, 13, 18, 25], "code": [2, 13, 14, 22, 24], "third": 2, "parti": 2, "call": [2, 8, 13, 15], "detect": [2, 16], "runtim": [2, 14], "pytorch": [2, 13, 17, 20, 24, 25, 27], "project": [2, 21], "dispatch": [2, 8], "optim": [2, 10, 13, 19], "test": [2, 10, 14, 15, 21, 27], "you": [2, 21, 23, 25], "don": [2, 11, 14, 24], "want": [2, 21], "togeth": [2, 23, 24], "default": [2, 11, 14, 15, 19], "turn": [2, 24], "off": [2, 15, 20], "simpli": [2, 14], "fbgemm_build_test": 2, "conda": [2, 16, 24], "For": [2, 16, 17, 20, 22, 23, 24, 25, 26], "platform": [2, 14, 22], "machin": [2, 14, 15, 16, 27], "microsoft": [2, 10], "visual": 2, "studio": 2, "2019": 2, "newer": [2, 14], "recommend": [2, 6, 10, 14, 15, 17], "here": [2, 8, 14, 15, 21, 23, 24, 25, 26], "necessari": [2, 14], "ninja": [2, 14], "etc": [2, 14, 19], "n": [2, 10, 14, 15, 26], "env_nam": [2, 14, 15], "y": [2, 6, 14, 15, 18, 24], "doxygen": [2, 23, 24], "make": [2, 12, 14, 21, 23, 24, 25], "openbla": 2, "packag": [2, 14, 16, 24], "onli": [2, 4, 10, 11, 12, 13, 16, 17, 21, 23, 24, 26], "clone": [2, 14], "along": [2, 14, 15], "its": [2, 8, 10, 11, 14, 19, 22, 24, 26], "insid": [2, 13, 14, 15, 16, 24, 26], "recurs": [2, 14], "http": [2, 14, 15, 21, 23, 24, 25], "github": [2, 14, 21], "com": [2, 14, 21], "cd": [2, 14, 16, 24], "assum": [2, 11], "process": [2, 6, 13, 15, 17, 21, 25], "straightforward": 2, "creat": [2, 8, 14, 17, 21, 23, 25, 26], "directori": [2, 14, 16, 21, 23, 24], "mkdir": 2, "argument": [2, 11, 23, 24, 25], "build_arg": 2, "duse_sanit": 2, "dfbgemm_library_typ": 2, "share": [2, 8], "dpython_execut": 2, "which": [2, 11, 13, 14, 15, 17, 19, 24], "python3": [2, 15], "document": [2, 8, 21, 22], "dfbgemm_build_doc": 2, "ON": [2, 22], "j": [2, 17], "verbos": 2, "As": [2, 11, 14, 15, 17], "write": [2, 13, 14, 15, 24, 25], "fail": [2, 15, 16, 23], "due": [2, 14], "known": [2, 14, 19], "regress": 2, "To": [2, 13, 14, 16, 26], "work": [2, 14, 15, 17, 21], "around": 2, "append": [2, 14, 23, 25], "export": [2, 14, 16], "prior": [2, 14, 15, 22], "cflag": 2, "wno": 2, "error": [2, 10, 15, 23, 24, 25], "mayb": 2, "uniniti": 2, "restrict": 2, "cxxflag": 2, "pleas": [2, 21, 23, 25], "see": [2, 8, 14, 15, 17, 23, 25, 26], "77939": 2, "1094": 2, "1666": 2, "more": [2, 8, 14, 19, 23, 25, 26], "detail": [2, 13, 15], "exactli": 2, "extra": 2, "need": [2, 13, 14, 15, 16, 17, 21, 23, 25, 26], "ad": [2, 14, 21, 24], "invoc": [2, 14, 24], "llvm": [2, 14], "standard": [2, 14], "libc": [2, 14], "openmp": [2, 14], "libomp": 2, "locat": [2, 8, 12, 13, 14, 17], "cc_path": 2, "cxx_path": 2, "dcmake_c_compil": 2, "dcmake_cxx_compil": 2, "dcmake_c_flag": [2, 14], "fopenmp": 2, "stdlib": [2, 14], "conda_prefix": [2, 14], "includ": [2, 9, 13, 14, 22, 23, 25], "dcmake_cxx_flag": [2, 14], "likewis": 2, "also": [2, 13, 14, 19, 26], "veri": [2, 14, 23, 24, 25], "target": [2, 8, 10, 11, 14, 17, 23, 24, 25, 26], "architectur": [2, 14, 15], "bc": [2, 14], "x64": 2, "program": [2, 21], "file": [2, 14, 15, 20, 21, 23, 24, 25, 26], "x86": [2, 27], "enterpris": 2, "vc": 2, "auxiliari": 2, "vcvarsal": 2, "bat": 2, "build_dir": 2, "dfbgemm_build_benchmark": 2, "dcmake_build_typ": 2, "releas": [2, 15], "cl": 2, "ex": 2, "v": [2, 4, 6, 16, 18], "int_nbit_split_embedding_codegen_lookup_funct": 3, "dev_weight": [3, 12], "uvm_weight": [3, 12], "weights_plac": [3, 12], "weights_offset": [3, 12], "weights_ti": [3, 12], "d_offset": [3, 10, 12], "total_d": [3, 12, 19], "max_int2_d": 3, "max_int4_d": 3, "max_int8_d": 3, "max_float16_d": 3, "max_float32_d": 3, "pooling_mod": [3, 19], "indice_weight": 3, "output_dtyp": [3, 10, 19], "lxu_cache_weight": [3, 12, 13], "lxu_cache_loc": [3, 12, 13], "row_align": [3, 12], "max_float8_d": 3, "fp8_exponent_bit": 3, "fp8_exponent_bia": 3, "int_nbit_split_embedding_uvm_caching_codegen_lookup_funct": 3, "cache_hash_size_cumsum": [3, 12], "total_cache_hash_s": [3, 12], "cache_index_table_map": [3, 12], "lxu_cache_st": [3, 12], "lxu_stat": 3, "simlar": 3, "uvm_cach": 3, "lookup": [3, 12, 13], "pruned_hashmap_lookup_cuda": 3, "hash_tabl": 3, "hash_table_offset": 3, "pruned_array_lookup_cuda": 3, "index_remap": 3, "index_remappings_offset": 3, "bounds_check_indices_cuda": 3, "rows_per_t": 3, "bounds_check_mod": [3, 19], "warn": [3, 19, 23], "b_ofset": 3, "max_b": [3, 12], "int_nbit_split_embedding_codegen_lookup_function_cpu": 3, "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu": 3, "pruned_hashmap_insert_unweighted_cpu": 3, "dense_indic": 3, "pruned_hashmap_lookup_unweighted_cpu": 3, "pruned_array_lookup_cpu": 3, "tupl": [4, 5, 6, 11, 12, 13, 19], "gqa_attn_splitk": 4, "xq": 4, "cache_k": 4, "cache_v": 4, "seq_posit": 4, "doubl": [4, 6, 10, 11], "qk_scale": 4, "num_split_k": 4, "kv_cache_quant_num_group": 4, "use_tensor_cor": 4, "cache_logical_dtype_int": 4, "decod": 4, "queri": 4, "split": 4, "w": [4, 16], "bf16": [4, 10], "int4": [4, 10], "kv": 4, "cuda": [4, 9, 19, 27], "gqa": 4, "cach": [4, 12, 13, 14, 19], "It": [4, 13, 14, 15, 17], "current": [4, 13, 14, 15, 17, 19], "context": 4, "16384": 4, "fix": [4, 11], "head": 4, "128": 4, "an": [4, 8, 11, 13, 15, 16, 17, 19, 23, 24, 25, 26], "arbitrari": [4, 13], "b": [4, 11, 14, 17, 18, 19, 23, 24, 25, 26], "h_q": 4, "d": [4, 17, 18, 26], "where": [4, 6, 8, 11, 13, 17, 18, 19], "batch": [4, 6, 11, 17, 18, 27], "num": 4, "max_t": 4, "h_kv": 4, "sequenc": 4, "contain": [4, 8, 13, 14, 17, 18, 19, 25], "actual": [4, 14], "token": [4, 17], "appli": [4, 11, 14, 17, 19], "after": [4, 11, 13, 14, 15, 16, 17, 19, 24, 25, 26], "qk": 4, "control": 4, "amount": [4, 19], "parallel": [4, 13], "wise": [4, 17, 19], "fp8": [4, 10], "quantiz": [4, 27], "singl": [4, 8, 10, 13], "now": 4, "core": 4, "wmma": 4, "instruct": [4, 21, 23, 24, 25, 27], "fast": 4, "kv_cach": 4, "2": [4, 10, 13, 14, 15, 16, 17, 18, 19, 23, 25, 26], "return": [4, 8, 10, 11, 13, 18, 19, 23, 24, 25], "A": [4, 8, 10, 13, 14, 15, 17, 18, 19, 22, 23, 24, 25], "combin": [4, 27], "metadata": [4, 13], "softmax": 4, "tbe_input_combine_cpu": 5, "indices_list": 5, "offsets_list": 5, "per_sample_weight": [5, 19], "include_last_offset": 5, "padding_fused_tbe_input_combine_cpu": 5, "batch_siz": 5, "solv": 6, "issu": [6, 8, 14, 15, 20], "when": [6, 11, 13, 14, 16, 17, 19, 23, 24, 26], "differ": [6, 11, 13, 17], "often": 6, "occur": [6, 13, 23], "spars": [6, 17, 27], "featur": [6, 11, 14, 17, 19, 20], "system": [6, 14, 15, 17], "well": [6, 11, 14, 23], "natur": [6, 17], "languag": [6, 17, 26], "jagged_to_padded_dense_forward": 6, "c10": [6, 10], "symintarrayref": 6, "max_length": [6, 18], "padding_valu": [6, 18], "jagged_dense_elementwise_add_jagged_output_cuda": 6, "x_valu": [6, 18], "x_offset": [6, 18, 25], "dens": [6, 18, 25], "jagged_to_padded_dens": [6, 18], "jagged_dense_elementwise_add": [6, 18], "jagged_dense_elementwise_mul": [6, 18], "batched_dense_vec_jagged_2d_mul": [6, 18], "a_valu": [6, 18], "a_offset": [6, 18], "dense_to_jag": [6, 18], "symint": 6, "total_l": [6, 18], "jagged_dense_elementwise_add_jagged_output": [6, 18], "jagged_1d_to_dens": [6, 18], "max_l": 6, "jagged_2d_to_dens": [6, 14, 15, 18, 24, 25], "max_sequence_length": [6, 18, 25], "recat_embedding_grad_output_cuda": 7, "grad_output": 7, "num_features_per_rank": 7, "recat_embedding_grad_output_mixed_d_cuda": 7, "dim_sum_per_rank": 7, "recat_embedding_grad_output_mixed_d_batch_cuda": 7, "cumsum_dim_sum_per_rank": 7, "recat_embedding_grad_output_mixed_d_cpu": 7, "new_managed_tensor": 8, "self": [8, 13], "alloc": [8, 23], "unifi": 8, "manag": [8, 14, 15, 19], "uvm": [8, 16], "Then": 8, "prefer": [8, 13, 15], "storag": [8, 10, 12, 13], "host": [8, 14], "establish": 8, "map": [8, 11, 12, 13, 17, 19], "devic": [8, 9, 14, 19], "new": [8, 10, 12, 23, 24, 25], "new_managed_tensor_meta": 8, "placehold": 8, "meta": [8, 22], "kei": [8, 13], "empti": [8, 17, 18, 26], "new_host_mapped_tensor": 8, "new_unified_tensor": 8, "is_host_map": 8, "either": [8, 10, 11, 13, 14, 15], "depend": [8, 10, 14, 15, 17], "new_unified_tensor_meta": 8, "new_vanilla_managed_tensor": 8, "allow": [8, 14], "automat": [8, 11, 16, 24], "uvm_storag": 8, "check": [8, 19], "gpu": [8, 13, 14, 15, 16, 27], "is_uvm_tensor": 8, "BUT": [8, 22], "uvm_to_cpu": 8, "effect": [8, 17], "move": [8, 13], "from": [8, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 22, 23, 24, 25, 26], "uvm_to_devic": 8, "prototyp": 8, "whose": 8, "uvm_cuda_mem_advis": 8, "cuda_memory_advis": 8, "cudamemadvis": 8, "cudamemoryadvis": 8, "enum": [8, 10, 13], "avail": [8, 14, 15, 16, 24], "python": [8, 13, 14, 16, 23, 24, 26], "side": [8, 13, 14, 23, 25, 27], "namespac": 8, "over": [8, 14], "valid": 8, "inform": [8, 15, 17, 25, 26], "uvm_cuda_mem_prefetch_async": 8, "device_t": 8, "cudamemprefetchasync": 8, "prefetch": [8, 13], "destin": 8, "uvm_mem_advice_dont_fork": 8, "madvis": 8, "madv_dontfork": 8, "workaround": 8, "driver": [8, 14], "un": 8, "page": [8, 21, 26, 27], "tabl": [8, 11, 17, 27], "fork": [8, 21], "caus": [8, 14, 15, 22, 24], "slowdown": 8, "next": [8, 13, 17, 23, 25], "access": [8, 13, 19], "uvm_to_cpu_clon": 8, "copi": 8, "contigu": [8, 11], "thread": [8, 13], "memcpi": 8, "section": [9, 14, 15, 25], "variou": 9, "all_to_one_devic": 9, "inputtensor": 9, "target_devic": 9, "permute_pooled_embs_split_gpu": 9, "pooled_emb": 9, "offset_dim_list": 9, "permute_list": 9, "inv_offset_dim_list": 9, "inv_permute_list": 9, "permute_pooled_embs_auto_grad_split_gpu": 9, "permute_pooled_embs_auto_grad_gpu": 9, "permute_pooled_embs_cpu_impl": 9, "allow_dupl": 9, "permute_pooled_embs_split_cpu": 9, "permute_pooled_embs_auto_grad_split_cpu": 9, "permute_pooled_embs_auto_grad": 9, "permute_pooled_embs_auto_grad_cpu": 9, "model": [10, 11], "techniqu": 10, "reduc": [10, 13], "larg": [10, 14], "order": [10, 17, 21], "achiev": [10, 15], "better": [10, 13, 23], "small": 10, "loss": [10, 22], "accuraci": 10, "_float_to_bfloat16_gpu": 10, "brain": 10, "_bfloat16_to_float_gpu": 10, "_float_to_fp8rowwise_gpu": 10, "forward": 10, "dtype": [10, 19], "sparsetyp": [10, 19], "throw": [10, 23], "_fp8rowwise_to_float_gpu": 10, "represent": [10, 17], "_float_to_fused8bitrowwise_gpu": 10, "_half_to_fused8bitrowwise_gpu": 10, "half": 10, "_single_or_half_precision_to_fused8bitrowwise_gpu": 10, "_fused8bitrowwise_to_float_gpu": 10, "_fused8bitrowwise_to_half_gpu": 10, "_fused8bitrowwise_to_single_or_half_precision_gpu": 10, "quant_padding_float_typ": 10, "_fused8bitrowwise_to_float_mixed_dim_gpu": 10, "kfloat": 10, "khalf": 10, "_float_to_fusednbitrowwise_gpu": 10, "_half_to_fusednbitrowwise_gpu": 10, "_single_or_half_precision_to_fusednbitrowwise_gpu": 10, "_fusednbitrowwise_to_float_gpu": 10, "_fusednbitrowwise_to_half_gpu": 10, "_fusednbitrowwise_to_single_or_half_precision_gpu": 10, "_float_to_hfp8_gpu": 10, "ebit": 10, "max_po": 10, "hybrid": 10, "hfp8": 10, "_hfp8_to_float_gpu": 10, "_float_to_msfp_gpu": 10, "bounding_box_s": 10, "mbit": 10, "min_po": 10, "msfp": 10, "_msfp_to_float_gpu": 10, "_float_to_paddedfp8rowwise_gpu": 10, "row_dim": 10, "pad": [10, 13, 17, 18, 25], "_paddedfp8rowwise_to_float_gpu": 10, "output_last_dim": 10, "_fused8bitrowwise_to_float_cpu_out": 10, "_float_to_fused8bitrowwise_cpu_out": 10, "float_to_fused8bitrowwise_cpu": 10, "half_to_fused8bitrowwise_cpu": 10, "float_or_half_to_fused8bitrowwise_cpu": 10, "fused8bitrowwise_to_float_cpu": 10, "fused8bitrowwise_to_half_cpu": 10, "fused8bitrowwise_to_float_or_half_cpu": 10, "float_to_fp8rowwise_cpu": 10, "fp8rowwise_to_float_cpu": 10, "fusednbitrowwise_to_float_cpu": 10, "fusednbitrowwise_sbfront_to_float_cpu": 10, "int2": 10, "front": 10, "float32": 10, "torch": [10, 13, 14, 15, 18, 19, 24, 25], "quint4x2": 10, "quint2x4": 10, "quantizedcpu": 10, "backend": [10, 27], "purpos": [10, 17, 18, 19, 22], "becaus": [10, 14, 17], "refer": [10, 14, 17, 24, 25], "rate": [10, 19], "hold": [10, 13, 17], "fusednbitrowwise_to_half_cpu": 10, "fusednbitrowwise_to_float_or_half_cpu": 10, "floattofp8quantized_ref": 10, "nrow": 10, "ncol": 10, "fp8quantizedtofloat_ref": 10, "expand_into_jagged_permute_cuda": 11, "permut": 11, "input_offset": 11, "output_offset": 11, "expand_into_jagged_permut": 11, "expand": 11, "case": [11, 14, 15, 17, 21], "ha": [11, 13, 15, 17, 21, 23, 24], "across": [11, 14], "rank": [11, 17], "level": 11, "exclus": [11, 13], "op": [11, 15, 18, 25], "sit": 11, "we": [11, 13, 14, 17, 21], "deriv": [11, 17, 22], "arrai": [11, 18, 25], "comput": [11, 14, 15, 19], "formula": 11, "output_permut": 11, "table_offset": 11, "bag_offset": 11, "histogram_binning_calibration_cpu": 11, "logit": 11, "bin_num_exampl": 11, "bin_num_posit": 11, "positive_weight": 11, "lower_bound": 11, "upper_bound": 11, "bin_ctr_in_use_aft": 11, "bin_ctr_weight_valu": 11, "divid": [11, 17], "predict": 11, "rang": [11, 13, 17], "e": [11, 13, 14, 17, 23, 25, 26], "bin": [11, 14], "two": [11, 17, 18, 19, 24], "exampl": [11, 13, 14, 15, 16, 18, 19, 23, 24, 25, 26], "fall": [11, 14, 15], "bucket": [11, 14], "basic": [11, 13, 25], "histogram": 11, "result": [11, 13, 14, 18], "statist": 11, "real": 11, "ctr": 11, "num_po": 11, "num_exampl": 11, "final": 11, "calibr": 11, "pre": [11, 15], "cali": 11, "wai": [11, 22], "within": 11, "suffici": [11, 21, 24], "That": 11, "fine": 11, "grain": 11, "modul": [11, 14, 15, 19, 25], "theoret": 11, "layer": [11, 13], "uncalibr": 11, "befor": [11, 13, 14, 19, 26], "sigmoid": 11, "calibart": 11, "pass": [11, 19, 21, 24], "lower": 11, "bound": [11, 17], "calibration_target": 11, "observ": 11, "statisct": 11, "final_calibrated_predict": 11, "bin_ctr_weight": 11, "bin_ctr": 11, "calibrated_predict": 11, "bin_id": 11, "generic_histogram_binning_calibration_by_feature_cpu": 11, "segment_valu": 11, "segment_length": 11, "num_seg": 11, "bin_boundari": 11, "extens": [11, 23, 24], "ectr": 11, "abov": [11, 13, 15, 17, 22, 23, 25, 26], "accept": [11, 21], "sort": [11, 12, 13, 14], "keyjaggedtensor": 11, "num_bin": 11, "longer": [11, 20, 23], "still": [11, 14], "parambin_ctr_weight_valu": 11, "get_unique_indices_cuda": 12, "linear_indic": 12, "max_indic": 12, "compute_count": 12, "dedupl": 12, "get_unique_indices_with_inverse_cuda": 12, "compute_inverse_indic": 12, "lru_cache_find_uncached_cuda": 12, "unique_indic": 12, "unique_indices_length": [12, 13], "time_stamp": 12, "lru_stat": 12, "gather_cache_stat": 12, "uvm_cache_stat": 12, "lock_cache_lin": 12, "lxu_cache_locking_count": 12, "lru": [12, 13, 19], "uncach": [12, 13], "them": 12, "host_lxu_cache_slot": 12, "h_in": 12, "cache_set": [12, 19], "linearize_cache_indices_cuda": 12, "b_offset": 12, "indices_base_offset": 12, "linear": [12, 13], "uniqu": [12, 13, 26], "linearize_cache_indices_from_row_idx_cuda": 12, "update_table_indic": 12, "update_row_indic": 12, "format": [12, 24, 25], "inplac": 12, "updat": [12, 13, 14, 15, 16, 19, 21], "lru_cache_populate_cuda": 12, "hash_size_cumsum": 12, "linear_cache_indic": 12, "stochastic_round": [12, 19], "fetch": [12, 13], "insert": [12, 13, 26], "timestep": 12, "lru_cache_populate_byte_cuda": 12, "byte": [12, 13], "direct_mapped_lru_cache_populate_byte_cuda": 12, "lxu_cache_miss_timestamp": 12, "assoc": 12, "variant": [12, 14, 15, 24], "lfu_cache_populate_cuda": 12, "lfu_stat": 12, "lfu": [12, 19], "lfu_cache_populate_byte_cuda": 12, "lxu_cache_lookup_cuda": 12, "invalid_index": 12, "num_uniq_cache_indic": 12, "lxu_cache_locations_output": 12, "look": [12, 19], "up": [12, 13, 16, 19], "slot": [12, 13], "sentinel": [12, 13], "miss": [12, 13, 14], "direct_mapped_lxu_cache_lookup_cuda": 12, "lxu_cache_flush_cuda": 12, "flush": [12, 13], "reset_weight_momentum_cuda": 12, "momentum1_dev": 12, "momentum1_uvm": 12, "momentum1_plac": 12, "momentum1_offset": 12, "pruned_indic": 12, "pruned_indices_offset": 12, "logical_table_id": 12, "buffer_id": 12, "lxu_cache_locking_counter_decrement_cuda": 12, "decrement": 12, "counter": 12, "lxu_cache_locations_update_cuda": 12, "lxu_cache_locations_new": 12, "rocksdbwritemod": 13, "rocksdb": 13, "mode": [13, 16, 19], "offload": 13, "3": [13, 14, 15, 17, 18, 19, 22, 25], "iter": 13, "fwd_rocksdb_read": 13, "l2": [13, 19], "fwd": 13, "fwd_l1_evict": 13, "l1": 13, "eviciton": 13, "evict": 13, "bwd_l1_cnflct_miss_write_back": 13, "conflict": 13, "bwd": 13, "fill": 13, "potenti": 13, "trigger": 13, "onc": [13, 15, 21], "full": [13, 14, 15, 26], "addition": 13, "do": [13, 14, 15, 21], "io": 13, "enumer": 13, "inlin": [13, 26], "hash_shard": 13, "id": [13, 15], "num_shard": 13, "hash": 13, "shard": 13, "algorithm": [13, 19], "cuda_callback_func": 13, "cudastream_t": 13, "stream": [13, 14], "cudaerror_t": 13, "statu": 13, "functor": 13, "callback": 13, "cudastreamaddcallback": 13, "common": [13, 14, 15, 17, 25], "cudastreamcallback_t": 13, "cast": 13, "invok": [13, 14, 24], "delet": 13, "anoth": [13, 26], "none": [13, 19], "masked_index_put_cuda": 13, "count": 13, "use_pipelin": 13, "preferred_sm": 13, "similar": [13, 14, 17, 19], "index_put": 13, "ignor": [13, 16, 19, 24], "2d": [13, 17, 18, 19, 25], "put": [13, 24], "equival": [13, 17], "filter_": 13, "indices_": 13, "nonzero": 13, "flatten": 13, "1d": [13, 18, 19, 25], "flag": [13, 14, 24], "overlap": 13, "other": [13, 15, 17, 22, 23, 24, 25], "fraction": 13, "sm": 13, "resourc": 13, "competit": 13, "masked_index_select_cuda": 13, "index_select": 13, "ssd_generate_row_addrs_cuda": 13, "assigned_cache_slot": 13, "linear_index_inverse_indic": 13, "unique_indices_count_cumsum": 13, "cache_set_inverse_indic": 13, "inserted_ssd_weight": 13, "cache_set_sorted_unique_indic": 13, "memori": [13, 15, 19, 27], "tbe": [13, 27], "retriev": 13, "scratch": [13, 15], "hbm": [13, 19], "lxu": [13, 19], "associ": 13, "enabl": [13, 14, 16], "conveni": 13, "first": [13, 14, 23, 25, 26], "pointer": [13, 24], "moreov": 13, "list": [13, 14, 17, 18, 19, 22, 23, 25], "post": 13, "backward": [13, 19], "origin": 13, "being": [13, 14, 24], "prefix": [13, 14, 26], "ssd_update_row_addrs_cuda": 13, "ssd_row_addrs_curr": 13, "inserted_ssd_weights_curr_next_map": 13, "lxu_cache_locations_curr": 13, "linear_index_inverse_indices_curr": 13, "unique_indices_count_cumsum_curr": 13, "cache_set_inverse_indices_curr": 13, "inserted_ssd_weights_next": 13, "unique_indices_length_curr": 13, "pipelin": 13, "dure": [13, 14, 17, 19, 25], "reloc": 13, "correct": [13, 14], "between": [13, 17, 23, 24, 26], "been": [13, 14, 23], "compact_indices_cuda": 13, "compact_indic": 13, "compact_count": 13, "mask": 13, "compact": 13, "given": [13, 14, 17, 18], "operat": 13, "remov": 13, "7": [13, 14, 15, 17, 18, 19], "5": [13, 14, 17, 19], "repres": [13, 17], "keep": [13, 14], "class": [13, 24, 25], "cachelibcach": 13, "cachelib_cach": 13, "h": [13, 14, 18, 23], "cachelib": 13, "wrapper": 13, "cachlib": 13, "interact": 13, "maintain": 13, "relat": [13, 17], "initi": 13, "state": [13, 14], "logic": [13, 17, 23], "caller": 13, "reset": 13, "captur": 13, "delai": 13, "markus": 13, "boost": 13, "get": 13, "handl": [13, 17], "read": [13, 17, 19], "done": [13, 14, 15], "embeddingparameterserv": 13, "public": [13, 21, 24], "embeddingkvdb": 13, "ps_table_batched_embed": 13, "servic": [13, 22], "tp": 13, "client": 13, "cachecontext": 13, "kv_db_table_batched_embed": 13, "l2cach": 13, "num_miss": 13, "cached_addr_list": 13, "prealloc": 13, "invalid": 13, "spot": 13, "stai": 13, "struct": 13, "queueitem": 13, "queue": 13, "item": [13, 25], "background": 13, "param": [13, 23, 25], "read_handl": 13, "abstract": 13, "pair": [13, 26], "later": [13, 14], "separ": [13, 24], "get_cach": 13, "monitor": 13, "checkout": 13, "explan": 13, "enable_shared_from_thi": 13, "execut": [13, 15, 16], "dram": 13, "remot": 13, "scalabl": 13, "without": [13, 14, 22], "blow": 13, "subclass": 13, "embeddingrocksdb": 13, "ssd_table_batched_embed": 13, "fbgemm": [14, 15, 18, 20, 21, 22, 24, 25], "experiment": [14, 15, 27], "reproduc": [14, 15, 21, 22], "platform_nam": 14, "unam": 14, "miniconda_prefix": 14, "home": 14, "download": [14, 15], "wget": 14, "q": 14, "anaconda": 14, "miniconda3": 14, "latest": 14, "sh": 14, "o": [14, 15], "p": 14, "load": [14, 17, 25], "shortcut": 14, "bashrc": 14, "command": [14, 15, 23, 24], "against": [14, 16], "env": [14, 15], "name": [14, 15, 22, 23, 25], "python_vers": 14, "12": [14, 17, 19], "upgrad": 14, "pyopenssl": 14, "22": [14, 17], "requir": [14, 15, 16, 17, 19, 24, 25], "recent": [14, 15], "nvcc": 14, "capabl": [14, 16], "bare": 14, "metal": 14, "neither": [14, 22], "nor": [14, 22], "nvidia": 14, "present": [14, 25], "sinc": [14, 17], "pull": [14, 15, 24], "linux": [14, 15], "distribut": [14, 22], "ubuntu": 14, "04": 14, "11": [14, 15, 17], "entrypoint": 14, "devel": 14, "ubuntu22": 14, "rest": [14, 15], "mai": [14, 15, 17, 22], "construct": [14, 15, 17], "mechan": 14, "nvml": 14, "org": [14, 15, 25], "cuda_vers": 14, "label": 14, "verifi": [14, 15, 23, 25], "cuda_runtim": 14, "libnvidia": [14, 15], "ml": [14, 15], "libnccl": [14, 16], "printenv": 14, "extract": 14, "url": [14, 15], "builder": 14, "blob": 14, "main": [14, 21], "install_cuda": 14, "cudnn_url": 14, "redist": 14, "x86_64": 14, "26_cuda12": 14, "archiv": 14, "tar": 14, "xz": 14, "unpack": 14, "xvf": 14, "applic": [14, 15, 19, 23, 25], "alreadi": [14, 15, 21, 23, 25], "repositori": [14, 21], "cmake": 14, "configur": [14, 23], "amd": [14, 15], "minim": 14, "6": [14, 15, 17], "termin": 14, "while": [14, 24], "come": [14, 15], "reason": [14, 15, 24], "oper": [14, 15, 16, 27], "guid": [14, 25], "disabl": 14, "apt": 14, "prompt": 14, "debian_frontend": 14, "noninteract": 14, "db": 14, "radeon": 14, "amdgpu": 14, "focal": 14, "install_5": 14, "50601": 14, "1_all": 14, "deb": 14, "usecas": 14, "hiplibsdk": 14, "dkm": 14, "hipifi": 14, "hip": 14, "dev": 14, "20": 14, "sysroot": 14, "avoid": 14, "glibcxx": 14, "fbgemm_cpu": 14, "10": [14, 15, 17], "older": [14, 15], "accompani": [14, 24], "appropri": 14, "sysroot_linux": 14, "gcc_version": 14, "forg": [14, 24], "gxx_linux": 14, "64": [14, 17], "17": 14, "binari": [14, 22], "cento": 14, "librari": [14, 24, 27], "libstdc": 14, "what": [14, 24], "libcxx_path": 14, "print": [14, 15, 19, 25], "objdump": 14, "tc": 14, "grep": 14, "glibc_": 14, "sed": 14, "vu": 14, "cat": 14, "glibcxx_": 14, "possibl": [14, 17, 21, 22], "just": 14, "minimum": [14, 23, 24, 25], "llvm_version": 14, "16": [14, 17], "libcxx": 14, "outdat": 14, "aarch64": [14, 15], "cannot": 14, "explicitli": 14, "clangxx": 14, "rt": 14, "lib": [14, 15, 16], "ld_library_path": [14, 15, 16], "config": 14, "var": 14, "nvcc_prepend_flag": 14, "correctli": [14, 15, 16, 23, 24], "xcompil": 14, "ccbin": 14, "clangxx_path": 14, "unsupport": 14, "even": [14, 22], "though": [14, 15], "libstd": 14, "mean": [14, 17, 19], "regardless": 14, "scenario": 14, "binpath": 14, "overrid": 14, "exist": [14, 23, 25], "ln": 14, "sf": 14, "path_to_either_gcc_or_clang": 14, "cc": 14, "These": 14, "stage": [14, 17], "click": 14, "hypothesi": [14, 15], "jinja2": 14, "ncurs": 14, "numpi": [14, 15], "scikit": [14, 15], "offici": 14, "homepag": 14, "authorit": [14, 15, 24], "how": [14, 15, 16, 25], "nightli": [14, 15], "rc": 14, "alwai": 14, "reliabl": 14, "arriv": 14, "hour": 14, "than": [14, 17], "window": 14, "silent": 14, "both": [14, 20, 22, 24], "place": [14, 19], "artifact": 14, "select": 14, "thu": [14, 19], "import": [14, 15, 19, 25, 26], "much": [14, 23], "determinist": 14, "whl": [14, 15], "cu121": [14, 15], "rocm5": [14, 15], "ensur": [14, 15, 21], "properli": 14, "__version__": 14, "cuda_cmake_macro": 14, "gemm": 14, "via": 14, "manual": [14, 15, 23], "sha": 14, "pin": 14, "ci": [14, 15], "ci_commit_pin": 14, "txt": [14, 16, 24, 26], "dedb7bdf33": 14, "tag": [14, 23, 26], "fbgemm_vers": 14, "v0": 14, "fbgemm_": 14, "addit": [14, 16, 17, 18], "flow": 14, "becom": 14, "stale": 14, "problem": 14, "re": [14, 15], "attempt": 14, "failur": [14, 15], "clear": [14, 21], "py": [14, 15, 16, 24, 25], "clean": [14, 24], "must": [14, 15, 16, 17, 19, 22, 26], "package_nam": 14, "fbgemm_gpu_": 14, "convent": 14, "major": 14, "minor": 14, "py312": 14, "python_tag": 14, "determin": [14, 17], "processor": 14, "arch": 14, "python_plat_nam": 14, "manylinux2014_": 14, "maco": 14, "macosx_10_9_": 14, "arm64": 14, "macosx_11_0_": 14, "win_": 14, "cpu_onli": 14, "bdist_wheel": 14, "package_vari": 14, "plat": 14, "cxxprefix": 14, "presum": 14, "made": [14, 24], "debug": [14, 16], "assert": 14, "presenc": 14, "unabl": 14, "cudacxx": 14, "cuda_bin_path": 14, "cub": 14, "cub_dir": 14, "header": [14, 23, 26], "cudnn_include_dir": 14, "cudnn_librari": 14, "filepath": 14, "nvml_lib_path": 14, "nccl": [14, 16], "nccl_lib_path": 14, "sm70": [14, 15], "80": 14, "v100": [14, 15], "a100": [14, 15], "cuda_arch_list": 14, "unset": 14, "torch_cuda_arch_list": 14, "preced": 14, "dtorch_cuda_arch_list": 14, "By": [14, 21], "those": [14, 17, 18, 21, 25], "rocm_path": 14, "pytorch_rocm_arch": 14, "gfx906": 14, "gfx908": 14, "gfx90a": 14, "wiki": 14, "gentoo": 14, "rocminfo": 14, "gfx": 14, "dhip_root_dir": 14, "dtorch_use_hip_dsa": 14, "complet": [14, 21, 24], "lot": 14, "jinja": 14, "instanti": 14, "sure": [14, 21, 23, 25], "accident": 14, "cours": 14, "fbgemm_gpu_lib_path": 14, "fbgemm_gpu_pi": [14, 15], "defin": [14, 17, 23], "nm": 14, "gdcu": 14, "referenc": 14, "certain": 14, "gdc": 14, "merge_pooled_embed": [14, 15], "isol": [15, 24], "build": [15, 16, 23, 25, 27], "sm80": 15, "respect": 15, "guarante": 15, "especi": 15, "displai": [15, 26], "setup": 15, "smi": 15, "515": 15, "76": 15, "persist": 15, "bu": [15, 26], "disp": 15, "volatil": 15, "uncorr": 15, "ecc": 15, "fan": 15, "temp": 15, "perf": 15, "pwr": 15, "usag": [15, 24, 25], "cap": 15, "util": [15, 27], "mig": 15, "a10g": 15, "00000000": 15, "00": 15, "1e": 15, "31c": 15, "p0": 15, "59w": 15, "300w": 15, "0mib": 15, "23028mib": 15, "gi": 15, "pid": 15, "No": 15, "expos": 15, "imag": 15, "launch": 15, "toolkit": 15, "interfac": 15, "concis": 15, "info": [15, 23, 25], "dieedg": 15, "avgpwr": 15, "sclk": 15, "mclk": 15, "pwrcap": 15, "vram": 15, "33": 15, "0c": 15, "37": 15, "0w": 15, "300mhz": 15, "1200mhz": 15, "auto": [15, 24], "290": 15, "32": 15, "39": 15, "log": 15, "difficult": 15, "relev": [15, 23], "genai": 15, "triton_vers": 15, "45fff310c8": 15, "about": [15, 25], "link": [15, 24], "encount": 15, "signatur": [15, 24], "traceback": 15, "last": 15, "root": [15, 21], "miniconda": 15, "mycondaenv": 15, "site": 15, "_op": [15, 24], "line": [15, 25, 26], "565": 15, "__getattr__": 15, "overload_nam": 15, "_c": 15, "_jit_get_oper": 15, "qualified_op_nam": 15, "runtimeerror": 15, "except": [15, 23, 25], "wa": 15, "string": [15, 26], "post47": 15, "py3": 15, "egg": 15, "__init__": [15, 25], "21": 15, "_fbgemm_gpu_doc": 15, "noqa": 15, "f401": 15, "e402": 15, "18": 15, "569": 15, "rais": [15, 25], "attributeerror": [15, 25], "_opnamespac": 15, "object": [15, 17], "attribut": [15, 25], "cli": 15, "main_run": 15, "47": 15, "_zn6fbgemm48floatorhalftofusednbitrowwisequantizedsbhalfavx2itli2eeevpkt_miph": 15, "libtorch": 15, "visibl": 15, "incorrectli": [15, 24], "declar": [15, 23], "were": [15, 18], "pr": [15, 23, 24, 25], "1618": 15, "former": 15, "resolv": 15, "latter": 15, "seriou": 15, "tha": 15, "develop": [15, 24], "bench": 16, "good": [16, 22], "instal": [16, 24, 27], "pip": [16, 24], "pytest": 16, "rsx": 16, "pytestcollectionwarn": 16, "split_table_batched_embeddings_test": 16, "quantize_ops_test": 16, "sparse_ops_test": 16, "split_embedding_inference_converter_test": 16, "cuda_visible_devic": 16, "cuda_launch_block": 16, "involv": [16, 17], "rpath": 16, "fbgemm_test_with_rocm": 16, "hip_launch_block": 16, "split_table_batched_embeddings_benchmark": 16, "consecut": 17, "nestedtensor": 17, "raggedtensor": 17, "tensorflow": 17, "notabl": 17, "sentenc": 17, "maxlength": 17, "numel": 17, "greatest": 17, "divisor": 17, "smallest": 17, "sub": 17, "exclud": 17, "partit": 17, "impli": [17, 22], "denot": [17, 23, 25], "offest": 17, "outer": 17, "would": 17, "begin": 17, "maximum": [17, 18, 25], "densor": 17, "form": [17, 22], "figur": 17, "below": 17, "show": [17, 24], "accomod": 17, "At": [17, 23, 24, 25], "multipl": [17, 18, 19, 25, 27], "hadamard": 17, "product": [17, 22], "bmatrix": 17, "rightarrow": 17, "25": 17, "36": 17, "49": 17, "81": 17, "50": 17, "operand": 17, "word": 17, "ax": 17, "properti": 17, "elementwis": [17, 18], "start": [17, 18, 25, 26], "dim": 17, "onto": 17, "part": 17, "everi": 17, "converson": 17, "could": 17, "lead": 17, "smaller": 17, "expect": 17, "happen": 17, "give": 17, "situat": 17, "like": 17, "dense_tensor": 17, "jagged_tensor": 17, "break": 17, "exact": 17, "usual": 17, "area": 18, "outsid": 18, "coverag": 18, "total": [18, 19], "identit": 18, "add": [18, 21, 23, 24, 25], "structur": 18, "jagged_dense_dense_elementwise_add_jagged_output": 18, "y_0": 18, "y_1": 18, "multipli": [18, 19], "max_n": 18, "matmul": 18, "stacked_jagged_1d_to_dens": 18, "arg": [18, 25], "kwarg": 18, "stacked_jagged_2d_to_dens": 18, "split_table_batched_embeddings_op": 19, "splittablebatchedembeddingbagscodegen": 19, "embedding_spec": 19, "feature_table_map": 19, "cache_algorithm": 19, "cachealgorithm": 19, "cache_load_factor": 19, "cache_reserved_memori": 19, "cache_precis": 19, "weights_precis": 19, "enforce_hbm": 19, "optimtyp": 19, "exact_sgd": 19, "record_cache_metr": 19, "gradient_clip": 19, "max_gradi": 19, "learning_r": 19, "01": 19, "ep": 19, "0e": 19, "momentum": 19, "weight_decai": 19, "weight_decay_mod": 19, "weightdecaymod": 19, "eta": 19, "001": 19, "beta1": 19, "beta2": 19, "999": 19, "poolingmod": 19, "boundscheckmod": 19, "sourc": [19, 21, 22, 23, 24, 25], "embeddingloc": 19, "computedevic": 19, "spec": 19, "placement": 19, "capac": 19, "reserv": [19, 22], "adam": 19, "exact_adagrad": 19, "exact_rowwise_adagrad": 19, "lamb": 19, "lars_sgd": 19, "partial_rowwise_adam": 19, "partial_rowwise_lamb": 19, "sgd": 19, "recordcachemetr": 19, "record": 19, "hit": 19, "request": [19, 20, 24], "record_cache_miss_count": 19, "metric": 19, "record_tablewise_cache_miss": 19, "stochast": 19, "round": 19, "gradient": 19, "clip": 19, "learn": 19, "epsilon": 19, "adagrad": 19, "lar": 19, "decai": 19, "decoupl": 19, "pool": [19, 27], "boundari": 19, "fatal": 19, "conatin": 19, "column": 19, "feature_requires_grad": 19, "split_table_batched_embeddings_ops_common": 19, "split_table_batched_embeddings_ops_train": 19, "init_embedding_weights_uniform": 19, "split_embedding_weight": 19, "9426": 19, "7046": 19, "4214": 19, "0419": 19, "1331": 19, "7856": 19, "8124": 19, "2021": 19, "5771": 19, "5911": 19, "7792": 19, "1068": 19, "6203": 19, "4813": 19, "1677": 19, "4790": 19, "5587": 19, "0941": 19, "5754": 19, "3475": 19, "8952": 19, "1964": 19, "0810": 19, "4174": 19, "2513": 19, "4039": 19, "3775": 19, "3273": 19, "5399": 19, "0229": 19, "1455": 19, "8770": 19, "9520": 19, "4593": 19, "7169": 19, "6307": 19, "1765": 19, "8757": 19, "8614": 19, "2051": 19, "0603": 19, "9980": 19, "7958": 19, "5826": 19, "long": 19, "13": 19, "5197": 19, "2957": 19, "3578": 19, "1487": 19, "4873": 19, "3044": 19, "9801": 19, "2769": 19, "7164": 19, "8528": 19, "7159": 19, "6719": 19, "0784": 19, "2016": 19, "2176": 19, "1988": 19, "3825": 19, "5008": 19, "8991": 19, "1405": 19, "2637": 19, "9427": 19, "8902": 19, "3754": 19, "5013": 19, "6105": 19, "9968": 19, "3057": 19, "7621": 19, "9821": 19, "7314": 19, "6195": 19, "grad_fn": 19, "cppnode": 19, "splitlookupfunction_sgd_op": 19, "question": 20, "concern": 20, "discuss": 20, "kick": 20, "regard": 20, "feel": 20, "free": 20, "reach": 20, "easi": 21, "transpar": 21, "describ": 21, "activ": 21, "welcom": [21, 27], "your": [21, 24, 25], "branch": 21, "ve": 21, "chang": [21, 23, 25], "api": [21, 23, 24, 25], "suit": 21, "lint": 21, "haven": 21, "submit": [21, 23, 25], "facebook": [21, 22, 27], "open": 21, "track": 21, "bug": 21, "descript": [21, 23, 24, 25, 26], "abl": 21, "bounti": 21, "safe": 21, "disclosur": 21, "secur": 21, "go": 21, "outlin": 21, "agre": 21, "tree": 21, "claus": 22, "bsd": 22, "softwar": 22, "copyright": 22, "inc": 22, "affili": 22, "right": [22, 26], "redistribut": 22, "modif": 22, "permit": 22, "condit": 22, "met": 22, "retain": 22, "notic": 22, "disclaim": 22, "materi": 22, "contributor": 22, "endors": 22, "promot": 22, "written": 22, "permiss": 22, "BY": 22, "THE": 22, "holder": 22, "AND": 22, "AS": 22, "express": [22, 26], "OR": 22, "warranti": 22, "NOT": 22, "limit": [22, 24], "TO": 22, "OF": 22, "merchant": 22, "FOR": 22, "particular": 22, "IN": 22, "NO": 22, "event": 22, "shall": 22, "BE": 22, "liabl": 22, "indirect": 22, "incident": 22, "special": 22, "exemplari": 22, "consequenti": 22, "damag": 22, "procur": 22, "substitut": 22, "profit": 22, "busi": 22, "interrupt": 22, "theori": 22, "liabil": 22, "contract": 22, "strict": 22, "tort": 22, "neglig": 22, "aris": 22, "IF": 22, "advis": 22, "SUCH": 22, "javadoc": 23, "style": [23, 25], "comment": [23, 24, 26], "sphinx": [23, 24, 25], "breath": 23, "kept": 23, "cpp": [23, 25, 26], "cu": 23, "cuh": 23, "everyth": 23, "ifndef": 23, "doxygen_this_will_be_skip": 23, "endif": 23, "hidden": 23, "html": [23, 24, 25], "descriptionss": 23, "publish": [23, 25], "docstr": [23, 24, 25], "method": [23, 24, 25], "organ": 23, "yet": 23, "top": [23, 27], "defgroup": 23, "directli": [23, 25], "behavior": [23, 25], "tparam": 23, "thrown": [23, 25], "ingroup": 23, "brief": 23, "short": 23, "example_method": [23, 25], "def": [23, 25], "foo": [23, 25], "lst": [23, 25], "And": [23, 25], "verbatim": [23, 25], "text": [23, 25, 26], "diagram": [23, 25], "unpars": 23, "second": [23, 25], "prev": [23, 25], "usabl": [23, 25], "space": [23, 24, 25], "endcod": 23, "align": [23, 25], "param1": [23, 25], "param2": 23, "bad_alloc": 23, "logic_error": 23, "href": 23, "www": [23, 25], "nl": 23, "cmdlink": 23, "On": [23, 25], "doxygengroup": 23, "rst": [23, 25, 26], "content": [23, 26, 27], "toctre": [23, 25], "ini": 23, "taken": 23, "care": 23, "doc": [23, 24, 25, 26], "local": [23, 25], "netlifi": [23, 24, 25], "preview": [23, 25], "serv": 24, "yourself": 24, "shoe": 24, "who": 24, "understand": 24, "live": 24, "easier": 24, "leav": 24, "task": 24, "tool": 24, "graphviz": [24, 26], "assembl": 24, "view": 24, "prepend": 24, "sphinx_lint": 24, "technic": 24, "why": 24, "occasion": 24, "unresolv": 24, "might": 24, "opt": 24, "pycapsul": 24, "neg": 24, "silenc": 24, "nitpick": 24, "conf": 24, "domain": 24, "deploi": 24, "app": 24, "googl": 25, "c_size_t": 25, "ret": 25, "emplace_back": 25, "valueerror": 25, "14": 25, "restructuredtext": 25, "en": 25, "master": 25, "__": 25, "pep": 25, "0287": 25, "42": 25, "autofunct": 25, "c_ulong": 25, "mani": 25, "attach": 25, "fact": 25, "helper": 25, "codebas": 25, "add_doc": 25, "jag": [25, 27], "forc": 25, "hoc": 25, "the_new_doc_modul": 25, "remain": 25, "render": [25, 26], "anchor": 26, "_doc": 26, "underscor": 26, "_": 26, "There": 26, "elsewher": 26, "ref": 26, "literalinclud": 26, "rel": 26, "enclos": 26, "bracket": 26, "skiplin": 26, "suppli": 26, "math": 26, "k_": 26, "k_n": 26, "expressino": 26, "int_a": 26, "frac": 26, "2v": 26, "dx": 26, "left": 26, "dv": 26, "_a": 26, "du": 26, "digraph": 26, "altern": 26, "extern": 26, "dot": 26, "examplegraph": 26, "low": 27, "precis": 27, "high": 27, "convolut": 27, "server": 27, "infer": 27, "collect": 27, "transform": 27, "contribut": 27, "contact": 27, "licens": 27, "autovector": 27, "ssd": 27}, "objects": {"": [[13, 0, 1, "_CPPv4N16RocksdbWriteMode29BWD_L1_CNFLCT_MISS_WRITE_BACKE", "BWD_L1_CNFLCT_MISS_WRITE_BACK"], [1, 1, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec"], [1, 2, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::IndexType"], [1, 2, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::OffsetType"], [1, 2, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::OutType"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::block_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::data_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::exponent_bias"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::exponent_bits"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::index_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::indices"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::input"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::input_stride"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::is_bf16_out"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::is_weight_positional"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::normalize_by_lengths"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::offsets_or_lengths"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::out"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::output_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::output_stride"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::use_offsets"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::weights"], [1, 1, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::InType"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::IndexType"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::OffsetType"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::OutType"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::block_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::data_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::index_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::indices"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::input"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::input_stride"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::is_bf16_in"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::is_bf16_out"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::is_weight_positional"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::no_bag"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::normalize_by_lengths"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::offsets_or_lengths"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::out"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::output_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::output_stride"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::scale_bias_last"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::use_offsets"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::weights"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode5FLUSHE", "FLUSH"], [10, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ebits"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::exponent_bias"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::input"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ncols"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::nrows"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::output"], [10, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu"], [10, 3, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::forward"], [10, 3, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::input"], [10, 3, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::output_dtype"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode15FWD_L1_EVICTIONE", "FWD_L1_EVICTION"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode16FWD_ROCKSDB_READE", "FWD_ROCKSDB_READ"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::len"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::m"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::max"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::min"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf"], [0, 2, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::InputType"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::bit_rate"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_columns"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_rows"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::output"], [10, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ebits"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::exponent_bias"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::input"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::max_pos"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ncols"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::nrows"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::output"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize"], [0, 2, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::T"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::dst"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::len"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::noise_ratio"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::num_threads"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::qparams"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::src"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::thread_id"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::C"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::G"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::K"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::LAYOUT"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::T"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::X"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::dst"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::scales"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::src"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::zero_points"], [13, 4, 1, "_CPPv416RocksdbWriteMode", "RocksdbWriteMode"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode29BWD_L1_CNFLCT_MISS_WRITE_BACKE", "RocksdbWriteMode::BWD_L1_CNFLCT_MISS_WRITE_BACK"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode5FLUSHE", "RocksdbWriteMode::FLUSH"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode15FWD_L1_EVICTIONE", "RocksdbWriteMode::FWD_L1_EVICTION"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode16FWD_ROCKSDB_READE", "RocksdbWriteMode::FWD_ROCKSDB_READ"], [0, 1, 1, "_CPPv46Xor128v", "Xor128"], [10, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu"], [10, 3, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::forward"], [10, 3, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::input"], [10, 3, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::output_dtype"], [10, 1, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu"], [10, 3, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu::input"], [10, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu"], [10, 3, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::forward"], [10, 3, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::input"], [10, 1, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu"], [10, 3, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu::input"], [10, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out"], [10, 3, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::input"], [10, 3, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::output"], [10, 1, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu"], [10, 3, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu::input"], [10, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu"], [10, 3, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::bit_rate"], [10, 3, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::input"], [10, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::ebits"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::exponent_bias"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::input"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::max_pos"], [10, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bias"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bounding_box_size"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::ebits"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::input"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::max_pos"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::mbits"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::min_pos"], [10, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu"], [10, 3, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::forward"], [10, 3, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::input"], [10, 3, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::row_dim"], [10, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out"], [10, 3, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::input"], [10, 3, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::output"], [10, 1, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu"], [10, 3, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu::input"], [10, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu"], [10, 3, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::D_offsets"], [10, 3, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::input"], [10, 3, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::output_dtype"], [10, 1, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu"], [10, 3, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu::input"], [10, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::input"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::output_dtype"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::quant_padding_float_type"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::scale_bias_last"], [10, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu"], [10, 3, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::bit_rate"], [10, 3, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::input"], [10, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu"], [10, 3, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::bit_rate"], [10, 3, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::input"], [10, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu"], [10, 3, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::bit_rate"], [10, 3, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::input"], [10, 3, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::output_dtype"], [10, 1, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu"], [10, 3, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu::input"], [10, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu"], [10, 3, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::bit_rate"], [10, 3, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::input"], [10, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu"], [10, 3, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::ebits"], [10, 3, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::exponent_bias"], [10, 3, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::input"], [10, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::bias"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::ebits"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::input"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::mbits"], [10, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::forward"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::input"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_dtype"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_last_dim"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::row_dim"], [10, 1, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu"], [10, 3, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu::input"], [10, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu"], [10, 3, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::bit_rate"], [10, 3, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::input"], [9, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device"], [9, 3, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::inputTensors"], [9, 3, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::target_device"], [6, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul"], [6, 3, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_offsets"], [6, 3, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_values"], [6, 3, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::v"], [3, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::B_ofsets"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::bounds_check_mode"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::indices"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::max_B"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::offsets"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::rows_per_table"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::warning"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::weights"], [13, 1, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::compact_count"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::compact_indices"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::count"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::indices"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::masks"], [13, 1, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func"], [13, 3, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func::functor"], [13, 3, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func::status"], [13, 3, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func::stream"], [6, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged"], [6, 3, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged::dense"], [6, 3, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged::offsets"], [6, 3, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged::total_L"], [12, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::D_offsets"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::hash_size_cumsum"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lru_state"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_miss_timestamp"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::row_alignment"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::time_stamp"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::uvm_cache_stats"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_offsets"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_tys"], [12, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::invalid_index"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::uvm_cache_stats"], [23, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method"], [23, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::Alignment"], [23, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::T"], [23, 3, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param1"], [23, 3, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param2"], [11, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::input_offsets"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_offsets"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_size"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::permute"], [10, 1, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu"], [10, 3, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu::input"], [10, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu"], [10, 3, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::forward"], [10, 3, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::input"], [10, 1, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu"], [10, 3, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu::input"], [10, 1, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu"], [10, 3, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu::input"], [10, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::input"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::output_dtype"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::quant_padding_float_type"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::scale_bias_last"], [10, 1, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu"], [10, 3, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu::input"], [10, 1, 1, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_sbfront_to_float_cpu"], [10, 3, 1, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_sbfront_to_float_cpu::bit_rate"], [10, 3, 1, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_sbfront_to_float_cpu::input"], [10, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu"], [10, 3, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::bit_rate"], [10, 3, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::input"], [10, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu"], [10, 3, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::bit_rate"], [10, 3, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::input"], [10, 3, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::output_dtype"], [10, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu"], [10, 3, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::bit_rate"], [10, 3, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::input"], [11, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_boundaries"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_in_use_after"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_weight_value"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_examples"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_positives"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::logit"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::num_segments"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::positive_weight"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_lengths"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_value"], [12, 1, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda"], [12, 3, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda::compute_count"], [12, 3, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda::linear_indices"], [12, 3, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda::max_indices"], [12, 1, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::compute_count"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::compute_inverse_indices"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::linear_indices"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::max_indices"], [4, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::XQ"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::cache_K"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::cache_V"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::cache_logical_dtype_int"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::kv_cache_quant_num_groups"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::num_split_ks"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::qk_scale"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::seq_positions"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::use_tensor_cores"], [10, 1, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu"], [10, 3, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu::input"], [13, 1, 1, "_CPPv410hash_shard7int64_t6size_t", "hash_shard"], [13, 3, 1, "_CPPv410hash_shard7int64_t6size_t", "hash_shard::id"], [13, 3, 1, "_CPPv410hash_shard7int64_t6size_t", "hash_shard::num_shards"], [11, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_in_use_after"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_weight_value"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_examples"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_positives"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::logit"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::lower_bound"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::positive_weight"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::upper_bound"], [12, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot"], [12, 3, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::C"], [12, 3, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::h_in"], [3, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::D_offsets"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::dev_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bias"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bits"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indice_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indices"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_locations"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float16_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float32_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float8_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int2_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int4_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int8_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::offsets"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::output_dtype"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::pooling_mode"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::row_alignment"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::total_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::uvm_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_offsets"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_placements"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_tys"], [3, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::D_offsets"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::dev_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bias"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bits"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indice_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indices"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_locations"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float16_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float32_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float8_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int2_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int4_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int8_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::offsets"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::output_dtype"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::pooling_mode"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::row_alignment"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::total_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::uvm_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_offsets"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_placements"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_tys"], [3, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::D_offsets"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_hash_size_cumsum"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_index_table_map"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::dev_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bias"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bits"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indice_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indices"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_locations"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_state"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_state"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float16_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float32_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float8_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int2_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int4_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int8_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::offsets"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::output_dtype"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::pooling_mode"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::row_alignment"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_cache_hash_size"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::uvm_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_offsets"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_placements"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_tys"], [3, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::D_offsets"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_hash_size_cumsum"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_index_table_map"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::dev_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bias"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bits"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indice_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indices"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_locations"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_state"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_state"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float16_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float32_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float8_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int2_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int4_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int8_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::offsets"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::output_dtype"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::pooling_mode"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::row_alignment"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_cache_hash_size"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::uvm_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_offsets"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_placements"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_tys"], [8, 1, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor"], [8, 3, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor::self"], [6, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::max_L"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::offsets"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::padding_value"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::values"], [6, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense"], [6, 3, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::max_sequence_length"], [6, 3, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::offsets"], [6, 3, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::values"], [6, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_offsets"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_values"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::y"], [6, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output"], [6, 3, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_offsets"], [6, 3, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_values"], [6, 3, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::y"], [6, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda"], [6, 3, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_offsets"], [6, 3, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_values"], [6, 3, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::y"], [6, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_offsets"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_values"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::y"], [6, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::max_lengths"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::offsets"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::padding_value"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::values"], [6, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::max_lengths"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::offsets"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::padding_value"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::values"], [13, 5, 1, "_CPPv4N5kv_db12CacheContextE", "kv_db::CacheContext"], [13, 5, 1, "_CPPv4N5kv_db13EmbeddingKVDBE", "kv_db::EmbeddingKVDB"], [13, 5, 1, "_CPPv4N5kv_db9QueueItemE", "kv_db::QueueItem"], [13, 5, 1, "_CPPv4N8l2_cache13CacheLibCacheE", "l2_cache::CacheLibCache"], [12, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::D_offsets"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lfu_state"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::row_alignment"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_offsets"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_tys"], [12, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::D_offsets"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lfu_state"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::stochastic_rounding"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights_offsets"], [12, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::B_offsets"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::indices"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::indices_base_offset"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::max_B"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::offsets"], [12, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda"], [12, 3, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_row_indices"], [12, 3, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_table_indices"], [12, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::compute_inverse_indices"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lock_cache_line"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lru_state"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lxu_cache_locking_counter"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::max_indices"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::time_stamp"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::unique_indices"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::unique_indices_length"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::uvm_cache_stats"], [12, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::D_offsets"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::hash_size_cumsum"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lru_state"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::row_alignment"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::time_stamp"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::uvm_cache_stats"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_offsets"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_tys"], [12, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::D_offsets"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::hash_size_cumsum"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lock_cache_line"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lru_state"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_locking_counter"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::stochastic_rounding"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::time_stamp"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::uvm_cache_stats"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights_offsets"], [12, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::D_offsets"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::stochastic_rounding"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::total_D"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::uvm_weights"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::weights_offsets"], [12, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda"], [12, 3, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations"], [12, 3, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations_new"], [12, 3, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::num_uniq_cache_indices"], [12, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda"], [12, 3, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locations"], [12, 3, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locking_counter"], [12, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::invalid_index"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_locations_output"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::num_uniq_cache_indices"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::uvm_cache_stats"], [13, 1, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::count"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::indices"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::preferred_sms"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::self"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::use_pipeline"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::values"], [13, 1, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::count"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::indices"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::preferred_sms"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::self"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::use_pipeline"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::values"], [8, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor"], [8, 3, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::self"], [8, 3, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::sizes"], [8, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor"], [8, 3, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::self"], [8, 3, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::sizes"], [8, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta"], [8, 3, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::self"], [8, 3, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::sizes"], [8, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor"], [8, 3, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::is_host_mapped"], [8, 3, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::self"], [8, 3, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::sizes"], [8, 1, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta"], [8, 3, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta::is_host_mapped"], [8, 3, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta::self"], [8, 3, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta::sizes"], [8, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor"], [8, 3, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::self"], [8, 3, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::sizes"], [5, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::batch_size"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::include_last_offsets"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::indices_list"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::offsets_list"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::per_sample_weights"], [9, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::pooled_embs"], [9, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::pooled_embs"], [9, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::pooled_embs"], [9, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::pooled_embs"], [9, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::pooled_embs"], [9, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::allow_duplicates"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_offset_dim_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_permute_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::offset_dim_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::permute_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::pooled_embs"], [9, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::pooled_embs"], [9, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::pooled_embs"], [3, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings_offsets"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::indices"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::offsets"], [3, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings_offsets"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::indices"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::offsets"], [3, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::dense_indices"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table_offsets"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::indices"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::offsets"], [3, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table_offsets"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::indices"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::offsets"], [3, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table_offsets"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::indices"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::offsets"], [13, 5, 1, "_CPPv4N2ps24EmbeddingParameterServerE", "ps::EmbeddingParameterServer"], [7, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda"], [7, 3, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::grad_output"], [7, 3, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::num_features_per_rank"], [7, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda"], [7, 3, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::cumsum_dim_sum_per_rank"], [7, 3, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::dim_sum_per_rank"], [7, 3, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::grad_output"], [7, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu"], [7, 3, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::dim_sum_per_rank"], [7, 3, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::grad_output"], [7, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda"], [7, 3, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::dim_sum_per_rank"], [7, 3, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::grad_output"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::DIRECT"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::Q_GRAN"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::block"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::inp"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_in"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::r"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::C_PER_G"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::Q_GRAN"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::block"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::inp"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_in"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::r"], [12, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::D_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::buffer_ids"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::dev_weights"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::logical_table_ids"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_dev"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_placements"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_uvm"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::uvm_weights"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_placements"], [13, 5, 1, "_CPPv4N3ssd16EmbeddingRocksDBE", "ssd::EmbeddingRocksDB"], [13, 1, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::assigned_cache_slots"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::cache_set_inverse_indices"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::cache_set_sorted_unique_indices"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::inserted_ssd_weights"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::linear_index_inverse_indices"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::lxu_cache_locations"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::lxu_cache_weights"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::unique_indices_count_cumsum"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::unique_indices_length"], [13, 1, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::cache_set_inverse_indices_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::inserted_ssd_weights_curr_next_map"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::inserted_ssd_weights_next"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::linear_index_inverse_indices_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::lxu_cache_locations_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::lxu_cache_weights"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::ssd_row_addrs_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::unique_indices_count_cumsum_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::unique_indices_length_curr"], [5, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::include_last_offsets"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::indices_list"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::offsets_list"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::per_sample_weights"], [8, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise"], [8, 3, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::cuda_memory_advise"], [8, 3, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::self"], [8, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE", "uvm_cuda_mem_prefetch_async"], [8, 3, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::device_t"], [8, 3, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::self"], [8, 1, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork"], [8, 3, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork::self"], [8, 1, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage"], [8, 3, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage::self"], [8, 1, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu"], [8, 3, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu::self"], [8, 1, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone"], [8, 3, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone::self"], [8, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device"], [8, 3, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::prototype"], [8, 3, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::self"], [19, 6, 0, "-", "fbgemm_gpu"]], "fbgemm_gpu.docs.examples": [[25, 7, 1, "", "example_method"]], "fbgemm_gpu.split_table_batched_embeddings_ops": [[19, 7, 1, "", "SplitTableBatchedEmbeddingBagsCodegen"]], "torch.ops.fbgemm": [[18, 7, 1, "", "batched_dense_vec_jagged_2d_mul"], [18, 7, 1, "", "dense_to_jagged"], [18, 7, 1, "", "jagged_1d_to_dense"], [18, 7, 1, "", "jagged_2d_to_dense"], [18, 7, 1, "", "jagged_dense_dense_elementwise_add_jagged_output"], [18, 7, 1, "", "jagged_dense_elementwise_add"], [18, 7, 1, "", "jagged_dense_elementwise_add_jagged_output"], [18, 7, 1, "", "jagged_dense_elementwise_mul"], [18, 7, 1, "", "jagged_to_padded_dense"], [18, 7, 1, "", "stacked_jagged_1d_to_dense"], [18, 7, 1, "", "stacked_jagged_2d_to_dense"]]}, "objtypes": {"0": "cpp:enumerator", "1": "cpp:function", "2": "cpp:templateParam", "3": "cpp:functionParam", "4": "cpp:enum", "5": "cpp:class", "6": "py:module", "7": "py:function"}, "objnames": {"0": ["cpp", "enumerator", "C++ enumerator"], "1": ["cpp", "function", "C++ function"], "2": ["cpp", "templateParam", "C++ template parameter"], "3": ["cpp", "functionParam", "C++ function parameter"], "4": ["cpp", "enum", "C++ enum"], "5": ["cpp", "class", "C++ class"], "6": ["py", "module", "Python module"], "7": ["py", "function", "Python function"]}, "titleterms": {"quantiz": [0, 10], "util": 0, "refer": [0, 26], "implement": [0, 1], "method": [0, 1], "avx": 0, "2": 0, "512": 0, "tbe": [1, 19], "cpu": [1, 3, 6, 7, 10, 11, 14, 15], "autovector": 1, "fp8": 1, "16": 1, "32": 1, "autovec": 1, "build": [2, 14, 24], "instruct": [2, 14, 15, 16], "fbgemm": [2, 27], "requir": 2, "hardwar": 2, "softwar": 2, "depend": 2, "asmjit": 2, "cpuinfo": 2, "googletest": 2, "set": [2, 14, 15, 24], "up": [2, 14, 15, 24], "an": [2, 14], "isol": [2, 14], "environ": [2, 14, 15, 16, 24], "instal": [2, 14, 15], "tool": [2, 14], "c": [2, 14, 23, 27], "compil": [2, 14], "other": [2, 14, 26], "librari": [2, 15], "prepar": [2, 14], "linux": 2, "maco": 2, "cmake": 2, "gcc": [2, 14], "issu": [2, 21], "12": 2, "clang": [2, 14], "bazel": 2, "window": 2, "embed": [3, 9, 12, 13, 19], "oper": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 17, 18, 19], "cuda": [3, 6, 7, 8, 10, 11, 13, 14, 15, 16], "experiment": 4, "attent": 4, "combin": [5, 17], "input": 5, "jag": [6, 17, 18], "tensor": [6, 17, 18], "layout": 7, "transform": 7, "memori": 8, "pool": 9, "merg": 9, "permut": 9, "spars": 11, "data": 11, "tabl": [12, 19], "batch": [12, 19], "ssd": 13, "miniconda": 14, "conda": [14, 15], "onli": [14, 15], "genai": 14, "docker": [14, 15], "imag": 14, "cudnn": 14, "cutlass": 14, "rocm": [14, 15, 16], "miopen": 14, "symlink": 14, "pytorch": [14, 15], "through": [14, 15], "pip": [14, 15], "post": [14, 15], "check": [14, 15], "triton": [14, 15], "pre": 14, "setup": [14, 16], "The": 14, "process": 14, "wheel": 14, "variabl": 14, "For": 14, "develop": [14, 27], "undefin": [14, 15], "symbol": [14, 15], "glibc": 14, "version": 14, "compat": 14, "nvidia": 15, "driver": 15, "contain": 15, "runtim": 15, "amdgpu": 15, "python": [15, 25, 27], "fbgemm_gpu": [15, 16, 24, 27], "packag": 15, "public": 15, "pypi": 15, "test": 16, "run": 16, "variant": 16, "benchmark": 16, "high": 17, "level": 17, "overview": [17, 27], "format": 17, "valu": 17, "offset": 17, "max": 17, "length": 17, "exampl": 17, "arithmet": 17, "convers": 17, "dens": 17, "contact": 20, "u": 20, "github": 20, "slack": 20, "contribut": 21, "code": [21, 23, 25, 26], "conduct": 21, "pull": 21, "request": 21, "contributor": 21, "licens": [21, 22], "agreement": 21, "cla": 21, "ad": [23, 25, 26], "document": [23, 24, 25, 26, 27], "gener": [24, 25, 27], "guidelin": 24, "specif": 24, "guid": 24, "toolchain": 24, "lint": 24, "deploy": 24, "preview": 24, "todo": 25, "auto": 25, "sphinx": 26, "pointer": 26, "section": 26, "referenc": 26, "sourc": 26, "latex": 26, "graph": 26, "homepag": 27, "info": 27, "api": 27}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Installation Instructions": [[15, "installation-instructions"]], "Set Up CPU-Only Environment": [[15, "set-up-cpu-only-environment"]], "Set Up CUDA Environment": [[15, "set-up-cuda-environment"]], "Install NVIDIA Drivers": [[15, "install-nvidia-drivers"]], "Set Up the CUDA Docker Container and Conda Environment": [[15, "set-up-the-cuda-docker-container-and-conda-environment"]], "Install the CUDA Runtime": [[15, "install-the-cuda-runtime"]], "Set Up ROCm Environment": [[15, "set-up-rocm-environment"]], "Install AMDGPU Drivers": [[15, "install-amdgpu-drivers"]], "Set Up the ROCm Docker Container and Conda Environment": [[15, "set-up-the-rocm-docker-container-and-conda-environment"]], "Install Python Libraries": [[15, "install-python-libraries"]], "Install PyTorch": [[15, "install-pytorch"], [14, "install-pytorch"]], "Install Triton": [[15, "install-triton"]], "Install the FBGEMM_GPU Package": [[15, "install-the-fbgemm-gpu-package"]], "Install through PyTorch PIP": [[15, "install-through-pytorch-pip"]], "Install through Public PyPI": [[15, "install-through-public-pypi"]], "Post-Installation Checks": [[15, "post-installation-checks"]], "Undefined Symbols": [[15, "undefined-symbols"]], "Jagged Tensor Operators": [[17, "jagged-tensor-operators"], [6, "jagged-tensor-operators"], [18, "jagged-tensor-operators"]], "High Level Overview": [[17, "high-level-overview"]], "Jagged Tensor Format": [[17, "jagged-tensor-format"]], "Values": [[17, "values"]], "Offsets": [[17, "offsets"]], "Max Lengths": [[17, "max-lengths"]], "Jagged Tensor Example": [[17, "jagged-tensor-example"]], "Jagged Tensor Operations": [[17, "jagged-tensor-operations"]], "Arithmetic Operations": [[17, "arithmetic-operations"]], "Conversion Operations": [[17, "conversion-operations"]], "Jagged to Dense": [[17, "jagged-to-dense"]], "Dense to Jagged": [[17, "dense-to-jagged"]], "Combined Arithmetic + Conversion Operations": [[17, "combined-arithmetic-conversion-operations"]], "Test Instructions": [[16, "test-instructions"]], "Setup the FBGEMM_GPU Test Environment": [[16, "setup-the-fbgemm-gpu-test-environment"]], "Running FBGEMM_GPU Tests": [[16, "running-fbgemm-gpu-tests"]], "Testing with the CUDA Variant": [[16, "testing-with-the-cuda-variant"]], "Testing with the ROCm Variant": [[16, "testing-with-the-rocm-variant"]], "Running FBGEMM_GPU Benchmarks": [[16, "running-fbgemm-gpu-benchmarks"]], "Table Batched Embedding (TBE) Operators": [[19, "module-fbgemm_gpu"]], "Contact Us": [[20, "contact-us"]], "GitHub": [[20, "github"]], "Slack": [[20, "slack"]], "FBGEMM and FBGEMM_GPU Documentation Homepage": [[27, "fbgemm-and-fbgemm-gpu-documentation-homepage"]], "General Info": [[27, null]], "FBGEMM Development": [[27, null]], "FBGEMM_GPU Development": [[27, null]], "FBGEMM_GPU Overview": [[27, null]], "FBGEMM C++ API": [[27, null]], "FBGEMM_GPU C++ API": [[27, null]], "FBGEMM_GPU Python API": [[27, null]], "Documentation": [[24, "documentation"]], "General Documentation Guidelines": [[24, "general-documentation-guidelines"]], "Specific Documentation Guides": [[24, "specific-documentation-guides"]], "Building the Documentation": [[24, "building-the-documentation"]], "Set Up Build Environment": [[24, "set-up-build-environment"]], "Build FBGEMM_GPU": [[24, "build-fbgemm-gpu"]], "Set Up the Documentation Toolchain": [[24, "set-up-the-documentation-toolchain"]], "Build the Documentation": [[24, "build-the-documentation"]], "Linting the Documentation": [[24, "linting-the-documentation"]], "Deployment Preview": [[24, "deployment-preview"]], "Sphinx Documentation Pointers": [[26, "sphinx-documentation-pointers"]], "References Other Sections of the Documentation": [[26, "references-other-sections-of-the-documentation"]], "Referencing the Source Code": [[26, "referencing-the-source-code"]], "Adding LaTeX": [[26, "adding-latex"]], "Adding Graphs": [[26, "adding-graphs"]], "Adding Documentation to Python Code": [[25, "adding-documentation-to-python-code"]], "Todo": [[25, "id1"]], "Adding Documentation to Auto-Generated Python Code": [[25, "adding-documentation-to-auto-generated-python-code"]], "Pooled Embeddings Operators": [[9, "pooled-embeddings-operators"]], "Merge Operators": [[9, "merge-operators"]], "Permutation Operators": [[9, "permutation-operators"]], "Quantization Operators": [[10, "quantization-operators"]], "CUDA Operators": [[10, "cuda-operators"], [11, "cuda-operators"], [3, "cuda-operators"], [7, "cuda-operators"], [6, "cuda-operators"], [13, "cuda-operators"]], "CPU Operators": [[10, "cpu-operators"], [11, "cpu-operators"], [3, "cpu-operators"], [7, "cpu-operators"], [6, "cpu-operators"]], "Sparse Data Operators": [[11, "sparse-data-operators"]], "Quantization Utilities": [[0, "quantization-utilities"]], "Reference Implementation Methods": [[0, "reference-implementation-methods"]], "AVX-2 Implementation Methods": [[0, "avx-2-implementation-methods"]], "AVX-512 Implementation Methods": [[0, "avx-512-implementation-methods"]], "TBE CPU Autovectorization": [[1, "tbe-cpu-autovectorization"]], "FP8/16/32 Autovec Implementation Methods": [[1, "fp8-16-32-autovec-implementation-methods"]], "Build Instructions": [[2, "build-instructions"], [14, "build-instructions"]], "FBGEMM Requirements": [[2, "fbgemm-requirements"]], "Hardware Requirements": [[2, "hardware-requirements"]], "Software Dependencies": [[2, "software-dependencies"]], "asmjit": [[2, "asmjit"]], "cpuinfo": [[2, "cpuinfo"]], "GoogleTest": [[2, "googletest"]], "Set Up an Isolated Build Environment": [[2, "set-up-an-isolated-build-environment"], [14, "set-up-an-isolated-build-environment"]], "Install the Build Tools": [[2, "install-the-build-tools"], [14, "install-the-build-tools"]], "C/C++ Compiler": [[2, "c-c-compiler"]], "Other Build Tools": [[2, "other-build-tools"], [14, "other-build-tools"]], "Build the FBGEMM Library": [[2, "build-the-fbgemm-library"]], "Preparing the Build": [[2, "preparing-the-build"], [14, "preparing-the-build"]], "Building on Linux and macOS (CMake + GCC)": [[2, "building-on-linux-and-macos-cmake-gcc"]], "Build Issues with GCC 12+": [[2, "build-issues-with-gcc-12"]], "Building on Linux and macOS (CMake + Clang)": [[2, "building-on-linux-and-macos-cmake-clang"]], "Building on Linux (Bazel)": [[2, "building-on-linux-bazel"]], "Building on Windows": [[2, "building-on-windows"]], "License": [[22, "license"], [21, "license"]], "Adding Documentation to C++ Code": [[23, "adding-documentation-to-c-code"]], "Contributing": [[21, "contributing"]], "Code of Conduct": [[21, "code-of-conduct"]], "Pull Requests": [[21, "pull-requests"]], "Contributor License Agreement (\u201cCLA\u201d)": [[21, "contributor-license-agreement-cla"]], "Issues": [[21, "issues"]], "Experimental Operators": [[4, "experimental-operators"]], "Attention Operators": [[4, "attention-operators"]], "Combine Input Operators": [[5, "combine-input-operators"]], "Embedding Operators": [[3, "embedding-operators"]], "Layout Transformation Operators": [[7, "layout-transformation-operators"]], "CUDA Memory Operators": [[8, "cuda-memory-operators"]], "Table Batched Embedding Operators": [[12, "table-batched-embedding-operators"]], "Install Miniconda": [[14, "install-miniconda"]], "Set Up the Conda Environment": [[14, "set-up-the-conda-environment"]], "Set Up for CPU-Only Build": [[14, "set-up-for-cpu-only-build"]], "Set Up for CUDA / GenAI-Only Build": [[14, "set-up-for-cuda-genai-only-build"]], "CUDA Docker Image": [[14, "cuda-docker-image"]], "Install CUDA": [[14, "install-cuda"]], "Install cuDNN": [[14, "install-cudnn"]], "Install CUTLASS": [[14, "install-cutlass"]], "Set Up for ROCm Build": [[14, "set-up-for-rocm-build"]], "ROCm Docker Image": [[14, "rocm-docker-image"]], "Install ROCm": [[14, "install-rocm"]], "Install MIOpen": [[14, "install-miopen"]], "C/C++ Compiler (GCC)": [[14, "c-c-compiler-gcc"]], "C/C++ Compiler (Clang)": [[14, "c-c-compiler-clang"]], "Compiler Symlinks": [[14, "compiler-symlinks"]], "Installation Through Conda": [[14, "installation-through-conda"]], "Installation Through PyTorch PIP": [[14, "installation-through-pytorch-pip"]], "Post-Install Checks": [[14, "post-install-checks"]], "Install PyTorch-Triton": [[14, "install-pytorch-triton"]], "Other Pre-Build Setup": [[14, "other-pre-build-setup"]], "The Build Process": [[14, "the-build-process"]], "Set Wheel Build Variables": [[14, "set-wheel-build-variables"]], "CPU-Only Build": [[14, "cpu-only-build"]], "CUDA Build": [[14, "cuda-build"]], "GenAI-Only Build": [[14, "genai-only-build"]], "ROCm Build": [[14, "rocm-build"]], "Post-Build Checks (For Developers)": [[14, "post-build-checks-for-developers"]], "Undefined Symbols Check": [[14, "undefined-symbols-check"]], "GLIBC Version Compatibility Check": [[14, "glibc-version-compatibility-check"]], "SSD Embedding Operators": [[13, "ssd-embedding-operators"]]}, "indexentries": {"findminmax (c++ function)": [[0, "_CPPv410FindMinMaxPKfPfPf7int64_t"]], "floatorhalftofusednbitrowwisequantizedsbhalf (c++ function)": [[0, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE"]], "fusedquantizedequantize (c++ function)": [[0, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif"]], "quantizegroupwise (c++ function)": [[0, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T"]], "xor128 (c++ function)": [[0, "_CPPv46Xor128v"]], "requantizeoutputprocessingavx2 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "requantizeoutputprocessinggconvavx512 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "embeddingspmdmfp8_autovec (c++ function)": [[1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib"]], "embeddingspmdm_autovec (c++ function)": [[1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb"]], "bounds_check_indices_cuda (c++ function)": [[3, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t"]], "int_nbit_split_embedding_codegen_lookup_function (c++ function)": [[3, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE"]], "int_nbit_split_embedding_codegen_lookup_function_cpu (c++ function)": [[3, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function (c++ function)": [[3, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu (c++ function)": [[3, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE"]], "pruned_array_lookup_cpu (c++ function)": [[3, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor"]], "pruned_array_lookup_cuda (c++ function)": [[3, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_insert_unweighted_cpu (c++ function)": [[3, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_cuda (c++ function)": [[3, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_unweighted_cpu (c++ function)": [[3, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor"]], "gqa_attn_splitk (c++ function)": [[4, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t"]], "padding_fused_tbe_input_combine_cpu (c++ function)": [[5, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t"]], "tbe_input_combine_cpu (c++ function)": [[5, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE"]], "batched_dense_vec_jagged_2d_mul (c++ function)": [[6, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor"]], "dense_to_jagged (c++ function)": [[6, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE"]], "jagged_1d_to_dense (c++ function)": [[6, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t"]], "jagged_2d_to_dense (c++ function)": [[6, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE"]], "jagged_dense_elementwise_add (c++ function)": [[6, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output (c++ function)": [[6, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output_cuda (c++ function)": [[6, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_mul (c++ function)": [[6, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_to_padded_dense (c++ function)": [[6, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd"]], "jagged_to_padded_dense_forward (c++ function)": [[6, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd"]], "recat_embedding_grad_output_cuda (c++ function)": [[7, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_batch_cuda (c++ function)": [[7, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor"]], "recat_embedding_grad_output_mixed_d_cpu (c++ function)": [[7, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_cuda (c++ function)": [[7, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE"]], "is_uvm_tensor (c++ function)": [[8, "_CPPv413is_uvm_tensorRK6Tensor"]], "new_host_mapped_tensor (c++ function)": [[8, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor (c++ function)": [[8, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor_meta (c++ function)": [[8, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_unified_tensor (c++ function)": [[8, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb"]], "new_unified_tensor_meta (c++ function)": [[8, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb"]], "new_vanilla_managed_tensor (c++ function)": [[8, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "uvm_cuda_mem_advise (c++ function)": [[8, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t"]], "uvm_cuda_mem_prefetch_async (c++ function)": [[8, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE"]], "uvm_mem_advice_dont_fork (c++ function)": [[8, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor"]], "uvm_storage (c++ function)": [[8, "_CPPv411uvm_storageRK6Tensor"]], "uvm_to_cpu (c++ function)": [[8, "_CPPv410uvm_to_cpuRK6Tensor"]], "uvm_to_cpu_clone (c++ function)": [[8, "_CPPv416uvm_to_cpu_cloneRK6Tensor"]], "uvm_to_device (c++ function)": [[8, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor"]], "all_to_one_device (c++ function)": [[9, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE"]], "permute_pooled_embs_auto_grad (c++ function)": [[9, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_cpu (c++ function)": [[9, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_gpu (c++ function)": [[9, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_split_cpu (c++ function)": [[9, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_auto_grad_split_gpu (c++ function)": [[9, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_cpu_impl (c++ function)": [[9, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb"]], "permute_pooled_embs_split_cpu (c++ function)": [[9, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_split_gpu (c++ function)": [[9, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "fp8quantizedtofloat_ref (c++ function)": [[10, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi"]], "fp8rowwise_to_float_cpu (c++ function)": [[10, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t"]], "floattofp8quantized_ref (c++ function)": [[10, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd"]], "_fp8rowwise_to_float_gpu (c++ function)": [[10, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t"]], "_bfloat16_to_float_gpu (c++ function)": [[10, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE"]], "_float_to_fp8rowwise_gpu (c++ function)": [[10, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb"]], "_float_to_bfloat16_gpu (c++ function)": [[10, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE"]], "_float_to_fused8bitrowwise_cpu_out (c++ function)": [[10, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor"]], "_float_to_fused8bitrowwise_gpu (c++ function)": [[10, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor"]], "_float_to_fusednbitrowwise_gpu (c++ function)": [[10, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "_float_to_hfp8_gpu (c++ function)": [[10, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd"]], "_float_to_msfp_gpu (c++ function)": [[10, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd"]], "_float_to_paddedfp8rowwise_gpu (c++ function)": [[10, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t"]], "_fused8bitrowwise_to_float_cpu_out (c++ function)": [[10, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor"]], "_fused8bitrowwise_to_float_gpu (c++ function)": [[10, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_float_mixed_dim_gpu (c++ function)": [[10, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t"]], "_fused8bitrowwise_to_half_gpu (c++ function)": [[10, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_single_or_half_precision_gpu (c++ function)": [[10, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb"]], "_fusednbitrowwise_to_float_gpu (c++ function)": [[10, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_half_gpu (c++ function)": [[10, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_single_or_half_precision_gpu (c++ function)": [[10, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_half_to_fused8bitrowwise_gpu (c++ function)": [[10, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor"]], "_half_to_fusednbitrowwise_gpu (c++ function)": [[10, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t"]], "_hfp8_to_float_gpu (c++ function)": [[10, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_msfp_to_float_gpu (c++ function)": [[10, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t"]], "_paddedfp8rowwise_to_float_gpu (c++ function)": [[10, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t"]], "_single_or_half_precision_to_fused8bitrowwise_gpu (c++ function)": [[10, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor"]], "_single_or_half_precision_to_fusednbitrowwise_gpu (c++ function)": [[10, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "float_or_half_to_fused8bitrowwise_cpu (c++ function)": [[10, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor"]], "float_to_fp8rowwise_cpu (c++ function)": [[10, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb"]], "float_to_fused8bitrowwise_cpu (c++ function)": [[10, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor"]], "fused8bitrowwise_to_float_cpu (c++ function)": [[10, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor"]], "fused8bitrowwise_to_float_or_half_cpu (c++ function)": [[10, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb"]], "fused8bitrowwise_to_half_cpu (c++ function)": [[10, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor"]], "fusednbitrowwise_sbfront_to_float_cpu (c++ function)": [[10, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t"]], "fusednbitrowwise_to_float_cpu (c++ function)": [[10, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t"]], "fusednbitrowwise_to_float_or_half_cpu (c++ function)": [[10, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t"]], "fusednbitrowwise_to_half_cpu (c++ function)": [[10, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t"]], "half_to_fused8bitrowwise_cpu (c++ function)": [[10, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor"]], "expand_into_jagged_permute_cuda (c++ function)": [[11, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t"]], "generic_histogram_binning_calibration_by_feature_cpu (c++ function)": [[11, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td"]], "histogram_binning_calibration_cpu (c++ function)": [[11, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td"]], "direct_mapped_lru_cache_populate_byte_cuda (c++ function)": [[12, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE"]], "direct_mapped_lxu_cache_lookup_cuda (c++ function)": [[12, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE"]], "get_unique_indices_cuda (c++ function)": [[12, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb"]], "get_unique_indices_with_inverse_cuda (c++ function)": [[12, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb"]], "host_lxu_cache_slot (c++ function)": [[12, "_CPPv419host_lxu_cache_slot7int64_t7int64_t"]], "lfu_cache_populate_byte_cuda (c++ function)": [[12, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "lfu_cache_populate_cuda (c++ function)": [[12, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb"]], "linearize_cache_indices_cuda (c++ function)": [[12, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t"]], "linearize_cache_indices_from_row_idx_cuda (c++ function)": [[12, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE"]], "lru_cache_find_uncached_cuda (c++ function)": [[12, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb"]], "lru_cache_populate_byte_cuda (c++ function)": [[12, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE"]], "lru_cache_populate_cuda (c++ function)": [[12, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE"]], "lxu_cache_flush_cuda (c++ function)": [[12, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb"]], "lxu_cache_locations_update_cuda (c++ function)": [[12, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE"]], "lxu_cache_locking_counter_decrement_cuda (c++ function)": [[12, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE"]], "lxu_cache_lookup_cuda (c++ function)": [[12, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE"]], "reset_weight_momentum_cuda (c++ function)": [[12, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "rocksdbwritemode (c++ enum)": [[13, "_CPPv416RocksdbWriteMode"]], "rocksdbwritemode::bwd_l1_cnflct_miss_write_back (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode29BWD_L1_CNFLCT_MISS_WRITE_BACKE"]], "rocksdbwritemode::flush (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode5FLUSHE"]], "rocksdbwritemode::fwd_l1_eviction (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode15FWD_L1_EVICTIONE"]], "rocksdbwritemode::fwd_rocksdb_read (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode16FWD_ROCKSDB_READE"]], "compact_indices_cuda (c++ function)": [[13, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor"]], "cuda_callback_func (c++ function)": [[13, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv"]], "hash_shard (c++ function)": [[13, "_CPPv410hash_shard7int64_t6size_t"]], "kv_db::cachecontext (c++ class)": [[13, "_CPPv4N5kv_db12CacheContextE"]], "kv_db::embeddingkvdb (c++ class)": [[13, "_CPPv4N5kv_db13EmbeddingKVDBE"]], "kv_db::queueitem (c++ struct)": [[13, "_CPPv4N5kv_db9QueueItemE"]], "l2_cache::cachelibcache (c++ class)": [[13, "_CPPv4N8l2_cache13CacheLibCacheE"]], "masked_index_put_cuda (c++ function)": [[13, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t"]], "masked_index_select_cuda (c++ function)": [[13, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t"]], "ps::embeddingparameterserver (c++ class)": [[13, "_CPPv4N2ps24EmbeddingParameterServerE"]], "ssd::embeddingrocksdb (c++ class)": [[13, "_CPPv4N3ssd16EmbeddingRocksDBE"]], "ssd_generate_row_addrs_cuda (c++ function)": [[13, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "ssd_update_row_addrs_cuda (c++ function)": [[13, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "batched_dense_vec_jagged_2d_mul() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.batched_dense_vec_jagged_2d_mul"]], "dense_to_jagged() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.dense_to_jagged"]], "jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_1d_to_dense"]], "jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_2d_to_dense"]], "jagged_dense_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_add() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_elementwise_add"]], "jagged_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_mul() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_elementwise_mul"]], "jagged_to_padded_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_to_padded_dense"]], "stacked_jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.stacked_jagged_1d_to_dense"]], "stacked_jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.stacked_jagged_2d_to_dense"]], "splittablebatchedembeddingbagscodegen() (in module fbgemm_gpu.split_table_batched_embeddings_ops)": [[19, "fbgemm_gpu.split_table_batched_embeddings_ops.SplitTableBatchedEmbeddingBagsCodegen"]], "fbgemm_gpu": [[19, "module-fbgemm_gpu"]], "module": [[19, "module-fbgemm_gpu"]], "example_method (c++ function)": [[23, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf"]], "example_method() (in module fbgemm_gpu.docs.examples)": [[25, "fbgemm_gpu.docs.examples.example_method"]]}}) \ No newline at end of file +Search.setIndex({"docnames": ["fbgemm-cpp-api/QuantUtils", "fbgemm-cpp-api/tbe_cpu_autovec", "fbgemm-development/BuildInstructions", "fbgemm_gpu-cpp-api/embedding_ops", "fbgemm_gpu-cpp-api/experimental_ops", "fbgemm_gpu-cpp-api/input_combine", "fbgemm_gpu-cpp-api/jagged_tensor_ops", "fbgemm_gpu-cpp-api/layout_transform_ops", "fbgemm_gpu-cpp-api/memory_utils", "fbgemm_gpu-cpp-api/merge_pooled_embeddings", "fbgemm_gpu-cpp-api/quantize_ops", "fbgemm_gpu-cpp-api/sparse_ops", "fbgemm_gpu-cpp-api/split_table_batched_embeddings", "fbgemm_gpu-cpp-api/ssd_embedding_ops", "fbgemm_gpu-development/BuildInstructions", "fbgemm_gpu-development/InstallationInstructions", "fbgemm_gpu-development/TestInstructions", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps", "fbgemm_gpu-python-api/jagged_tensor_ops", "fbgemm_gpu-python-api/table_batched_embedding_ops", "general/ContactUs", "general/Contributing", "general/License", "general/documentation/Cpp", "general/documentation/Overview", "general/documentation/Python", "general/documentation/Sphinx", "index"], "filenames": ["fbgemm-cpp-api/QuantUtils.rst", "fbgemm-cpp-api/tbe_cpu_autovec.rst", "fbgemm-development/BuildInstructions.rst", "fbgemm_gpu-cpp-api/embedding_ops.rst", "fbgemm_gpu-cpp-api/experimental_ops.rst", "fbgemm_gpu-cpp-api/input_combine.rst", "fbgemm_gpu-cpp-api/jagged_tensor_ops.rst", "fbgemm_gpu-cpp-api/layout_transform_ops.rst", "fbgemm_gpu-cpp-api/memory_utils.rst", "fbgemm_gpu-cpp-api/merge_pooled_embeddings.rst", "fbgemm_gpu-cpp-api/quantize_ops.rst", "fbgemm_gpu-cpp-api/sparse_ops.rst", "fbgemm_gpu-cpp-api/split_table_batched_embeddings.rst", "fbgemm_gpu-cpp-api/ssd_embedding_ops.rst", "fbgemm_gpu-development/BuildInstructions.rst", "fbgemm_gpu-development/InstallationInstructions.rst", "fbgemm_gpu-development/TestInstructions.rst", "fbgemm_gpu-overview/jagged-tensor-ops/JaggedTensorOps.rst", "fbgemm_gpu-python-api/jagged_tensor_ops.rst", "fbgemm_gpu-python-api/table_batched_embedding_ops.rst", "general/ContactUs.rst", "general/Contributing.rst", "general/License.rst", "general/documentation/Cpp.rst", "general/documentation/Overview.rst", "general/documentation/Python.rst", "general/documentation/Sphinx.rst", "index.rst"], "titles": ["Quantization Utilities", "TBE CPU Autovectorization", "Build Instructions", "Embedding Operators", "Experimental Operators", "Combine Input Operators", "Jagged Tensor Operators", "Layout Transformation Operators", "CUDA Memory Operators", "Pooled Embeddings Operators", "Quantization Operators", "Sparse Data Operators", "Table Batched Embedding Operators", "SSD Embedding Operators", "Build Instructions", "Installation Instructions", "Test Instructions", "Jagged Tensor Operators", "Jagged Tensor Operators", "Table Batched Embedding (TBE) Operators", "Contact Us", "Contributing", "License", "Adding Documentation to C++ Code", "Documentation", "Adding Documentation to Python Code", "Sphinx Documentation Pointers", "FBGEMM and FBGEMM_GPU Documentation Homepage"], "terms": {"templat": [0, 1, 14, 23], "typenam": [0, 1, 23], "t": [0, 2, 4, 8, 11, 14, 19, 21, 23, 24], "layout_t": 0, "layout": [0, 27], "kcx": 0, "void": [0, 3, 8, 10, 12, 13], "quantizegroupwis": 0, "const": [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 25], "float": [0, 1, 10, 18, 19, 23, 25], "src": 0, "int": [0, 1, 10, 18, 19, 23, 25], "k": [0, 4], "c": [0, 12, 15, 17, 22, 24, 25, 26], "x": [0, 6, 13, 17, 23, 25], "g": [0, 2, 11, 13, 14, 23, 25], "scale": [0, 1, 4, 10], "std": [0, 1, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 23, 25], "int32_t": [0, 1, 23, 25], "zero_point": 0, "dst": 0, "point": [0, 10, 18, 23, 25], "data": [0, 1, 4, 8, 13, 17, 19, 22, 27], "type": [0, 1, 2, 4, 10, 15, 17, 18, 19, 23], "paramet": [0, 1, 4, 8, 10, 11, 13, 18, 19, 23, 24, 25], "output": [0, 1, 4, 6, 10, 11, 13, 18, 19, 23, 25], "int8_t": 0, "uint8_t": [0, 1, 10, 12], "ar": [0, 2, 6, 12, 13, 14, 15, 17, 18, 19, 22, 23, 24, 25], "support": [0, 2, 4, 13, 14, 15, 17, 25, 27], "input": [0, 1, 4, 6, 8, 10, 11, 13, 17, 18, 19, 23, 27], "tensor": [0, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 19, 24, 25, 27], "kxc": 0, "correspond": [0, 11, 12, 13, 17, 23, 25], "kcr": 0, "kctr": 0, "weight": [0, 1, 3, 11, 12, 13, 19], "time": [0, 2, 14, 15, 17], "dimens": [0, 4, 6, 8, 11, 17, 18, 19, 25], "krsc": 0, "ktrsc": 0, "channel": [0, 14, 15, 20], "number": [0, 1, 2, 4, 10, 11, 13, 14, 17, 18, 19, 24], "r": [0, 16, 24], "": [0, 2, 8, 14, 16, 17, 21, 23, 24, 25], "group": [0, 4, 17, 23], "function": [0, 2, 13, 14, 23, 25], "perform": [0, 2, 10, 11, 13, 17, 27], "channelwis": 0, "1": [0, 1, 2, 4, 11, 12, 13, 14, 15, 16, 17, 18, 19, 24, 25, 26], "groupwis": 0, "per": [0, 17], "size": [0, 2, 4, 8, 10, 11, 17, 18, 19], "should": [0, 10, 11, 12, 14, 15, 17, 21, 23, 24, 25], "equal": [0, 17, 25], "zero": [0, 18, 25], "reprsent": 0, "fusedquantizedequant": 0, "int64_t": [0, 1, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13], "len": [0, 17], "tensorquantizationparam": 0, "qparam": 0, "thread_id": 0, "0": [0, 2, 4, 10, 11, 12, 13, 14, 15, 17, 18, 19, 25], "num_thread": 0, "noise_ratio": 0, "0f": 0, "fuse": [0, 10, 19], "integ": [0, 8, 10, 17], "dequant": [0, 10], "kernel": [0, 2, 8, 10, 13, 16, 27], "acceler": 0, "awar": 0, "train": [0, 13, 19, 27], "fp32": [0, 1, 10, 19], "valu": [0, 6, 8, 10, 11, 12, 13, 18, 19, 23, 24, 25], "u": [0, 14, 26, 27], "int8": [0, 19], "us": [0, 1, 2, 4, 8, 11, 13, 14, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27], "provid": [0, 2, 13, 14, 15, 16, 22, 23, 24, 25, 27], "back": [0, 8, 12, 14, 15], "inputtyp": 0, "floatorhalftofusednbitrowwisequantizedsbhalf": 0, "bit_rat": [0, 10], "size_t": [0, 10, 13, 23], "input_row": 0, "input_column": 0, "convert": [0, 8, 10, 13, 17, 18, 25], "fp16": [0, 10, 19], "rowwis": [0, 10, 19], "bitrat": 0, "specifi": [0, 2, 4, 10, 11, 13, 14, 18, 19], "bit": [0, 1, 10], "bia": [0, 1, 4, 10], "each": [0, 1, 4, 10, 11, 13, 14, 17, 18, 19, 25], "row": [0, 1, 6, 10, 12, 13, 17, 18, 19, 25], "store": [0, 10, 11, 12, 13], "itself": [0, 17, 24], "end": [0, 1, 15, 17, 26], "can": [0, 1, 2, 10, 11, 13, 14, 15, 17, 23, 24, 25, 26], "4": [0, 10, 14, 15, 17, 18, 19, 25], "8": [0, 10, 14, 17, 19], "uint32_t": 0, "xor128": 0, "random": 0, "gener": [0, 2, 11, 13, 14, 15, 23, 26], "9": [0, 13, 14, 17, 19], "base": [0, 2, 11, 12, 13, 14, 17], "thi": [0, 2, 6, 8, 9, 10, 11, 13, 14, 15, 17, 20, 21, 22, 23, 25, 26, 27], "paper": 0, "findminmax": 0, "m": [0, 14, 15, 16], "min": 0, "max": [0, 4, 19], "find": [0, 12, 14], "matrix": [0, 2, 18, 27], "bool": [0, 1, 4, 8, 9, 10, 12, 13, 19], "a_symmetr": 0, "b_symmetr": 0, "quantizationgranular": 0, "q_gran": 0, "has_bia": 0, "fuse_relu": 0, "bias_typ": 0, "direct": [0, 12, 15, 22, 23, 25, 26], "fals": [0, 1, 8, 13, 19, 24], "requantizeoutputprocessingavx2": 0, "out": [0, 1, 14, 20, 22, 24], "inp": 0, "block_type_t": 0, "block": [0, 1, 23, 25, 26], "ld_out": 0, "ld_in": 0, "requantizationparams_t": 0, "requant": 0, "avx2": [0, 2], "i": [0, 1, 2, 4, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27], "c_per_g": 0, "requantizeoutputprocessinggconvavx512": 0, "avx512": 0, "intyp": 1, "indextyp": 1, "offsettyp": 1, "outtyp": 1, "embeddingspmdm_autovec": 1, "block_siz": 1, "output_s": [1, 11], "index_s": 1, "data_s": 1, "indic": [1, 3, 12, 13, 17, 19], "offsets_or_length": 1, "normalize_by_length": 1, "is_weight_posit": 1, "use_offset": 1, "true": [1, 8, 13, 19], "output_strid": 1, "input_strid": 1, "scale_bias_last": [1, 10], "no_bag": 1, "is_bf16_out": 1, "is_bf16_in": 1, "version": [1, 2, 15], "embeddingspmdm_ref": 1, "index": [1, 11, 12, 13, 14, 15, 17, 23, 25], "offset": [1, 3, 6, 11, 12, 18, 19], "element": [1, 10, 12, 13, 17], "address": [1, 2, 13, 14], "sum": [1, 4, 11, 13, 18, 19], "option": [1, 2, 3, 6, 8, 12, 14, 18, 19], "null": 1, "non": [1, 4, 8, 19], "whether": [1, 4, 8, 13, 14, 22], "normal": [1, 17], "length": [1, 4, 6, 11, 13, 18, 19, 25], "If": [1, 2, 13, 14, 15, 19, 21, 23, 24, 25], "posit": [1, 4, 11, 13, 19], "set": [1, 8, 12, 13, 16, 17, 18, 19], "instead": [1, 14, 24], "same": [1, 2, 4, 8, 11, 14, 17, 18, 23, 24, 25], "appear": [1, 15], "embed": [1, 2, 14, 15, 24, 27], "bag": [1, 11, 19, 27], "bfloat16": [1, 10], "embeddingspmdmfp8_autovec": 1, "exponent_bit": 1, "exponent_bia": [1, 10], "expon": 1, "note": [2, 12, 14, 15, 23, 24, 25, 26], "The": [2, 4, 8, 10, 11, 13, 15, 16, 17, 18, 19, 21, 23, 24, 25, 26], "most": [2, 14, 15, 17, 24], "date": [2, 14, 15, 24], "script": [2, 14, 15, 24], "bundl": [2, 14, 15, 24], "repo": [2, 14, 15, 24, 25], "under": [2, 14, 15, 21, 22, 24, 25], "setup_env": [2, 14, 15, 24], "bash": [2, 14, 15, 24], "step": [2, 13, 14, 15, 17, 24, 25], "fbgemm_gpu": [2, 8, 14, 17, 19, 20, 21, 22, 23, 25], "follow": [2, 11, 14, 15, 17, 22, 23, 24, 25], "toolchain": [2, 14, 15], "run": [2, 14, 15, 24], "cpu": [2, 8, 9, 16, 24, 27], "higher": 2, "In": [2, 11, 13, 14, 15, 17, 21, 23, 25], "doe": [2, 3, 15, 23, 24, 25], "have": [2, 10, 11, 12, 14, 17, 24], "ani": [2, 11, 14, 18, 21, 22, 24, 25], "intel": 2, "mkl": 2, "howev": [2, 14, 17, 22], "comparison": 2, "some": [2, 14, 17, 24], "benchmark": 2, "found": [2, 14, 15, 24], "path": [2, 13, 14, 16, 23, 26], "through": [2, 21, 23, 25], "intel_mkl_dir": 2, "variabl": 2, "built": [2, 14, 15, 24, 27], "report": [2, 15], "otherwis": [2, 8, 13, 15, 22], "subset": 2, "all": [2, 11, 12, 13, 14, 15, 17, 19, 22, 24], "three": [2, 17], "git": [2, 14], "submodul": [2, 14], "custom": [2, 26], "desir": [2, 14, 17, 18, 23], "thei": [2, 14, 24, 26], "asmjit_src_dir": 2, "cpuinfo_src_dir": 2, "googletest_source_dir": 2, "With": 2, "inner": [2, 17], "take": [2, 14], "one": [2, 4, 10, 11, 12, 14, 18, 19, 23, 25], "doesn": 2, "fit": [2, 22], "approach": 2, "so": [2, 11, 14, 15, 16, 17], "implement": [2, 4, 10, 13, 14, 17], "dynam": 2, "effici": [2, 27], "shape": [2, 4, 17, 19], "specif": [2, 11, 13, 14, 19, 22], "vector": [2, 5, 6, 7, 8, 9, 13, 18, 25], "code": [2, 13, 14, 22, 24], "third": 2, "parti": 2, "call": [2, 8, 13, 15], "detect": [2, 16], "runtim": [2, 14], "pytorch": [2, 13, 17, 20, 24, 25, 27], "project": [2, 21], "dispatch": [2, 8], "optim": [2, 10, 13, 19], "test": [2, 10, 14, 15, 21, 27], "you": [2, 21, 23, 25], "don": [2, 11, 14, 24], "want": [2, 21], "togeth": [2, 23, 24], "default": [2, 11, 14, 15, 19], "turn": [2, 24], "off": [2, 15, 20], "simpli": [2, 14], "fbgemm_build_test": 2, "conda": [2, 16, 24], "For": [2, 16, 17, 20, 22, 23, 24, 25, 26], "platform": [2, 14, 22], "machin": [2, 14, 15, 16, 27], "microsoft": [2, 10], "visual": 2, "studio": 2, "2019": 2, "newer": [2, 14], "recommend": [2, 6, 10, 14, 15, 17], "here": [2, 8, 14, 15, 21, 23, 24, 25, 26], "necessari": [2, 14], "ninja": [2, 14], "etc": [2, 14, 19], "n": [2, 10, 14, 15, 26], "env_nam": [2, 14, 15], "y": [2, 6, 14, 15, 18, 24], "doxygen": [2, 23, 24], "make": [2, 12, 14, 21, 23, 24, 25], "openbla": 2, "packag": [2, 14, 16, 24], "onli": [2, 4, 10, 11, 12, 13, 16, 17, 21, 23, 24, 26], "clone": [2, 14], "along": [2, 14, 15], "its": [2, 8, 10, 11, 14, 19, 22, 24, 26], "insid": [2, 13, 14, 15, 16, 24, 26], "recurs": [2, 14], "http": [2, 14, 15, 21, 23, 24, 25], "github": [2, 14, 21], "com": [2, 14, 21], "cd": [2, 14, 16, 24], "assum": [2, 11], "process": [2, 6, 13, 15, 17, 21, 25], "straightforward": 2, "creat": [2, 8, 14, 17, 21, 23, 25, 26], "directori": [2, 14, 16, 21, 23, 24], "mkdir": 2, "argument": [2, 11, 23, 24, 25], "build_arg": 2, "duse_sanit": 2, "dfbgemm_library_typ": 2, "share": [2, 8], "dpython_execut": 2, "which": [2, 11, 13, 14, 15, 17, 19, 24], "python3": [2, 15], "document": [2, 8, 21, 22], "dfbgemm_build_doc": 2, "ON": [2, 22], "j": [2, 17], "verbos": 2, "As": [2, 11, 14, 15, 17], "write": [2, 13, 14, 15, 24, 25], "fail": [2, 15, 16, 23], "due": [2, 14], "known": [2, 14, 19], "regress": 2, "To": [2, 13, 14, 16, 26], "work": [2, 14, 15, 17, 21], "around": 2, "append": [2, 14, 23, 25], "export": [2, 14, 16], "prior": [2, 14, 15, 22], "cflag": 2, "wno": 2, "error": [2, 10, 15, 23, 24, 25], "mayb": 2, "uniniti": 2, "restrict": 2, "cxxflag": 2, "pleas": [2, 21, 23, 25], "see": [2, 8, 14, 15, 17, 23, 25, 26], "77939": 2, "1094": 2, "1666": 2, "more": [2, 8, 14, 19, 23, 25, 26], "detail": [2, 13, 15], "exactli": 2, "extra": 2, "need": [2, 13, 14, 15, 16, 17, 21, 23, 25, 26], "ad": [2, 14, 21, 24], "invoc": [2, 14, 24], "llvm": [2, 14], "standard": [2, 14], "libc": [2, 14], "openmp": [2, 14], "libomp": 2, "locat": [2, 8, 12, 13, 14, 17], "cc_path": 2, "cxx_path": 2, "dcmake_c_compil": 2, "dcmake_cxx_compil": 2, "dcmake_c_flag": [2, 14], "fopenmp": 2, "stdlib": [2, 14], "conda_prefix": [2, 14], "includ": [2, 9, 13, 14, 22, 23, 25], "dcmake_cxx_flag": [2, 14], "likewis": 2, "also": [2, 13, 14, 19, 26], "veri": [2, 14, 23, 24, 25], "target": [2, 8, 10, 11, 14, 17, 23, 24, 25, 26], "architectur": [2, 14, 15], "bc": [2, 14], "x64": 2, "program": [2, 21], "file": [2, 14, 15, 20, 21, 23, 24, 25, 26], "x86": [2, 27], "enterpris": 2, "vc": 2, "auxiliari": 2, "vcvarsal": 2, "bat": 2, "build_dir": 2, "dfbgemm_build_benchmark": 2, "dcmake_build_typ": 2, "releas": [2, 15], "cl": 2, "ex": 2, "v": [2, 4, 6, 16, 18], "int_nbit_split_embedding_codegen_lookup_funct": 3, "dev_weight": [3, 12], "uvm_weight": [3, 12], "weights_plac": [3, 12], "weights_offset": [3, 12], "weights_ti": [3, 12], "d_offset": [3, 10, 12], "total_d": [3, 12, 19], "max_int2_d": 3, "max_int4_d": 3, "max_int8_d": 3, "max_float16_d": 3, "max_float32_d": 3, "pooling_mod": [3, 19], "indice_weight": 3, "output_dtyp": [3, 10, 19], "lxu_cache_weight": [3, 12, 13], "lxu_cache_loc": [3, 12, 13], "row_align": [3, 12], "max_float8_d": 3, "fp8_exponent_bit": 3, "fp8_exponent_bia": 3, "int_nbit_split_embedding_uvm_caching_codegen_lookup_funct": 3, "cache_hash_size_cumsum": [3, 12], "total_cache_hash_s": [3, 12], "cache_index_table_map": [3, 12], "lxu_cache_st": [3, 12], "lxu_stat": 3, "simlar": 3, "uvm_cach": 3, "lookup": [3, 12, 13], "pruned_hashmap_lookup_cuda": 3, "hash_tabl": 3, "hash_table_offset": 3, "pruned_array_lookup_cuda": 3, "index_remap": 3, "index_remappings_offset": 3, "bounds_check_indices_cuda": 3, "rows_per_t": 3, "bounds_check_mod": [3, 19], "warn": [3, 19, 23], "b_ofset": 3, "max_b": [3, 12], "int_nbit_split_embedding_codegen_lookup_function_cpu": 3, "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu": 3, "pruned_hashmap_insert_unweighted_cpu": 3, "dense_indic": 3, "pruned_hashmap_lookup_unweighted_cpu": 3, "pruned_array_lookup_cpu": 3, "tupl": [4, 5, 6, 11, 12, 13, 19], "gqa_attn_splitk": 4, "xq": 4, "cache_k": 4, "cache_v": 4, "seq_posit": 4, "doubl": [4, 6, 10, 11], "qk_scale": 4, "num_split_k": 4, "kv_cache_quant_num_group": 4, "use_tensor_cor": 4, "cache_logical_dtype_int": 4, "decod": 4, "queri": 4, "split": 4, "w": [4, 16], "bf16": [4, 10], "int4": [4, 10], "kv": 4, "cuda": [4, 9, 19, 27], "gqa": 4, "cach": [4, 12, 13, 14, 19], "It": [4, 13, 14, 15, 17], "current": [4, 13, 14, 15, 17, 19], "context": 4, "16384": 4, "fix": [4, 11], "head": 4, "128": 4, "an": [4, 8, 11, 13, 15, 16, 17, 19, 23, 24, 25, 26], "arbitrari": [4, 13], "b": [4, 11, 14, 17, 18, 19, 23, 24, 25, 26], "h_q": 4, "d": [4, 17, 18, 26], "where": [4, 6, 8, 11, 13, 17, 18, 19], "batch": [4, 6, 11, 17, 18, 27], "num": 4, "max_t": 4, "h_kv": 4, "sequenc": 4, "contain": [4, 8, 13, 14, 17, 18, 19, 25], "actual": [4, 14], "token": [4, 17], "appli": [4, 11, 14, 17, 19], "after": [4, 11, 13, 14, 15, 16, 17, 19, 24, 25, 26], "qk": 4, "control": 4, "amount": [4, 19], "parallel": [4, 13], "wise": [4, 17, 19], "fp8": [4, 10], "quantiz": [4, 27], "singl": [4, 8, 10, 13], "now": 4, "core": 4, "wmma": 4, "instruct": [4, 21, 23, 24, 25, 27], "fast": 4, "kv_cach": 4, "2": [4, 10, 13, 14, 15, 16, 17, 18, 19, 23, 25, 26], "return": [4, 8, 10, 11, 13, 18, 19, 23, 24, 25], "A": [4, 8, 10, 13, 14, 15, 17, 18, 19, 22, 23, 24, 25], "combin": [4, 27], "metadata": [4, 13], "softmax": 4, "tbe_input_combine_cpu": 5, "indices_list": 5, "offsets_list": 5, "per_sample_weight": [5, 19], "include_last_offset": 5, "padding_fused_tbe_input_combine_cpu": 5, "batch_siz": 5, "solv": 6, "issu": [6, 8, 14, 15, 20], "when": [6, 11, 13, 14, 16, 17, 19, 23, 24, 26], "differ": [6, 11, 13, 17], "often": 6, "occur": [6, 13, 23], "spars": [6, 17, 27], "featur": [6, 11, 14, 17, 19, 20], "system": [6, 14, 15, 17], "well": [6, 11, 14, 23], "natur": [6, 17], "languag": [6, 17, 26], "jagged_to_padded_dense_forward": 6, "c10": [6, 10], "symintarrayref": 6, "max_length": [6, 18], "padding_valu": [6, 18], "jagged_dense_elementwise_add_jagged_output_cuda": 6, "x_valu": [6, 18], "x_offset": [6, 18, 25], "dens": [6, 18, 25], "jagged_to_padded_dens": [6, 18], "jagged_dense_elementwise_add": [6, 18], "jagged_dense_elementwise_mul": [6, 18], "batched_dense_vec_jagged_2d_mul": [6, 18], "a_valu": [6, 18], "a_offset": [6, 18], "dense_to_jag": [6, 18], "symint": 6, "total_l": [6, 18], "jagged_dense_elementwise_add_jagged_output": [6, 18], "jagged_1d_to_dens": [6, 18], "max_l": 6, "jagged_2d_to_dens": [6, 14, 15, 18, 24, 25], "max_sequence_length": [6, 18, 25], "recat_embedding_grad_output_cuda": 7, "grad_output": 7, "num_features_per_rank": 7, "recat_embedding_grad_output_mixed_d_cuda": 7, "dim_sum_per_rank": 7, "recat_embedding_grad_output_mixed_d_batch_cuda": 7, "cumsum_dim_sum_per_rank": 7, "recat_embedding_grad_output_mixed_d_cpu": 7, "new_managed_tensor": 8, "self": [8, 13], "alloc": [8, 23], "unifi": 8, "manag": [8, 14, 15, 19], "uvm": [8, 16], "Then": 8, "prefer": [8, 13, 15], "storag": [8, 10, 12, 13], "host": [8, 14], "establish": 8, "map": [8, 11, 12, 13, 17, 19], "devic": [8, 9, 14, 19], "new": [8, 10, 12, 23, 24, 25], "new_managed_tensor_meta": 8, "placehold": 8, "meta": [8, 22], "kei": [8, 13], "empti": [8, 17, 18, 26], "new_host_mapped_tensor": 8, "new_unified_tensor": 8, "is_host_map": 8, "either": [8, 10, 11, 13, 14, 15], "depend": [8, 10, 14, 15, 17], "new_unified_tensor_meta": 8, "new_vanilla_managed_tensor": 8, "allow": [8, 14], "automat": [8, 11, 16, 24], "uvm_storag": 8, "check": [8, 19], "gpu": [8, 13, 14, 15, 16, 27], "is_uvm_tensor": 8, "BUT": [8, 22], "uvm_to_cpu": 8, "effect": [8, 17], "move": [8, 13], "from": [8, 10, 11, 12, 13, 14, 15, 16, 17, 19, 21, 22, 23, 24, 25, 26], "uvm_to_devic": 8, "prototyp": 8, "whose": 8, "uvm_cuda_mem_advis": 8, "cuda_memory_advis": 8, "cudamemadvis": 8, "cudamemoryadvis": 8, "enum": [8, 10, 13], "avail": [8, 14, 15, 16, 24], "python": [8, 13, 14, 16, 23, 24, 26], "side": [8, 13, 14, 23, 25, 27], "namespac": 8, "over": [8, 14], "valid": 8, "inform": [8, 15, 17, 25, 26], "uvm_cuda_mem_prefetch_async": 8, "device_t": 8, "cudamemprefetchasync": 8, "prefetch": [8, 13], "destin": 8, "uvm_mem_advice_dont_fork": 8, "madvis": 8, "madv_dontfork": 8, "workaround": 8, "driver": [8, 14], "un": 8, "page": [8, 21, 26, 27], "tabl": [8, 11, 17, 27], "fork": [8, 21], "caus": [8, 14, 15, 22, 24], "slowdown": 8, "next": [8, 13, 17, 23, 25], "access": [8, 13, 19], "uvm_to_cpu_clon": 8, "copi": 8, "contigu": [8, 11], "thread": [8, 13], "memcpi": 8, "section": [9, 14, 15, 25], "variou": 9, "all_to_one_devic": 9, "inputtensor": 9, "target_devic": 9, "permute_pooled_embs_split_gpu": 9, "pooled_emb": 9, "offset_dim_list": 9, "permute_list": 9, "inv_offset_dim_list": 9, "inv_permute_list": 9, "permute_pooled_embs_auto_grad_split_gpu": 9, "permute_pooled_embs_auto_grad_gpu": 9, "permute_pooled_embs_cpu_impl": 9, "allow_dupl": 9, "permute_pooled_embs_split_cpu": 9, "permute_pooled_embs_auto_grad_split_cpu": 9, "permute_pooled_embs_auto_grad": 9, "permute_pooled_embs_auto_grad_cpu": 9, "model": [10, 11], "techniqu": 10, "reduc": [10, 13], "larg": [10, 14], "order": [10, 17, 21], "achiev": [10, 15], "better": [10, 13, 23], "small": 10, "loss": [10, 22], "accuraci": 10, "_float_to_bfloat16_gpu": 10, "brain": 10, "_bfloat16_to_float_gpu": 10, "_float_to_fp8rowwise_gpu": 10, "forward": 10, "dtype": [10, 19], "sparsetyp": [10, 19], "throw": [10, 23], "_fp8rowwise_to_float_gpu": 10, "represent": [10, 17], "_float_to_fused8bitrowwise_gpu": 10, "_half_to_fused8bitrowwise_gpu": 10, "half": 10, "_single_or_half_precision_to_fused8bitrowwise_gpu": 10, "_fused8bitrowwise_to_float_gpu": 10, "_fused8bitrowwise_to_half_gpu": 10, "_fused8bitrowwise_to_single_or_half_precision_gpu": 10, "quant_padding_float_typ": 10, "_fused8bitrowwise_to_float_mixed_dim_gpu": 10, "kfloat": 10, "khalf": 10, "_float_to_fusednbitrowwise_gpu": 10, "_half_to_fusednbitrowwise_gpu": 10, "_single_or_half_precision_to_fusednbitrowwise_gpu": 10, "_fusednbitrowwise_to_float_gpu": 10, "_fusednbitrowwise_to_half_gpu": 10, "_fusednbitrowwise_to_single_or_half_precision_gpu": 10, "_float_to_hfp8_gpu": 10, "ebit": 10, "max_po": 10, "hybrid": 10, "hfp8": 10, "_hfp8_to_float_gpu": 10, "_float_to_msfp_gpu": 10, "bounding_box_s": 10, "mbit": 10, "min_po": 10, "msfp": 10, "_msfp_to_float_gpu": 10, "_float_to_paddedfp8rowwise_gpu": 10, "row_dim": 10, "pad": [10, 13, 17, 18, 25], "_paddedfp8rowwise_to_float_gpu": 10, "output_last_dim": 10, "_fused8bitrowwise_to_float_cpu_out": 10, "_float_to_fused8bitrowwise_cpu_out": 10, "float_to_fused8bitrowwise_cpu": 10, "half_to_fused8bitrowwise_cpu": 10, "float_or_half_to_fused8bitrowwise_cpu": 10, "fused8bitrowwise_to_float_cpu": 10, "fused8bitrowwise_to_half_cpu": 10, "fused8bitrowwise_to_float_or_half_cpu": 10, "float_to_fp8rowwise_cpu": 10, "fp8rowwise_to_float_cpu": 10, "fusednbitrowwise_to_float_cpu": 10, "fusednbitrowwise_sbfront_to_float_cpu": 10, "int2": 10, "front": 10, "float32": 10, "torch": [10, 13, 14, 15, 18, 19, 24, 25], "quint4x2": 10, "quint2x4": 10, "quantizedcpu": 10, "backend": [10, 27], "purpos": [10, 17, 18, 19, 22], "becaus": [10, 14, 17], "refer": [10, 14, 17, 24, 25], "rate": [10, 19], "hold": [10, 13, 17], "fusednbitrowwise_to_half_cpu": 10, "fusednbitrowwise_to_float_or_half_cpu": 10, "floattofp8quantized_ref": 10, "nrow": 10, "ncol": 10, "fp8quantizedtofloat_ref": 10, "expand_into_jagged_permute_cuda": 11, "permut": 11, "input_offset": 11, "output_offset": 11, "expand_into_jagged_permut": 11, "expand": 11, "case": [11, 14, 15, 17, 21], "ha": [11, 13, 15, 17, 21, 23, 24], "across": [11, 14], "rank": [11, 17], "level": 11, "exclus": [11, 13], "op": [11, 15, 18, 25], "sit": 11, "we": [11, 13, 14, 17, 21], "deriv": [11, 17, 22], "arrai": [11, 18, 25], "comput": [11, 14, 15, 19], "formula": 11, "output_permut": 11, "table_offset": 11, "bag_offset": 11, "histogram_binning_calibration_cpu": 11, "logit": 11, "bin_num_exampl": 11, "bin_num_posit": 11, "positive_weight": 11, "lower_bound": 11, "upper_bound": 11, "bin_ctr_in_use_aft": 11, "bin_ctr_weight_valu": 11, "divid": [11, 17], "predict": 11, "rang": [11, 13, 17], "e": [11, 13, 14, 17, 23, 25, 26], "bin": [11, 14], "two": [11, 17, 18, 19, 24], "exampl": [11, 13, 14, 15, 16, 18, 19, 23, 24, 25, 26], "fall": [11, 14, 15], "bucket": [11, 14], "basic": [11, 13, 25], "histogram": 11, "result": [11, 13, 14, 18], "statist": 11, "real": 11, "ctr": 11, "num_po": 11, "num_exampl": 11, "final": 11, "calibr": 11, "pre": [11, 15], "cali": 11, "wai": [11, 22], "within": 11, "suffici": [11, 21, 24], "That": 11, "fine": 11, "grain": 11, "modul": [11, 14, 15, 19, 25], "theoret": 11, "layer": [11, 13], "uncalibr": 11, "befor": [11, 13, 14, 19, 26], "sigmoid": 11, "calibart": 11, "pass": [11, 19, 21, 24], "lower": 11, "bound": [11, 17], "calibration_target": 11, "observ": 11, "statisct": 11, "final_calibrated_predict": 11, "bin_ctr_weight": 11, "bin_ctr": 11, "calibrated_predict": 11, "bin_id": 11, "generic_histogram_binning_calibration_by_feature_cpu": 11, "segment_valu": 11, "segment_length": 11, "num_seg": 11, "bin_boundari": 11, "extens": [11, 23, 24], "ectr": 11, "abov": [11, 13, 15, 17, 22, 23, 25, 26], "accept": [11, 21], "sort": [11, 12, 13, 14], "keyjaggedtensor": 11, "num_bin": 11, "longer": [11, 20, 23], "still": [11, 14], "parambin_ctr_weight_valu": 11, "get_unique_indices_cuda": 12, "linear_indic": 12, "max_indic": 12, "compute_count": 12, "dedupl": 12, "get_unique_indices_with_inverse_cuda": 12, "compute_inverse_indic": 12, "lru_cache_find_uncached_cuda": 12, "unique_indic": 12, "unique_indices_length": [12, 13], "time_stamp": 12, "lru_stat": 12, "gather_cache_stat": 12, "uvm_cache_stat": 12, "lock_cache_lin": 12, "lxu_cache_locking_count": 12, "lru": [12, 13, 19], "uncach": [12, 13], "them": 12, "host_lxu_cache_slot": 12, "h_in": 12, "cache_set": [12, 19], "linearize_cache_indices_cuda": 12, "b_offset": 12, "indices_base_offset": 12, "linear": [12, 13], "uniqu": [12, 13, 26], "linearize_cache_indices_from_row_idx_cuda": 12, "update_table_indic": 12, "update_row_indic": 12, "format": [12, 24, 25], "inplac": 12, "updat": [12, 13, 14, 15, 16, 19, 21], "lru_cache_populate_cuda": 12, "hash_size_cumsum": 12, "linear_cache_indic": 12, "stochastic_round": [12, 19], "fetch": [12, 13], "insert": [12, 13, 26], "timestep": 12, "lru_cache_populate_byte_cuda": 12, "byte": [12, 13], "direct_mapped_lru_cache_populate_byte_cuda": 12, "lxu_cache_miss_timestamp": 12, "assoc": 12, "variant": [12, 14, 15, 24], "lfu_cache_populate_cuda": 12, "lfu_stat": 12, "lfu": [12, 19], "lfu_cache_populate_byte_cuda": 12, "lxu_cache_lookup_cuda": 12, "invalid_index": 12, "num_uniq_cache_indic": 12, "lxu_cache_locations_output": 12, "look": [12, 19], "up": [12, 13, 16, 19], "slot": [12, 13], "sentinel": [12, 13], "miss": [12, 13, 14], "direct_mapped_lxu_cache_lookup_cuda": 12, "lxu_cache_flush_cuda": 12, "flush": [12, 13], "reset_weight_momentum_cuda": 12, "momentum1_dev": 12, "momentum1_uvm": 12, "momentum1_plac": 12, "momentum1_offset": 12, "pruned_indic": 12, "pruned_indices_offset": 12, "logical_table_id": 12, "buffer_id": 12, "lxu_cache_locking_counter_decrement_cuda": 12, "decrement": 12, "counter": 12, "lxu_cache_locations_update_cuda": 12, "lxu_cache_locations_new": 12, "rocksdbwritemod": 13, "rocksdb": 13, "mode": [13, 16, 19], "offload": 13, "3": [13, 14, 15, 17, 18, 19, 22, 25], "iter": 13, "fwd_rocksdb_read": 13, "l2": [13, 19], "fwd": 13, "fwd_l1_evict": 13, "l1": 13, "eviciton": 13, "evict": 13, "bwd_l1_cnflct_miss_write_back": 13, "conflict": 13, "bwd": 13, "fill": 13, "potenti": 13, "trigger": 13, "onc": [13, 15, 21], "full": [13, 14, 15, 26], "addition": 13, "do": [13, 14, 15, 21], "io": 13, "enumer": 13, "inlin": [13, 26], "hash_shard": 13, "id": [13, 15], "num_shard": 13, "hash": 13, "shard": 13, "algorithm": [13, 19], "cuda_callback_func": 13, "cudastream_t": 13, "stream": [13, 14], "cudaerror_t": 13, "statu": 13, "functor": 13, "callback": 13, "cudastreamaddcallback": 13, "common": [13, 14, 15, 17, 25], "cudastreamcallback_t": 13, "cast": 13, "invok": [13, 14, 24], "delet": 13, "anoth": [13, 26], "none": [13, 19], "masked_index_put_cuda": 13, "count": 13, "use_pipelin": 13, "preferred_sm": 13, "similar": [13, 14, 17, 19], "index_put": 13, "ignor": [13, 16, 19, 24], "2d": [13, 17, 18, 19, 25], "put": [13, 24], "equival": [13, 17], "filter_": 13, "indices_": 13, "nonzero": 13, "flatten": 13, "1d": [13, 18, 19, 25], "flag": [13, 14, 24], "overlap": 13, "other": [13, 15, 17, 22, 23, 24, 25], "fraction": 13, "sm": 13, "resourc": 13, "competit": 13, "masked_index_select_cuda": 13, "index_select": 13, "ssd_generate_row_addrs_cuda": 13, "assigned_cache_slot": 13, "linear_index_inverse_indic": 13, "unique_indices_count_cumsum": 13, "cache_set_inverse_indic": 13, "inserted_ssd_weight": 13, "cache_set_sorted_unique_indic": 13, "memori": [13, 15, 19, 27], "tbe": [13, 27], "retriev": 13, "scratch": [13, 15], "hbm": [13, 19], "lxu": [13, 19], "associ": 13, "enabl": [13, 14, 16], "conveni": 13, "first": [13, 14, 23, 25, 26], "pointer": [13, 24], "moreov": 13, "list": [13, 14, 17, 18, 19, 22, 23, 25], "post": 13, "backward": [13, 19], "origin": 13, "being": [13, 14, 24], "prefix": [13, 14, 26], "ssd_update_row_addrs_cuda": 13, "ssd_row_addrs_curr": 13, "inserted_ssd_weights_curr_next_map": 13, "lxu_cache_locations_curr": 13, "linear_index_inverse_indices_curr": 13, "unique_indices_count_cumsum_curr": 13, "cache_set_inverse_indices_curr": 13, "inserted_ssd_weights_next": 13, "unique_indices_length_curr": 13, "pipelin": 13, "dure": [13, 14, 17, 19, 25], "reloc": 13, "correct": [13, 14], "between": [13, 17, 23, 24, 26], "been": [13, 14, 23], "compact_indices_cuda": 13, "compact_indic": 13, "compact_count": 13, "mask": 13, "compact": 13, "given": [13, 14, 17, 18], "operat": 13, "remov": 13, "7": [13, 14, 15, 17, 18, 19], "5": [13, 14, 17, 19], "repres": [13, 17], "keep": [13, 14], "class": [13, 24, 25], "cachelibcach": 13, "cachelib_cach": 13, "h": [13, 14, 18, 23], "cachelib": 13, "wrapper": 13, "cachlib": 13, "interact": 13, "maintain": 13, "relat": [13, 17], "initi": 13, "state": [13, 14], "logic": [13, 17, 23], "caller": 13, "reset": 13, "captur": 13, "delai": 13, "markus": 13, "boost": 13, "get": 13, "handl": [13, 17], "read": [13, 17, 19], "done": [13, 14, 15], "embeddingparameterserv": 13, "public": [13, 21, 24], "embeddingkvdb": 13, "ps_table_batched_embed": 13, "servic": [13, 22], "tp": 13, "client": 13, "cachecontext": 13, "kv_db_table_batched_embed": 13, "l2cach": 13, "num_miss": 13, "cached_addr_list": 13, "prealloc": 13, "invalid": 13, "spot": 13, "stai": 13, "struct": 13, "queueitem": 13, "queue": 13, "item": [13, 25], "background": 13, "param": [13, 23, 25], "read_handl": 13, "abstract": 13, "pair": [13, 26], "later": [13, 14], "separ": [13, 24], "get_cach": 13, "monitor": 13, "checkout": 13, "explan": 13, "enable_shared_from_thi": 13, "execut": [13, 15, 16], "dram": 13, "remot": 13, "scalabl": 13, "without": [13, 14, 22], "blow": 13, "subclass": 13, "embeddingrocksdb": 13, "ssd_table_batched_embed": 13, "fbgemm": [14, 15, 18, 20, 21, 22, 24, 25], "experiment": [14, 15, 27], "reproduc": [14, 15, 21, 22], "platform_nam": 14, "unam": 14, "miniconda_prefix": 14, "home": 14, "download": [14, 15], "wget": 14, "q": 14, "anaconda": 14, "miniconda3": 14, "latest": 14, "sh": 14, "o": [14, 15], "p": 14, "load": [14, 17, 25], "shortcut": 14, "bashrc": 14, "command": [14, 15, 23, 24], "against": [14, 16], "env": [14, 15], "name": [14, 15, 22, 23, 25], "python_vers": 14, "12": [14, 17, 19], "upgrad": 14, "pyopenssl": 14, "22": [14, 17], "requir": [14, 15, 16, 17, 19, 24, 25], "recent": [14, 15], "nvcc": 14, "capabl": [14, 16], "bare": 14, "metal": 14, "neither": [14, 22], "nor": [14, 22], "nvidia": 14, "present": [14, 25], "sinc": [14, 17], "pull": [14, 15, 24], "linux": [14, 15], "distribut": [14, 22], "ubuntu": 14, "04": 14, "11": [14, 15, 17], "entrypoint": 14, "devel": 14, "ubuntu22": 14, "rest": [14, 15], "mai": [14, 15, 17, 22], "construct": [14, 15, 17], "mechan": 14, "nvml": 14, "org": [14, 15, 25], "cuda_vers": 14, "label": 14, "verifi": [14, 15, 23, 25], "cuda_runtim": 14, "libnvidia": [14, 15], "ml": [14, 15], "libnccl": [14, 16], "printenv": 14, "extract": 14, "url": [14, 15], "builder": 14, "blob": 14, "main": [14, 21], "install_cuda": 14, "cudnn_url": 14, "redist": 14, "x86_64": 14, "26_cuda12": 14, "archiv": 14, "tar": 14, "xz": 14, "unpack": 14, "xvf": 14, "applic": [14, 15, 19, 23, 25], "alreadi": [14, 15, 21, 23, 25], "repositori": [14, 21], "cmake": 14, "configur": [14, 23], "amd": [14, 15], "minim": 14, "6": [14, 15, 17], "termin": 14, "while": [14, 24], "come": [14, 15], "reason": [14, 15, 24], "oper": [14, 15, 16, 27], "guid": [14, 25], "disabl": 14, "apt": 14, "prompt": 14, "debian_frontend": 14, "noninteract": 14, "db": 14, "radeon": 14, "amdgpu": 14, "focal": 14, "install_5": 14, "50601": 14, "1_all": 14, "deb": 14, "usecas": 14, "hiplibsdk": 14, "dkm": 14, "hipifi": 14, "hip": 14, "dev": 14, "20": 14, "sysroot": 14, "avoid": 14, "glibcxx": 14, "fbgemm_cpu": 14, "10": [14, 15, 17], "older": [14, 15], "accompani": [14, 24], "appropri": 14, "sysroot_linux": 14, "gcc_version": 14, "forg": [14, 24], "gxx_linux": 14, "64": [14, 17], "17": 14, "binari": [14, 22], "cento": 14, "librari": [14, 24, 27], "libstdc": 14, "what": [14, 24], "libcxx_path": 14, "print": [14, 15, 19, 25], "objdump": 14, "tc": 14, "grep": 14, "glibc_": 14, "sed": 14, "vu": 14, "cat": 14, "glibcxx_": 14, "possibl": [14, 17, 21, 22], "just": 14, "minimum": [14, 23, 24, 25], "llvm_version": 14, "16": [14, 17], "libcxx": 14, "outdat": 14, "aarch64": [14, 15], "cannot": 14, "explicitli": 14, "clangxx": 14, "rt": 14, "lib": [14, 15, 16], "ld_library_path": [14, 15, 16], "config": 14, "var": 14, "nvcc_prepend_flag": 14, "correctli": [14, 15, 16, 23, 24], "xcompil": 14, "ccbin": 14, "clangxx_path": 14, "unsupport": 14, "even": [14, 22], "though": [14, 15], "libstd": 14, "mean": [14, 17, 19], "regardless": 14, "scenario": 14, "binpath": 14, "overrid": 14, "exist": [14, 23, 25], "ln": 14, "sf": 14, "path_to_either_gcc_or_clang": 14, "cc": 14, "These": 14, "stage": [14, 17], "click": 14, "hypothesi": [14, 15], "jinja2": 14, "ncurs": 14, "numpi": [14, 15], "scikit": [14, 15], "offici": 14, "homepag": 14, "authorit": [14, 15, 24], "how": [14, 15, 16, 25], "nightli": [14, 15], "rc": 14, "alwai": 14, "reliabl": 14, "arriv": 14, "hour": 14, "than": [14, 17], "window": 14, "silent": 14, "both": [14, 20, 22, 24], "place": [14, 19], "artifact": 14, "select": 14, "thu": [14, 19], "import": [14, 15, 19, 25, 26], "much": [14, 23], "determinist": 14, "whl": [14, 15], "cu121": [14, 15], "rocm5": [14, 15], "ensur": [14, 15, 21], "properli": 14, "__version__": 14, "cuda_cmake_macro": 14, "gemm": 14, "via": 14, "manual": [14, 15, 23], "sha": 14, "pin": 14, "ci": [14, 15], "ci_commit_pin": 14, "txt": [14, 16, 24, 26], "dedb7bdf33": 14, "tag": [14, 23, 26], "fbgemm_vers": 14, "v0": 14, "fbgemm_": 14, "addit": [14, 16, 17, 18], "flow": 14, "becom": 14, "stale": 14, "problem": 14, "re": [14, 15], "attempt": 14, "failur": [14, 15], "clear": [14, 21], "py": [14, 15, 16, 24, 25], "clean": [14, 24], "must": [14, 15, 16, 17, 19, 22, 26], "package_nam": 14, "fbgemm_gpu_": 14, "convent": 14, "major": 14, "minor": 14, "py312": 14, "python_tag": 14, "determin": [14, 17], "processor": 14, "arch": 14, "python_plat_nam": 14, "manylinux2014_": 14, "maco": 14, "macosx_10_9_": 14, "arm64": 14, "macosx_11_0_": 14, "win_": 14, "cpu_onli": 14, "bdist_wheel": 14, "package_vari": 14, "plat": 14, "cxxprefix": 14, "presum": 14, "made": [14, 24], "debug": [14, 16], "assert": 14, "presenc": 14, "unabl": 14, "cudacxx": 14, "cuda_bin_path": 14, "cub": 14, "cub_dir": 14, "header": [14, 23, 26], "cudnn_include_dir": 14, "cudnn_librari": 14, "filepath": 14, "nvml_lib_path": 14, "nccl": [14, 16], "nccl_lib_path": 14, "sm70": [14, 15], "80": 14, "v100": [14, 15], "a100": [14, 15], "cuda_arch_list": 14, "unset": 14, "torch_cuda_arch_list": 14, "preced": 14, "dtorch_cuda_arch_list": 14, "By": [14, 21], "those": [14, 17, 18, 21, 25], "rocm_path": 14, "pytorch_rocm_arch": 14, "gfx906": 14, "gfx908": 14, "gfx90a": 14, "wiki": 14, "gentoo": 14, "rocminfo": 14, "gfx": 14, "dhip_root_dir": 14, "dtorch_use_hip_dsa": 14, "complet": [14, 21, 24], "lot": 14, "jinja": 14, "instanti": 14, "sure": [14, 21, 23, 25], "accident": 14, "cours": 14, "fbgemm_gpu_lib_path": 14, "fbgemm_gpu_pi": [14, 15], "defin": [14, 17, 23], "nm": 14, "gdcu": 14, "referenc": 14, "certain": 14, "gdc": 14, "merge_pooled_embed": [14, 15], "isol": [15, 24], "build": [15, 16, 23, 25, 27], "sm80": 15, "respect": 15, "guarante": 15, "especi": 15, "displai": [15, 26], "setup": 15, "smi": 15, "515": 15, "76": 15, "persist": 15, "bu": [15, 26], "disp": 15, "volatil": 15, "uncorr": 15, "ecc": 15, "fan": 15, "temp": 15, "perf": 15, "pwr": 15, "usag": [15, 24, 25], "cap": 15, "util": [15, 27], "mig": 15, "a10g": 15, "00000000": 15, "00": 15, "1e": 15, "31c": 15, "p0": 15, "59w": 15, "300w": 15, "0mib": 15, "23028mib": 15, "gi": 15, "pid": 15, "No": 15, "expos": 15, "imag": 15, "launch": 15, "toolkit": 15, "interfac": 15, "concis": 15, "info": [15, 23, 25], "dieedg": 15, "avgpwr": 15, "sclk": 15, "mclk": 15, "pwrcap": 15, "vram": 15, "33": 15, "0c": 15, "37": 15, "0w": 15, "300mhz": 15, "1200mhz": 15, "auto": [15, 24], "290": 15, "32": 15, "39": 15, "log": 15, "difficult": 15, "relev": [15, 23], "genai": 15, "triton_vers": 15, "45fff310c8": 15, "about": [15, 25], "link": [15, 24], "encount": 15, "signatur": [15, 24], "traceback": 15, "last": 15, "root": [15, 21], "miniconda": 15, "mycondaenv": 15, "site": 15, "_op": [15, 24], "line": [15, 25, 26], "565": 15, "__getattr__": 15, "overload_nam": 15, "_c": 15, "_jit_get_oper": 15, "qualified_op_nam": 15, "runtimeerror": 15, "except": [15, 23, 25], "wa": 15, "string": [15, 26], "post47": 15, "py3": 15, "egg": 15, "__init__": [15, 25], "21": 15, "_fbgemm_gpu_doc": 15, "noqa": 15, "f401": 15, "e402": 15, "18": 15, "569": 15, "rais": [15, 25], "attributeerror": [15, 25], "_opnamespac": 15, "object": [15, 17], "attribut": [15, 25], "cli": 15, "main_run": 15, "47": 15, "_zn6fbgemm48floatorhalftofusednbitrowwisequantizedsbhalfavx2itli2eeevpkt_miph": 15, "libtorch": 15, "visibl": 15, "incorrectli": [15, 24], "declar": [15, 23], "were": [15, 18], "pr": [15, 23, 24, 25], "1618": 15, "former": 15, "resolv": 15, "latter": 15, "seriou": 15, "tha": 15, "develop": [15, 24], "bench": 16, "good": [16, 22], "instal": [16, 24, 27], "pip": [16, 24], "pytest": 16, "rsx": 16, "pytestcollectionwarn": 16, "split_table_batched_embeddings_test": 16, "quantize_ops_test": 16, "sparse_ops_test": 16, "split_embedding_inference_converter_test": 16, "cuda_visible_devic": 16, "cuda_launch_block": 16, "involv": [16, 17], "rpath": 16, "fbgemm_test_with_rocm": 16, "hip_launch_block": 16, "split_table_batched_embeddings_benchmark": 16, "consecut": 17, "nestedtensor": 17, "raggedtensor": 17, "tensorflow": 17, "notabl": 17, "sentenc": 17, "maxlength": 17, "numel": 17, "greatest": 17, "divisor": 17, "smallest": 17, "sub": 17, "exclud": 17, "partit": 17, "impli": [17, 22], "denot": [17, 23, 25], "offest": 17, "outer": 17, "would": 17, "begin": 17, "maximum": [17, 18, 25], "densor": 17, "form": [17, 22], "figur": 17, "below": 17, "show": [17, 24], "accomod": 17, "At": [17, 23, 24, 25], "multipl": [17, 18, 19, 25, 27], "hadamard": 17, "product": [17, 22], "bmatrix": 17, "rightarrow": 17, "25": 17, "36": 17, "49": 17, "81": 17, "50": 17, "operand": 17, "word": 17, "ax": 17, "properti": 17, "elementwis": [17, 18], "start": [17, 18, 25, 26], "dim": 17, "onto": 17, "part": 17, "everi": 17, "converson": 17, "could": 17, "lead": 17, "smaller": 17, "expect": 17, "happen": 17, "give": 17, "situat": 17, "like": 17, "dense_tensor": 17, "jagged_tensor": 17, "break": 17, "exact": 17, "usual": 17, "area": 18, "outsid": 18, "coverag": 18, "total": [18, 19], "identit": 18, "add": [18, 21, 23, 24, 25], "structur": 18, "jagged_dense_dense_elementwise_add_jagged_output": 18, "y_0": 18, "y_1": 18, "multipli": [18, 19], "max_n": 18, "matmul": 18, "stacked_jagged_1d_to_dens": 18, "arg": [18, 25], "kwarg": 18, "stacked_jagged_2d_to_dens": 18, "split_table_batched_embeddings_op": 19, "splittablebatchedembeddingbagscodegen": 19, "embedding_spec": 19, "feature_table_map": 19, "cache_algorithm": 19, "cachealgorithm": 19, "cache_load_factor": 19, "cache_reserved_memori": 19, "cache_precis": 19, "weights_precis": 19, "enforce_hbm": 19, "optimtyp": 19, "exact_sgd": 19, "record_cache_metr": 19, "gradient_clip": 19, "max_gradi": 19, "learning_r": 19, "01": 19, "ep": 19, "0e": 19, "momentum": 19, "weight_decai": 19, "weight_decay_mod": 19, "weightdecaymod": 19, "eta": 19, "001": 19, "beta1": 19, "beta2": 19, "999": 19, "poolingmod": 19, "boundscheckmod": 19, "sourc": [19, 21, 22, 23, 24, 25], "embeddingloc": 19, "computedevic": 19, "spec": 19, "placement": 19, "capac": 19, "reserv": [19, 22], "adam": 19, "exact_adagrad": 19, "exact_rowwise_adagrad": 19, "lamb": 19, "lars_sgd": 19, "partial_rowwise_adam": 19, "partial_rowwise_lamb": 19, "sgd": 19, "recordcachemetr": 19, "record": 19, "hit": 19, "request": [19, 20, 24], "record_cache_miss_count": 19, "metric": 19, "record_tablewise_cache_miss": 19, "stochast": 19, "round": 19, "gradient": 19, "clip": 19, "learn": 19, "epsilon": 19, "adagrad": 19, "lar": 19, "decai": 19, "decoupl": 19, "pool": [19, 27], "boundari": 19, "fatal": 19, "conatin": 19, "column": 19, "feature_requires_grad": 19, "split_table_batched_embeddings_ops_common": 19, "split_table_batched_embeddings_ops_train": 19, "init_embedding_weights_uniform": 19, "split_embedding_weight": 19, "9426": 19, "7046": 19, "4214": 19, "0419": 19, "1331": 19, "7856": 19, "8124": 19, "2021": 19, "5771": 19, "5911": 19, "7792": 19, "1068": 19, "6203": 19, "4813": 19, "1677": 19, "4790": 19, "5587": 19, "0941": 19, "5754": 19, "3475": 19, "8952": 19, "1964": 19, "0810": 19, "4174": 19, "2513": 19, "4039": 19, "3775": 19, "3273": 19, "5399": 19, "0229": 19, "1455": 19, "8770": 19, "9520": 19, "4593": 19, "7169": 19, "6307": 19, "1765": 19, "8757": 19, "8614": 19, "2051": 19, "0603": 19, "9980": 19, "7958": 19, "5826": 19, "long": 19, "13": 19, "5197": 19, "2957": 19, "3578": 19, "1487": 19, "4873": 19, "3044": 19, "9801": 19, "2769": 19, "7164": 19, "8528": 19, "7159": 19, "6719": 19, "0784": 19, "2016": 19, "2176": 19, "1988": 19, "3825": 19, "5008": 19, "8991": 19, "1405": 19, "2637": 19, "9427": 19, "8902": 19, "3754": 19, "5013": 19, "6105": 19, "9968": 19, "3057": 19, "7621": 19, "9821": 19, "7314": 19, "6195": 19, "grad_fn": 19, "cppnode": 19, "splitlookupfunction_sgd_op": 19, "question": 20, "concern": 20, "discuss": 20, "kick": 20, "regard": 20, "feel": 20, "free": 20, "reach": 20, "easi": 21, "transpar": 21, "describ": 21, "activ": 21, "welcom": [21, 27], "your": [21, 24, 25], "branch": 21, "ve": 21, "chang": [21, 23, 25], "api": [21, 23, 24, 25], "suit": 21, "lint": 21, "haven": 21, "submit": [21, 23, 25], "facebook": [21, 22, 27], "open": 21, "track": 21, "bug": 21, "descript": [21, 23, 24, 25, 26], "abl": 21, "bounti": 21, "safe": 21, "disclosur": 21, "secur": 21, "go": 21, "outlin": 21, "agre": 21, "tree": 21, "claus": 22, "bsd": 22, "softwar": 22, "copyright": 22, "inc": 22, "affili": 22, "right": [22, 26], "redistribut": 22, "modif": 22, "permit": 22, "condit": 22, "met": 22, "retain": 22, "notic": 22, "disclaim": 22, "materi": 22, "contributor": 22, "endors": 22, "promot": 22, "written": 22, "permiss": 22, "BY": 22, "THE": 22, "holder": 22, "AND": 22, "AS": 22, "express": [22, 26], "OR": 22, "warranti": 22, "NOT": 22, "limit": [22, 24], "TO": 22, "OF": 22, "merchant": 22, "FOR": 22, "particular": 22, "IN": 22, "NO": 22, "event": 22, "shall": 22, "BE": 22, "liabl": 22, "indirect": 22, "incident": 22, "special": 22, "exemplari": 22, "consequenti": 22, "damag": 22, "procur": 22, "substitut": 22, "profit": 22, "busi": 22, "interrupt": 22, "theori": 22, "liabil": 22, "contract": 22, "strict": 22, "tort": 22, "neglig": 22, "aris": 22, "IF": 22, "advis": 22, "SUCH": 22, "javadoc": 23, "style": [23, 25], "comment": [23, 24, 26], "sphinx": [23, 24, 25], "breath": 23, "kept": 23, "cpp": [23, 25, 26], "cu": 23, "cuh": 23, "everyth": 23, "ifndef": 23, "doxygen_this_will_be_skip": 23, "endif": 23, "hidden": 23, "html": [23, 24, 25], "descriptionss": 23, "publish": [23, 25], "docstr": [23, 24, 25], "method": [23, 24, 25], "organ": 23, "yet": 23, "top": [23, 27], "defgroup": 23, "directli": [23, 25], "behavior": [23, 25], "tparam": 23, "thrown": [23, 25], "ingroup": 23, "brief": 23, "short": 23, "example_method": [23, 25], "def": [23, 25], "foo": [23, 25], "lst": [23, 25], "And": [23, 25], "verbatim": [23, 25], "text": [23, 25, 26], "diagram": [23, 25], "unpars": 23, "second": [23, 25], "prev": [23, 25], "usabl": [23, 25], "space": [23, 24, 25], "endcod": 23, "align": [23, 25], "param1": [23, 25], "param2": 23, "bad_alloc": 23, "logic_error": 23, "href": 23, "www": [23, 25], "nl": 23, "cmdlink": 23, "On": [23, 25], "doxygengroup": 23, "rst": [23, 25, 26], "content": [23, 26, 27], "toctre": [23, 25], "ini": 23, "taken": 23, "care": 23, "doc": [23, 24, 25, 26], "local": [23, 25], "netlifi": [23, 24, 25], "preview": [23, 25], "serv": 24, "yourself": 24, "shoe": 24, "who": 24, "understand": 24, "live": 24, "easier": 24, "leav": 24, "task": 24, "tool": 24, "graphviz": [24, 26], "assembl": 24, "view": 24, "prepend": 24, "sphinx_lint": 24, "technic": 24, "why": 24, "occasion": 24, "unresolv": 24, "might": 24, "opt": 24, "pycapsul": 24, "neg": 24, "silenc": 24, "nitpick": 24, "conf": 24, "domain": 24, "deploi": 24, "app": 24, "googl": 25, "c_size_t": 25, "ret": 25, "emplace_back": 25, "valueerror": 25, "14": 25, "restructuredtext": 25, "en": 25, "master": 25, "__": 25, "pep": 25, "0287": 25, "42": 25, "autofunct": 25, "c_ulong": 25, "mani": 25, "attach": 25, "fact": 25, "helper": 25, "codebas": 25, "add_doc": 25, "jag": [25, 27], "forc": 25, "hoc": 25, "the_new_doc_modul": 25, "remain": 25, "render": [25, 26], "anchor": 26, "_doc": 26, "underscor": 26, "_": 26, "There": 26, "elsewher": 26, "ref": 26, "literalinclud": 26, "rel": 26, "enclos": 26, "bracket": 26, "skiplin": 26, "suppli": 26, "math": 26, "k_": 26, "k_n": 26, "expressino": 26, "int_a": 26, "frac": 26, "2v": 26, "dx": 26, "left": 26, "dv": 26, "_a": 26, "du": 26, "digraph": 26, "altern": 26, "extern": 26, "dot": 26, "examplegraph": 26, "low": 27, "precis": 27, "high": 27, "convolut": 27, "server": 27, "infer": 27, "collect": 27, "transform": 27, "contribut": 27, "contact": 27, "licens": 27, "autovector": 27, "ssd": 27}, "objects": {"": [[13, 0, 1, "_CPPv4N16RocksdbWriteMode29BWD_L1_CNFLCT_MISS_WRITE_BACKE", "BWD_L1_CNFLCT_MISS_WRITE_BACK"], [1, 1, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec"], [1, 2, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::IndexType"], [1, 2, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::OffsetType"], [1, 2, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::OutType"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::block_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::data_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::exponent_bias"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::exponent_bits"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::index_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::indices"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::input"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::input_stride"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::is_bf16_out"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::is_weight_positional"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::normalize_by_lengths"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::offsets_or_lengths"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::out"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::output_size"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::output_stride"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::use_offsets"], [1, 3, 1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib", "EmbeddingSpMDMFP8_autovec::weights"], [1, 1, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::InType"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::IndexType"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::OffsetType"], [1, 2, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::OutType"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::block_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::data_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::index_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::indices"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::input"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::input_stride"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::is_bf16_in"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::is_bf16_out"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::is_weight_positional"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::no_bag"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::normalize_by_lengths"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::offsets_or_lengths"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::out"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::output_size"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::output_stride"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::scale_bias_last"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::use_offsets"], [1, 3, 1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb", "EmbeddingSpMDM_autovec::weights"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode5FLUSHE", "FLUSH"], [10, 1, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ebits"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::exponent_bias"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::input"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::ncols"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::nrows"], [10, 3, 1, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi", "FP8QuantizedToFloat_ref::output"], [10, 1, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu"], [10, 3, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::forward"], [10, 3, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::input"], [10, 3, 1, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t", "FP8rowwise_to_float_cpu::output_dtype"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode15FWD_L1_EVICTIONE", "FWD_L1_EVICTION"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode16FWD_ROCKSDB_READE", "FWD_ROCKSDB_READ"], [0, 1, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::len"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::m"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::max"], [0, 3, 1, "_CPPv410FindMinMaxPKfPfPf7int64_t", "FindMinMax::min"], [0, 1, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf"], [0, 2, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::InputType"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::bit_rate"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_columns"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::input_rows"], [0, 3, 1, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE", "FloatOrHalfToFusedNBitRowwiseQuantizedSBHalf::output"], [10, 1, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ebits"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::exponent_bias"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::input"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::max_pos"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::ncols"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::nrows"], [10, 3, 1, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd", "FloatToFP8Quantized_ref::output"], [0, 1, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize"], [0, 2, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::T"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::dst"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::len"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::noise_ratio"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::num_threads"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::qparams"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::src"], [0, 3, 1, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif", "FusedQuantizeDequantize::thread_id"], [0, 1, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::C"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::G"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::K"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::LAYOUT"], [0, 2, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::T"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::X"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::dst"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::scales"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::src"], [0, 3, 1, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T", "QuantizeGroupwise::zero_points"], [13, 4, 1, "_CPPv416RocksdbWriteMode", "RocksdbWriteMode"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode29BWD_L1_CNFLCT_MISS_WRITE_BACKE", "RocksdbWriteMode::BWD_L1_CNFLCT_MISS_WRITE_BACK"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode5FLUSHE", "RocksdbWriteMode::FLUSH"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode15FWD_L1_EVICTIONE", "RocksdbWriteMode::FWD_L1_EVICTION"], [13, 0, 1, "_CPPv4N16RocksdbWriteMode16FWD_ROCKSDB_READE", "RocksdbWriteMode::FWD_ROCKSDB_READ"], [0, 1, 1, "_CPPv46Xor128v", "Xor128"], [10, 1, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu"], [10, 3, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::forward"], [10, 3, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::input"], [10, 3, 1, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t", "_FP8rowwise_to_float_gpu::output_dtype"], [10, 1, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu"], [10, 3, 1, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE", "_bfloat16_to_float_gpu::input"], [10, 1, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu"], [10, 3, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::forward"], [10, 3, 1, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb", "_float_to_FP8rowwise_gpu::input"], [10, 1, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu"], [10, 3, 1, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE", "_float_to_bfloat16_gpu::input"], [10, 1, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out"], [10, 3, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::input"], [10, 3, 1, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor", "_float_to_fused8bitrowwise_cpu_out::output"], [10, 1, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu"], [10, 3, 1, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor", "_float_to_fused8bitrowwise_gpu::input"], [10, 1, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu"], [10, 3, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::bit_rate"], [10, 3, 1, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_float_to_fusednbitrowwise_gpu::input"], [10, 1, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::ebits"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::exponent_bias"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::input"], [10, 3, 1, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd", "_float_to_hfp8_gpu::max_pos"], [10, 1, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bias"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::bounding_box_size"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::ebits"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::input"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::max_pos"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::mbits"], [10, 3, 1, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd", "_float_to_msfp_gpu::min_pos"], [10, 1, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu"], [10, 3, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::forward"], [10, 3, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::input"], [10, 3, 1, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t", "_float_to_paddedFP8rowwise_gpu::row_dim"], [10, 1, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out"], [10, 3, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::input"], [10, 3, 1, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor", "_fused8bitrowwise_to_float_cpu_out::output"], [10, 1, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu"], [10, 3, 1, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE", "_fused8bitrowwise_to_float_gpu::input"], [10, 1, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu"], [10, 3, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::D_offsets"], [10, 3, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::input"], [10, 3, 1, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t", "_fused8bitrowwise_to_float_mixed_dim_gpu::output_dtype"], [10, 1, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu"], [10, 3, 1, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE", "_fused8bitrowwise_to_half_gpu::input"], [10, 1, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::input"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::output_dtype"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::quant_padding_float_type"], [10, 3, 1, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb", "_fused8bitrowwise_to_single_or_half_precision_gpu::scale_bias_last"], [10, 1, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu"], [10, 3, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::bit_rate"], [10, 3, 1, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_float_gpu::input"], [10, 1, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu"], [10, 3, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::bit_rate"], [10, 3, 1, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t", "_fusednbitrowwise_to_half_gpu::input"], [10, 1, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu"], [10, 3, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::bit_rate"], [10, 3, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::input"], [10, 3, 1, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t", "_fusednbitrowwise_to_single_or_half_precision_gpu::output_dtype"], [10, 1, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu"], [10, 3, 1, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor", "_half_to_fused8bitrowwise_gpu::input"], [10, 1, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu"], [10, 3, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::bit_rate"], [10, 3, 1, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t", "_half_to_fusednbitrowwise_gpu::input"], [10, 1, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu"], [10, 3, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::ebits"], [10, 3, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::exponent_bias"], [10, 3, 1, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t", "_hfp8_to_float_gpu::input"], [10, 1, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::bias"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::ebits"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::input"], [10, 3, 1, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t", "_msfp_to_float_gpu::mbits"], [10, 1, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::forward"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::input"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_dtype"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::output_last_dim"], [10, 3, 1, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t", "_paddedFP8rowwise_to_float_gpu::row_dim"], [10, 1, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu"], [10, 3, 1, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor", "_single_or_half_precision_to_fused8bitrowwise_gpu::input"], [10, 1, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu"], [10, 3, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::bit_rate"], [10, 3, 1, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t", "_single_or_half_precision_to_fusednbitrowwise_gpu::input"], [9, 1, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device"], [9, 3, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::inputTensors"], [9, 3, 1, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE", "all_to_one_device::target_device"], [6, 1, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul"], [6, 3, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_offsets"], [6, 3, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::a_values"], [6, 3, 1, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor", "batched_dense_vec_jagged_2d_mul::v"], [3, 1, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::B_ofsets"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::bounds_check_mode"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::indices"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::max_B"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::offsets"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::rows_per_table"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::warning"], [3, 3, 1, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t", "bounds_check_indices_cuda::weights"], [13, 1, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::compact_count"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::compact_indices"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::count"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::indices"], [13, 3, 1, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor", "compact_indices_cuda::masks"], [13, 1, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func"], [13, 3, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func::functor"], [13, 3, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func::status"], [13, 3, 1, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv", "cuda_callback_func::stream"], [6, 1, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged"], [6, 3, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged::dense"], [6, 3, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged::offsets"], [6, 3, 1, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE", "dense_to_jagged::total_L"], [12, 1, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::D_offsets"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::hash_size_cumsum"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lru_state"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_miss_timestamp"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::row_alignment"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::time_stamp"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::uvm_cache_stats"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_offsets"], [12, 3, 1, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lru_cache_populate_byte_cuda::weights_tys"], [12, 1, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::invalid_index"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "direct_mapped_lxu_cache_lookup_cuda::uvm_cache_stats"], [23, 1, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method"], [23, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::Alignment"], [23, 2, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::T"], [23, 3, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param1"], [23, 3, 1, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf", "example_method::param2"], [11, 1, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::input_offsets"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_offsets"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::output_size"], [11, 3, 1, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t", "expand_into_jagged_permute_cuda::permute"], [10, 1, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu"], [10, 3, 1, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor", "float_or_half_to_fused8bitrowwise_cpu::input"], [10, 1, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu"], [10, 3, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::forward"], [10, 3, 1, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb", "float_to_FP8rowwise_cpu::input"], [10, 1, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu"], [10, 3, 1, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor", "float_to_fused8bitrowwise_cpu::input"], [10, 1, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu"], [10, 3, 1, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor", "fused8bitrowwise_to_float_cpu::input"], [10, 1, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::input"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::output_dtype"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::quant_padding_float_type"], [10, 3, 1, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb", "fused8bitrowwise_to_float_or_half_cpu::scale_bias_last"], [10, 1, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu"], [10, 3, 1, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor", "fused8bitrowwise_to_half_cpu::input"], [10, 1, 1, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_sbfront_to_float_cpu"], [10, 3, 1, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_sbfront_to_float_cpu::bit_rate"], [10, 3, 1, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_sbfront_to_float_cpu::input"], [10, 1, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu"], [10, 3, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::bit_rate"], [10, 3, 1, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_float_cpu::input"], [10, 1, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu"], [10, 3, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::bit_rate"], [10, 3, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::input"], [10, 3, 1, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t", "fusednbitrowwise_to_float_or_half_cpu::output_dtype"], [10, 1, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu"], [10, 3, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::bit_rate"], [10, 3, 1, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t", "fusednbitrowwise_to_half_cpu::input"], [11, 1, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_boundaries"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_in_use_after"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_ctr_weight_value"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_examples"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::bin_num_positives"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::logit"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::num_segments"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::positive_weight"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_lengths"], [11, 3, 1, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td", "generic_histogram_binning_calibration_by_feature_cpu::segment_value"], [12, 1, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda"], [12, 3, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda::compute_count"], [12, 3, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda::linear_indices"], [12, 3, 1, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb", "get_unique_indices_cuda::max_indices"], [12, 1, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::compute_count"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::compute_inverse_indices"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::linear_indices"], [12, 3, 1, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb", "get_unique_indices_with_inverse_cuda::max_indices"], [4, 1, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::XQ"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::cache_K"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::cache_V"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::cache_logical_dtype_int"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::kv_cache_quant_num_groups"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::num_split_ks"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::qk_scale"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::seq_positions"], [4, 3, 1, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t", "gqa_attn_splitk::use_tensor_cores"], [10, 1, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu"], [10, 3, 1, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor", "half_to_fused8bitrowwise_cpu::input"], [13, 1, 1, "_CPPv410hash_shard7int64_t6size_t", "hash_shard"], [13, 3, 1, "_CPPv410hash_shard7int64_t6size_t", "hash_shard::id"], [13, 3, 1, "_CPPv410hash_shard7int64_t6size_t", "hash_shard::num_shards"], [11, 1, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_in_use_after"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_ctr_weight_value"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_examples"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::bin_num_positives"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::logit"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::lower_bound"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::positive_weight"], [11, 3, 1, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td", "histogram_binning_calibration_cpu::upper_bound"], [12, 1, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot"], [12, 3, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::C"], [12, 3, 1, "_CPPv419host_lxu_cache_slot7int64_t7int64_t", "host_lxu_cache_slot::h_in"], [3, 1, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::D_offsets"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::dev_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bias"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::fp8_exponent_bits"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indice_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::indices"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_locations"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::lxu_cache_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float16_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float32_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_float8_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int2_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int4_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::max_int8_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::offsets"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::output_dtype"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::pooling_mode"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::row_alignment"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::total_D"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::uvm_weights"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_offsets"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_placements"], [3, 3, 1, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function::weights_tys"], [3, 1, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::D_offsets"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::dev_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bias"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::fp8_exponent_bits"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indice_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::indices"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_locations"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::lxu_cache_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float16_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float32_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_float8_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int2_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int4_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::max_int8_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::offsets"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::output_dtype"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::pooling_mode"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::row_alignment"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::total_D"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::uvm_weights"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_offsets"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_placements"], [3, 3, 1, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE", "int_nbit_split_embedding_codegen_lookup_function_cpu::weights_tys"], [3, 1, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::D_offsets"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_hash_size_cumsum"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::cache_index_table_map"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::dev_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bias"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::fp8_exponent_bits"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indice_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::indices"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_locations"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_state"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_cache_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::lxu_state"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float16_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float32_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_float8_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int2_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int4_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::max_int8_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::offsets"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::output_dtype"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::pooling_mode"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::row_alignment"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_D"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::total_cache_hash_size"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::uvm_weights"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_offsets"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_placements"], [3, 3, 1, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function::weights_tys"], [3, 1, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::D_offsets"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_hash_size_cumsum"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::cache_index_table_map"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::dev_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bias"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::fp8_exponent_bits"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indice_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::indices"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_locations"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_state"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_cache_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::lxu_state"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float16_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float32_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_float8_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int2_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int4_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::max_int8_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::offsets"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::output_dtype"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::pooling_mode"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::row_alignment"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_D"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::total_cache_hash_size"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::uvm_weights"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_offsets"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_placements"], [3, 3, 1, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE", "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu::weights_tys"], [8, 1, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor"], [8, 3, 1, "_CPPv413is_uvm_tensorRK6Tensor", "is_uvm_tensor::self"], [6, 1, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::max_L"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::offsets"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::padding_value"], [6, 3, 1, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t", "jagged_1d_to_dense::values"], [6, 1, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense"], [6, 3, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::max_sequence_length"], [6, 3, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::offsets"], [6, 3, 1, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE", "jagged_2d_to_dense::values"], [6, 1, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_offsets"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::x_values"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add::y"], [6, 1, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output"], [6, 3, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_offsets"], [6, 3, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::x_values"], [6, 3, 1, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output::y"], [6, 1, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda"], [6, 3, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_offsets"], [6, 3, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::x_values"], [6, 3, 1, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_add_jagged_output_cuda::y"], [6, 1, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_offsets"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::x_values"], [6, 3, 1, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor", "jagged_dense_elementwise_mul::y"], [6, 1, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::max_lengths"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::offsets"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::padding_value"], [6, 3, 1, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense::values"], [6, 1, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::max_lengths"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::offsets"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::padding_value"], [6, 3, 1, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd", "jagged_to_padded_dense_forward::values"], [13, 5, 1, "_CPPv4N5kv_db12CacheContextE", "kv_db::CacheContext"], [13, 5, 1, "_CPPv4N5kv_db13EmbeddingKVDBE", "kv_db::EmbeddingKVDB"], [13, 5, 1, "_CPPv4N5kv_db9QueueItemE", "kv_db::QueueItem"], [13, 5, 1, "_CPPv4N8l2_cache13CacheLibCacheE", "l2_cache::CacheLibCache"], [12, 1, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::D_offsets"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lfu_state"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::row_alignment"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_offsets"], [12, 3, 1, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "lfu_cache_populate_byte_cuda::weights_tys"], [12, 1, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::D_offsets"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lfu_state"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::stochastic_rounding"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights"], [12, 3, 1, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb", "lfu_cache_populate_cuda::weights_offsets"], [12, 1, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::B_offsets"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::indices"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::indices_base_offset"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::max_B"], [12, 3, 1, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t", "linearize_cache_indices_cuda::offsets"], [12, 1, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda"], [12, 3, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_row_indices"], [12, 3, 1, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE", "linearize_cache_indices_from_row_idx_cuda::update_table_indices"], [12, 1, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::compute_inverse_indices"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lock_cache_line"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lru_state"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lxu_cache_locking_counter"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::max_indices"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::time_stamp"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::unique_indices"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::unique_indices_length"], [12, 3, 1, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb", "lru_cache_find_uncached_cuda::uvm_cache_stats"], [12, 1, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::D_offsets"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::hash_size_cumsum"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lru_state"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::row_alignment"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::time_stamp"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::uvm_cache_stats"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_offsets"], [12, 3, 1, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_byte_cuda::weights_tys"], [12, 1, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::D_offsets"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::hash_size_cumsum"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lock_cache_line"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lru_state"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_locking_counter"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::stochastic_rounding"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::time_stamp"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::uvm_cache_stats"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights"], [12, 3, 1, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE", "lru_cache_populate_cuda::weights_offsets"], [12, 1, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::D_offsets"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::cache_index_table_map"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::stochastic_rounding"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::total_D"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::uvm_weights"], [12, 3, 1, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb", "lxu_cache_flush_cuda::weights_offsets"], [12, 1, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda"], [12, 3, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations"], [12, 3, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::lxu_cache_locations_new"], [12, 3, 1, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE", "lxu_cache_locations_update_cuda::num_uniq_cache_indices"], [12, 1, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda"], [12, 3, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locations"], [12, 3, 1, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE", "lxu_cache_locking_counter_decrement_cuda::lxu_cache_locking_counter"], [12, 1, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::gather_cache_stats"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::invalid_index"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::linear_cache_indices"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_locations_output"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::num_uniq_cache_indices"], [12, 3, 1, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE", "lxu_cache_lookup_cuda::uvm_cache_stats"], [13, 1, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::count"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::indices"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::preferred_sms"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::self"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::use_pipeline"], [13, 3, 1, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_put_cuda::values"], [13, 1, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::count"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::indices"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::preferred_sms"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::self"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::use_pipeline"], [13, 3, 1, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t", "masked_index_select_cuda::values"], [8, 1, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor"], [8, 3, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::self"], [8, 3, 1, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_host_mapped_tensor::sizes"], [8, 1, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor"], [8, 3, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::self"], [8, 3, 1, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor::sizes"], [8, 1, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta"], [8, 3, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::self"], [8, 3, 1, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_managed_tensor_meta::sizes"], [8, 1, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor"], [8, 3, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::is_host_mapped"], [8, 3, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::self"], [8, 3, 1, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor::sizes"], [8, 1, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta"], [8, 3, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta::is_host_mapped"], [8, 3, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta::self"], [8, 3, 1, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb", "new_unified_tensor_meta::sizes"], [8, 1, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor"], [8, 3, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::self"], [8, 3, 1, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE", "new_vanilla_managed_tensor::sizes"], [5, 1, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::batch_size"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::include_last_offsets"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::indices_list"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::offsets_list"], [5, 3, 1, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t", "padding_fused_tbe_input_combine_cpu::per_sample_weights"], [9, 1, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::inv_permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad::pooled_embs"], [9, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::inv_permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_cpu::pooled_embs"], [9, 1, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::inv_permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::offset_dim_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::permute_list"], [9, 3, 1, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "permute_pooled_embs_auto_grad_gpu::pooled_embs"], [9, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::inv_permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_cpu::pooled_embs"], [9, 1, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::inv_permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::offset_dim_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::permute_list"], [9, 3, 1, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_auto_grad_split_gpu::pooled_embs"], [9, 1, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::allow_duplicates"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_offset_dim_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::inv_permute_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::offset_dim_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::permute_list"], [9, 3, 1, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb", "permute_pooled_embs_cpu_impl::pooled_embs"], [9, 1, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::inv_permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_cpu::pooled_embs"], [9, 1, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::inv_permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::offset_dim_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::permute_list"], [9, 3, 1, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE", "permute_pooled_embs_split_gpu::pooled_embs"], [3, 1, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::index_remappings_offsets"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::indices"], [3, 3, 1, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cpu::offsets"], [3, 1, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::index_remappings_offsets"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::indices"], [3, 3, 1, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_array_lookup_cuda::offsets"], [3, 1, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::dense_indices"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::hash_table_offsets"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::indices"], [3, 3, 1, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_insert_unweighted_cpu::offsets"], [3, 1, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::hash_table_offsets"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::indices"], [3, 3, 1, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_cuda::offsets"], [3, 1, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::hash_table_offsets"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::indices"], [3, 3, 1, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor", "pruned_hashmap_lookup_unweighted_cpu::offsets"], [13, 5, 1, "_CPPv4N2ps24EmbeddingParameterServerE", "ps::EmbeddingParameterServer"], [7, 1, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda"], [7, 3, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::grad_output"], [7, 3, 1, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_cuda::num_features_per_rank"], [7, 1, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda"], [7, 3, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::cumsum_dim_sum_per_rank"], [7, 3, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::dim_sum_per_rank"], [7, 3, 1, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor", "recat_embedding_grad_output_mixed_D_batch_cuda::grad_output"], [7, 1, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu"], [7, 3, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::dim_sum_per_rank"], [7, 3, 1, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cpu::grad_output"], [7, 1, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda"], [7, 3, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::dim_sum_per_rank"], [7, 3, 1, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE", "recat_embedding_grad_output_mixed_D_cuda::grad_output"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::DIRECT"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::Q_GRAN"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::block"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::inp"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_in"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::ld_out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingAvx2::r"], [0, 1, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::A_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::BIAS_TYPE"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::B_SYMMETRIC"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::C_PER_G"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::FUSE_RELU"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::HAS_BIAS"], [0, 2, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::Q_GRAN"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::block"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::inp"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_in"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::ld_out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::out"], [0, 3, 1, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE", "requantizeOutputProcessingGConvAvx512::r"], [12, 1, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::D_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::buffer_ids"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::cache_hash_size_cumsum"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::dev_weights"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::logical_table_ids"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_state"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::lxu_cache_weights"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_dev"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_placements"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::momentum1_uvm"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::pruned_indices_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::total_cache_hash_size"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::uvm_weights"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_offsets"], [12, 3, 1, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t", "reset_weight_momentum_cuda::weights_placements"], [13, 5, 1, "_CPPv4N3ssd16EmbeddingRocksDBE", "ssd::EmbeddingRocksDB"], [13, 1, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::assigned_cache_slots"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::cache_set_inverse_indices"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::cache_set_sorted_unique_indices"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::inserted_ssd_weights"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::linear_index_inverse_indices"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::lxu_cache_locations"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::lxu_cache_weights"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::unique_indices_count_cumsum"], [13, 3, 1, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_generate_row_addrs_cuda::unique_indices_length"], [13, 1, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::cache_set_inverse_indices_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::inserted_ssd_weights_curr_next_map"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::inserted_ssd_weights_next"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::linear_index_inverse_indices_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::lxu_cache_locations_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::lxu_cache_weights"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::ssd_row_addrs_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::unique_indices_count_cumsum_curr"], [13, 3, 1, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor", "ssd_update_row_addrs_cuda::unique_indices_length_curr"], [5, 1, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::include_last_offsets"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::indices_list"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::offsets_list"], [5, 3, 1, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE", "tbe_input_combine_cpu::per_sample_weights"], [8, 1, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise"], [8, 3, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::cuda_memory_advise"], [8, 3, 1, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t", "uvm_cuda_mem_advise::self"], [8, 1, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE", "uvm_cuda_mem_prefetch_async"], [8, 3, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::device_t"], [8, 3, 1, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE", "uvm_cuda_mem_prefetch_async::self"], [8, 1, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork"], [8, 3, 1, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor", "uvm_mem_advice_dont_fork::self"], [8, 1, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage"], [8, 3, 1, "_CPPv411uvm_storageRK6Tensor", "uvm_storage::self"], [8, 1, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu"], [8, 3, 1, "_CPPv410uvm_to_cpuRK6Tensor", "uvm_to_cpu::self"], [8, 1, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone"], [8, 3, 1, "_CPPv416uvm_to_cpu_cloneRK6Tensor", "uvm_to_cpu_clone::self"], [8, 1, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device"], [8, 3, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::prototype"], [8, 3, 1, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor", "uvm_to_device::self"], [19, 6, 0, "-", "fbgemm_gpu"]], "fbgemm_gpu.docs.examples": [[25, 7, 1, "", "example_method"]], "fbgemm_gpu.split_table_batched_embeddings_ops": [[19, 7, 1, "", "SplitTableBatchedEmbeddingBagsCodegen"]], "torch.ops.fbgemm": [[18, 7, 1, "", "batched_dense_vec_jagged_2d_mul"], [18, 7, 1, "", "dense_to_jagged"], [18, 7, 1, "", "jagged_1d_to_dense"], [18, 7, 1, "", "jagged_2d_to_dense"], [18, 7, 1, "", "jagged_dense_dense_elementwise_add_jagged_output"], [18, 7, 1, "", "jagged_dense_elementwise_add"], [18, 7, 1, "", "jagged_dense_elementwise_add_jagged_output"], [18, 7, 1, "", "jagged_dense_elementwise_mul"], [18, 7, 1, "", "jagged_to_padded_dense"], [18, 7, 1, "", "stacked_jagged_1d_to_dense"], [18, 7, 1, "", "stacked_jagged_2d_to_dense"]]}, "objtypes": {"0": "cpp:enumerator", "1": "cpp:function", "2": "cpp:templateParam", "3": "cpp:functionParam", "4": "cpp:enum", "5": "cpp:class", "6": "py:module", "7": "py:function"}, "objnames": {"0": ["cpp", "enumerator", "C++ enumerator"], "1": ["cpp", "function", "C++ function"], "2": ["cpp", "templateParam", "C++ template parameter"], "3": ["cpp", "functionParam", "C++ function parameter"], "4": ["cpp", "enum", "C++ enum"], "5": ["cpp", "class", "C++ class"], "6": ["py", "module", "Python module"], "7": ["py", "function", "Python function"]}, "titleterms": {"quantiz": [0, 10], "util": 0, "refer": [0, 26], "implement": [0, 1], "method": [0, 1], "avx": 0, "2": 0, "512": 0, "tbe": [1, 19], "cpu": [1, 3, 6, 7, 10, 11, 14, 15], "autovector": 1, "fp8": 1, "16": 1, "32": 1, "autovec": 1, "build": [2, 14, 24], "instruct": [2, 14, 15, 16], "fbgemm": [2, 27], "requir": 2, "hardwar": 2, "softwar": 2, "depend": 2, "asmjit": 2, "cpuinfo": 2, "googletest": 2, "set": [2, 14, 15, 24], "up": [2, 14, 15, 24], "an": [2, 14], "isol": [2, 14], "environ": [2, 14, 15, 16, 24], "instal": [2, 14, 15], "tool": [2, 14], "c": [2, 14, 23, 27], "compil": [2, 14], "other": [2, 14, 26], "librari": [2, 15], "prepar": [2, 14], "linux": 2, "maco": 2, "cmake": 2, "gcc": [2, 14], "issu": [2, 21], "12": 2, "clang": [2, 14], "bazel": 2, "window": 2, "embed": [3, 9, 12, 13, 19], "oper": [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 17, 18, 19], "cuda": [3, 6, 7, 8, 10, 11, 13, 14, 15, 16], "experiment": 4, "attent": 4, "combin": [5, 17], "input": 5, "jag": [6, 17, 18], "tensor": [6, 17, 18], "layout": 7, "transform": 7, "memori": 8, "pool": 9, "merg": 9, "permut": 9, "spars": 11, "data": 11, "tabl": [12, 19], "batch": [12, 19], "ssd": 13, "miniconda": 14, "conda": [14, 15], "onli": [14, 15], "genai": 14, "docker": [14, 15], "imag": 14, "cudnn": 14, "cutlass": 14, "rocm": [14, 15, 16], "miopen": 14, "symlink": 14, "pytorch": [14, 15], "through": [14, 15], "pip": [14, 15], "post": [14, 15], "check": [14, 15], "triton": [14, 15], "pre": 14, "setup": [14, 16], "The": 14, "process": 14, "wheel": 14, "variabl": 14, "For": 14, "develop": [14, 27], "undefin": [14, 15], "symbol": [14, 15], "glibc": 14, "version": 14, "compat": 14, "nvidia": 15, "driver": 15, "contain": 15, "runtim": 15, "amdgpu": 15, "python": [15, 25, 27], "fbgemm_gpu": [15, 16, 24, 27], "packag": 15, "public": 15, "pypi": 15, "test": 16, "run": 16, "variant": 16, "benchmark": 16, "high": 17, "level": 17, "overview": [17, 27], "format": 17, "valu": 17, "offset": 17, "max": 17, "length": 17, "exampl": 17, "arithmet": 17, "convers": 17, "dens": 17, "contact": 20, "u": 20, "github": 20, "slack": 20, "contribut": 21, "code": [21, 23, 25, 26], "conduct": 21, "pull": 21, "request": 21, "contributor": 21, "licens": [21, 22], "agreement": 21, "cla": 21, "ad": [23, 25, 26], "document": [23, 24, 25, 26, 27], "gener": [24, 25, 27], "guidelin": 24, "specif": 24, "guid": 24, "toolchain": 24, "lint": 24, "deploy": 24, "preview": 24, "todo": 25, "auto": 25, "sphinx": 26, "pointer": 26, "section": 26, "referenc": 26, "sourc": 26, "latex": 26, "graph": 26, "homepag": 27, "info": 27, "api": 27}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "sphinx": 57}, "alltitles": {"Test Instructions": [[16, "test-instructions"]], "Setup the FBGEMM_GPU Test Environment": [[16, "setup-the-fbgemm-gpu-test-environment"]], "Running FBGEMM_GPU Tests": [[16, "running-fbgemm-gpu-tests"]], "Testing with the CUDA Variant": [[16, "testing-with-the-cuda-variant"]], "Testing with the ROCm Variant": [[16, "testing-with-the-rocm-variant"]], "Running FBGEMM_GPU Benchmarks": [[16, "running-fbgemm-gpu-benchmarks"]], "Jagged Tensor Operators": [[17, "jagged-tensor-operators"], [6, "jagged-tensor-operators"], [18, "jagged-tensor-operators"]], "High Level Overview": [[17, "high-level-overview"]], "Jagged Tensor Format": [[17, "jagged-tensor-format"]], "Values": [[17, "values"]], "Offsets": [[17, "offsets"]], "Max Lengths": [[17, "max-lengths"]], "Jagged Tensor Example": [[17, "jagged-tensor-example"]], "Jagged Tensor Operations": [[17, "jagged-tensor-operations"]], "Arithmetic Operations": [[17, "arithmetic-operations"]], "Conversion Operations": [[17, "conversion-operations"]], "Jagged to Dense": [[17, "jagged-to-dense"]], "Dense to Jagged": [[17, "dense-to-jagged"]], "Combined Arithmetic + Conversion Operations": [[17, "combined-arithmetic-conversion-operations"]], "Installation Instructions": [[15, "installation-instructions"]], "Set Up CPU-Only Environment": [[15, "set-up-cpu-only-environment"]], "Set Up CUDA Environment": [[15, "set-up-cuda-environment"]], "Install NVIDIA Drivers": [[15, "install-nvidia-drivers"]], "Set Up the CUDA Docker Container and Conda Environment": [[15, "set-up-the-cuda-docker-container-and-conda-environment"]], "Install the CUDA Runtime": [[15, "install-the-cuda-runtime"]], "Set Up ROCm Environment": [[15, "set-up-rocm-environment"]], "Install AMDGPU Drivers": [[15, "install-amdgpu-drivers"]], "Set Up the ROCm Docker Container and Conda Environment": [[15, "set-up-the-rocm-docker-container-and-conda-environment"]], "Install Python Libraries": [[15, "install-python-libraries"]], "Install PyTorch": [[15, "install-pytorch"], [14, "install-pytorch"]], "Install Triton": [[15, "install-triton"]], "Install the FBGEMM_GPU Package": [[15, "install-the-fbgemm-gpu-package"]], "Install through PyTorch PIP": [[15, "install-through-pytorch-pip"]], "Install through Public PyPI": [[15, "install-through-public-pypi"]], "Post-Installation Checks": [[15, "post-installation-checks"]], "Undefined Symbols": [[15, "undefined-symbols"]], "Contact Us": [[20, "contact-us"]], "GitHub": [[20, "github"]], "Slack": [[20, "slack"]], "Table Batched Embedding (TBE) Operators": [[19, "module-fbgemm_gpu"]], "FBGEMM and FBGEMM_GPU Documentation Homepage": [[27, "fbgemm-and-fbgemm-gpu-documentation-homepage"]], "General Info": [[27, null]], "FBGEMM Development": [[27, null]], "FBGEMM_GPU Development": [[27, null]], "FBGEMM_GPU Overview": [[27, null]], "FBGEMM C++ API": [[27, null]], "FBGEMM_GPU C++ API": [[27, null]], "FBGEMM_GPU Python API": [[27, null]], "Adding Documentation to Python Code": [[25, "adding-documentation-to-python-code"]], "Todo": [[25, "id1"]], "Adding Documentation to Auto-Generated Python Code": [[25, "adding-documentation-to-auto-generated-python-code"]], "Documentation": [[24, "documentation"]], "General Documentation Guidelines": [[24, "general-documentation-guidelines"]], "Specific Documentation Guides": [[24, "specific-documentation-guides"]], "Building the Documentation": [[24, "building-the-documentation"]], "Set Up Build Environment": [[24, "set-up-build-environment"]], "Build FBGEMM_GPU": [[24, "build-fbgemm-gpu"]], "Set Up the Documentation Toolchain": [[24, "set-up-the-documentation-toolchain"]], "Build the Documentation": [[24, "build-the-documentation"]], "Linting the Documentation": [[24, "linting-the-documentation"]], "Deployment Preview": [[24, "deployment-preview"]], "Sphinx Documentation Pointers": [[26, "sphinx-documentation-pointers"]], "References Other Sections of the Documentation": [[26, "references-other-sections-of-the-documentation"]], "Referencing the Source Code": [[26, "referencing-the-source-code"]], "Adding LaTeX": [[26, "adding-latex"]], "Adding Graphs": [[26, "adding-graphs"]], "Build Instructions": [[2, "build-instructions"], [14, "build-instructions"]], "FBGEMM Requirements": [[2, "fbgemm-requirements"]], "Hardware Requirements": [[2, "hardware-requirements"]], "Software Dependencies": [[2, "software-dependencies"]], "asmjit": [[2, "asmjit"]], "cpuinfo": [[2, "cpuinfo"]], "GoogleTest": [[2, "googletest"]], "Set Up an Isolated Build Environment": [[2, "set-up-an-isolated-build-environment"], [14, "set-up-an-isolated-build-environment"]], "Install the Build Tools": [[2, "install-the-build-tools"], [14, "install-the-build-tools"]], "C/C++ Compiler": [[2, "c-c-compiler"]], "Other Build Tools": [[2, "other-build-tools"], [14, "other-build-tools"]], "Build the FBGEMM Library": [[2, "build-the-fbgemm-library"]], "Preparing the Build": [[2, "preparing-the-build"], [14, "preparing-the-build"]], "Building on Linux and macOS (CMake + GCC)": [[2, "building-on-linux-and-macos-cmake-gcc"]], "Build Issues with GCC 12+": [[2, "build-issues-with-gcc-12"]], "Building on Linux and macOS (CMake + Clang)": [[2, "building-on-linux-and-macos-cmake-clang"]], "Building on Linux (Bazel)": [[2, "building-on-linux-bazel"]], "Building on Windows": [[2, "building-on-windows"]], "TBE CPU Autovectorization": [[1, "tbe-cpu-autovectorization"]], "FP8/16/32 Autovec Implementation Methods": [[1, "fp8-16-32-autovec-implementation-methods"]], "Quantization Utilities": [[0, "quantization-utilities"]], "Reference Implementation Methods": [[0, "reference-implementation-methods"]], "AVX-2 Implementation Methods": [[0, "avx-2-implementation-methods"]], "AVX-512 Implementation Methods": [[0, "avx-512-implementation-methods"]], "Pooled Embeddings Operators": [[9, "pooled-embeddings-operators"]], "Merge Operators": [[9, "merge-operators"]], "Permutation Operators": [[9, "permutation-operators"]], "Sparse Data Operators": [[11, "sparse-data-operators"]], "CUDA Operators": [[11, "cuda-operators"], [10, "cuda-operators"], [3, "cuda-operators"], [6, "cuda-operators"], [7, "cuda-operators"], [13, "cuda-operators"]], "CPU Operators": [[11, "cpu-operators"], [10, "cpu-operators"], [3, "cpu-operators"], [6, "cpu-operators"], [7, "cpu-operators"]], "Quantization Operators": [[10, "quantization-operators"]], "Adding Documentation to C++ Code": [[23, "adding-documentation-to-c-code"]], "Contributing": [[21, "contributing"]], "Code of Conduct": [[21, "code-of-conduct"]], "Pull Requests": [[21, "pull-requests"]], "Contributor License Agreement (\u201cCLA\u201d)": [[21, "contributor-license-agreement-cla"]], "Issues": [[21, "issues"]], "License": [[21, "license"], [22, "license"]], "Combine Input Operators": [[5, "combine-input-operators"]], "Experimental Operators": [[4, "experimental-operators"]], "Attention Operators": [[4, "attention-operators"]], "Embedding Operators": [[3, "embedding-operators"]], "Layout Transformation Operators": [[7, "layout-transformation-operators"]], "CUDA Memory Operators": [[8, "cuda-memory-operators"]], "Table Batched Embedding Operators": [[12, "table-batched-embedding-operators"]], "Install Miniconda": [[14, "install-miniconda"]], "Set Up the Conda Environment": [[14, "set-up-the-conda-environment"]], "Set Up for CPU-Only Build": [[14, "set-up-for-cpu-only-build"]], "Set Up for CUDA / GenAI-Only Build": [[14, "set-up-for-cuda-genai-only-build"]], "CUDA Docker Image": [[14, "cuda-docker-image"]], "Install CUDA": [[14, "install-cuda"]], "Install cuDNN": [[14, "install-cudnn"]], "Install CUTLASS": [[14, "install-cutlass"]], "Set Up for ROCm Build": [[14, "set-up-for-rocm-build"]], "ROCm Docker Image": [[14, "rocm-docker-image"]], "Install ROCm": [[14, "install-rocm"]], "Install MIOpen": [[14, "install-miopen"]], "C/C++ Compiler (GCC)": [[14, "c-c-compiler-gcc"]], "C/C++ Compiler (Clang)": [[14, "c-c-compiler-clang"]], "Compiler Symlinks": [[14, "compiler-symlinks"]], "Installation Through Conda": [[14, "installation-through-conda"]], "Installation Through PyTorch PIP": [[14, "installation-through-pytorch-pip"]], "Post-Install Checks": [[14, "post-install-checks"]], "Install PyTorch-Triton": [[14, "install-pytorch-triton"]], "Other Pre-Build Setup": [[14, "other-pre-build-setup"]], "The Build Process": [[14, "the-build-process"]], "Set Wheel Build Variables": [[14, "set-wheel-build-variables"]], "CPU-Only Build": [[14, "cpu-only-build"]], "CUDA Build": [[14, "cuda-build"]], "GenAI-Only Build": [[14, "genai-only-build"]], "ROCm Build": [[14, "rocm-build"]], "Post-Build Checks (For Developers)": [[14, "post-build-checks-for-developers"]], "Undefined Symbols Check": [[14, "undefined-symbols-check"]], "GLIBC Version Compatibility Check": [[14, "glibc-version-compatibility-check"]], "SSD Embedding Operators": [[13, "ssd-embedding-operators"]]}, "indexentries": {"findminmax (c++ function)": [[0, "_CPPv410FindMinMaxPKfPfPf7int64_t"]], "floatorhalftofusednbitrowwisequantizedsbhalf (c++ function)": [[0, "_CPPv4I0E44FloatOrHalfToFusedNBitRowwiseQuantizedSBHalfviPK9InputType6size_tiPNSt7uint8_tE"]], "fusedquantizedequantize (c++ function)": [[0, "_CPPv4I0E23FusedQuantizeDequantizevPKfPfNSt7int64_tERK24TensorQuantizationParamsiif"]], "quantizegroupwise (c++ function)": [[0, "_CPPv4I0_8layout_tE17QuantizeGroupwisevPKfiiiiPKfPKNSt7int32_tEP1T"]], "xor128 (c++ function)": [[0, "_CPPv46Xor128v"]], "requantizeoutputprocessingavx2 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b0_bE30requantizeOutputProcessingAvx2vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "requantizeoutputprocessinggconvavx512 (c++ function)": [[0, "_CPPv4I_b_b_23QuantizationGranularity_b_b_i0E37requantizeOutputProcessingGConvAvx512vPNSt7uint8_tEPKNSt7int32_tERK12block_type_tiiRK22requantizationParams_tI9BIAS_TYPEE"]], "embeddingspmdmfp8_autovec (c++ function)": [[1, "_CPPv4I000E25EmbeddingSpMDMFP8_autovecbK7int64_tK7int64_tK7int64_tK7int64_tPK7uint8_tPK9IndexTypePK10OffsetTypePKfbP7OutTypebb7int64_t7int64_tiib"]], "embeddingspmdm_autovec (c++ function)": [[1, "_CPPv4I0000E22EmbeddingSpMDM_autovecbKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEKNSt7int64_tEPK6InTypePK9IndexTypePK10OffsetTypePKfbP7OutTypebbNSt7int64_tENSt7int64_tEbbbb"]], "bounds_check_indices_cuda (c++ function)": [[3, "_CPPv425bounds_check_indices_cudaR6TensorR6TensorR6Tensor7int64_tR6TensorRKNSt8optionalI6TensorEERKNSt8optionalI6TensorEEK7int64_t"]], "int_nbit_split_embedding_codegen_lookup_function (c++ function)": [[3, "_CPPv448int_nbit_split_embedding_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE"]], "int_nbit_split_embedding_codegen_lookup_function_cpu (c++ function)": [[3, "_CPPv452int_nbit_split_embedding_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function (c++ function)": [[3, "_CPPv460int_nbit_split_embedding_uvm_caching_codegen_lookup_function6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE"]], "int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu (c++ function)": [[3, "_CPPv464int_nbit_split_embedding_uvm_caching_codegen_lookup_function_cpu6Tensor6Tensor6Tensor6Tensor6Tensor6Tensor7int64_t7int64_t7int64_t7int64_t7int64_t7int64_t6Tensor6Tensor7int64_tNSt8optionalI6TensorEE7int64_tNSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI7int64_tEENSt8optionalI6TensorEENSt8optionalI6TensorEENSt8optionalI6TensorEE"]], "pruned_array_lookup_cpu (c++ function)": [[3, "_CPPv423pruned_array_lookup_cpu6Tensor6Tensor6Tensor6Tensor"]], "pruned_array_lookup_cuda (c++ function)": [[3, "_CPPv424pruned_array_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_insert_unweighted_cpu (c++ function)": [[3, "_CPPv436pruned_hashmap_insert_unweighted_cpu6Tensor6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_cuda (c++ function)": [[3, "_CPPv426pruned_hashmap_lookup_cuda6Tensor6Tensor6Tensor6Tensor"]], "pruned_hashmap_lookup_unweighted_cpu (c++ function)": [[3, "_CPPv436pruned_hashmap_lookup_unweighted_cpu6Tensor6Tensor6Tensor6Tensor"]], "gqa_attn_splitk (c++ function)": [[4, "_CPPv415gqa_attn_splitkRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorEKdK7int64_tK7int64_tKbK7int64_t"]], "padding_fused_tbe_input_combine_cpu (c++ function)": [[5, "_CPPv435padding_fused_tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE7int64_t"]], "tbe_input_combine_cpu (c++ function)": [[5, "_CPPv421tbe_input_combine_cpuRKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKNSt6vectorIN2at6TensorEEERKN2at6TensorE"]], "batched_dense_vec_jagged_2d_mul (c++ function)": [[6, "_CPPv431batched_dense_vec_jagged_2d_mulRK6TensorRK6TensorRK6Tensor"]], "dense_to_jagged (c++ function)": [[6, "_CPPv415dense_to_jaggedRK6TensorRKNSt6vectorI6TensorEENSt8optionalIN2at6SymIntEEE"]], "jagged_1d_to_dense (c++ function)": [[6, "_CPPv418jagged_1d_to_dense6Tensor6TensorN3c106SymIntE7int64_t"]], "jagged_2d_to_dense (c++ function)": [[6, "_CPPv418jagged_2d_to_dense6Tensor6TensorN3c106SymIntE"]], "jagged_dense_elementwise_add (c++ function)": [[6, "_CPPv428jagged_dense_elementwise_addRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output (c++ function)": [[6, "_CPPv442jagged_dense_elementwise_add_jagged_outputRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_add_jagged_output_cuda (c++ function)": [[6, "_CPPv447jagged_dense_elementwise_add_jagged_output_cudaRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_dense_elementwise_mul (c++ function)": [[6, "_CPPv428jagged_dense_elementwise_mulRK6TensorRKNSt6vectorI6TensorEERK6Tensor"]], "jagged_to_padded_dense (c++ function)": [[6, "_CPPv422jagged_to_padded_denseRK6TensorRKNSt6vectorI6TensorEEKN3c1014SymIntArrayRefEKd"]], "jagged_to_padded_dense_forward (c++ function)": [[6, "_CPPv430jagged_to_padded_dense_forwardRK6TensorRKNSt6vectorI6TensorEEN3c1014SymIntArrayRefEKd"]], "recat_embedding_grad_output_cuda (c++ function)": [[7, "_CPPv432recat_embedding_grad_output_cuda6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_batch_cuda (c++ function)": [[7, "_CPPv446recat_embedding_grad_output_mixed_D_batch_cudaRK6TensorRK6TensorRK6Tensor"]], "recat_embedding_grad_output_mixed_d_cpu (c++ function)": [[7, "_CPPv439recat_embedding_grad_output_mixed_D_cpuRK6TensorRKNSt6vectorI7int64_tEE"]], "recat_embedding_grad_output_mixed_d_cuda (c++ function)": [[7, "_CPPv440recat_embedding_grad_output_mixed_D_cudaRK6TensorRKNSt6vectorI7int64_tEE"]], "is_uvm_tensor (c++ function)": [[8, "_CPPv413is_uvm_tensorRK6Tensor"]], "new_host_mapped_tensor (c++ function)": [[8, "_CPPv422new_host_mapped_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor (c++ function)": [[8, "_CPPv418new_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_managed_tensor_meta (c++ function)": [[8, "_CPPv423new_managed_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "new_unified_tensor (c++ function)": [[8, "_CPPv418new_unified_tensorRK6TensorRKNSt6vectorINSt7int64_tEEEb"]], "new_unified_tensor_meta (c++ function)": [[8, "_CPPv423new_unified_tensor_metaRK6TensorRKNSt6vectorINSt7int64_tEEEb"]], "new_vanilla_managed_tensor (c++ function)": [[8, "_CPPv426new_vanilla_managed_tensorRK6TensorRKNSt6vectorINSt7int64_tEEE"]], "uvm_cuda_mem_advise (c++ function)": [[8, "_CPPv419uvm_cuda_mem_adviseRK6Tensor7int64_t"]], "uvm_cuda_mem_prefetch_async (c++ function)": [[8, "_CPPv427uvm_cuda_mem_prefetch_asyncRK6TensorNSt8optionalI6TensorEE"]], "uvm_mem_advice_dont_fork (c++ function)": [[8, "_CPPv424uvm_mem_advice_dont_forkRK6Tensor"]], "uvm_storage (c++ function)": [[8, "_CPPv411uvm_storageRK6Tensor"]], "uvm_to_cpu (c++ function)": [[8, "_CPPv410uvm_to_cpuRK6Tensor"]], "uvm_to_cpu_clone (c++ function)": [[8, "_CPPv416uvm_to_cpu_cloneRK6Tensor"]], "uvm_to_device (c++ function)": [[8, "_CPPv413uvm_to_deviceRK6TensorRK6Tensor"]], "all_to_one_device (c++ function)": [[9, "_CPPv417all_to_one_deviceNSt6vectorIN2at6TensorEEEN2at6DeviceE"]], "permute_pooled_embs_auto_grad (c++ function)": [[9, "_CPPv429permute_pooled_embs_auto_gradRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_cpu (c++ function)": [[9, "_CPPv433permute_pooled_embs_auto_grad_cpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_gpu (c++ function)": [[9, "_CPPv433permute_pooled_embs_auto_grad_gpuRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "permute_pooled_embs_auto_grad_split_cpu (c++ function)": [[9, "_CPPv439permute_pooled_embs_auto_grad_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_auto_grad_split_gpu (c++ function)": [[9, "_CPPv439permute_pooled_embs_auto_grad_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_cpu_impl (c++ function)": [[9, "_CPPv428permute_pooled_embs_cpu_implRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKb"]], "permute_pooled_embs_split_cpu (c++ function)": [[9, "_CPPv429permute_pooled_embs_split_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "permute_pooled_embs_split_gpu (c++ function)": [[9, "_CPPv429permute_pooled_embs_split_gpuRKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorERKN2at6TensorE"]], "fp8quantizedtofloat_ref (c++ function)": [[10, "_CPPv423FP8QuantizedToFloat_refPCK7uint8_tK6size_tK6size_tPCfKiKi"]], "fp8rowwise_to_float_cpu (c++ function)": [[10, "_CPPv423FP8rowwise_to_float_cpuRK6TensorbK7int64_t"]], "floattofp8quantized_ref (c++ function)": [[10, "_CPPv423FloatToFP8Quantized_refPCKfK6size_tK6size_tPC7uint8_tKiKiKd"]], "_fp8rowwise_to_float_gpu (c++ function)": [[10, "_CPPv424_FP8rowwise_to_float_gpuRKN2at6TensorEbK7int64_t"]], "_bfloat16_to_float_gpu (c++ function)": [[10, "_CPPv422_bfloat16_to_float_gpuRKN2at6TensorE"]], "_float_to_fp8rowwise_gpu (c++ function)": [[10, "_CPPv424_float_to_FP8rowwise_gpuRK6TensorKb"]], "_float_to_bfloat16_gpu (c++ function)": [[10, "_CPPv422_float_to_bfloat16_gpuRKN2at6TensorE"]], "_float_to_fused8bitrowwise_cpu_out (c++ function)": [[10, "_CPPv434_float_to_fused8bitrowwise_cpu_outR6TensorRK6Tensor"]], "_float_to_fused8bitrowwise_gpu (c++ function)": [[10, "_CPPv430_float_to_fused8bitrowwise_gpuRK6Tensor"]], "_float_to_fusednbitrowwise_gpu (c++ function)": [[10, "_CPPv430_float_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "_float_to_hfp8_gpu (c++ function)": [[10, "_CPPv418_float_to_hfp8_gpuRKN2at6TensorEK7int64_tK7int64_tKd"]], "_float_to_msfp_gpu (c++ function)": [[10, "_CPPv418_float_to_msfp_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_tK7int64_tKdKd"]], "_float_to_paddedfp8rowwise_gpu (c++ function)": [[10, "_CPPv430_float_to_paddedFP8rowwise_gpuRK6TensorKbK7int64_t"]], "_fused8bitrowwise_to_float_cpu_out (c++ function)": [[10, "_CPPv434_fused8bitrowwise_to_float_cpu_outR6TensorRK6Tensor"]], "_fused8bitrowwise_to_float_gpu (c++ function)": [[10, "_CPPv430_fused8bitrowwise_to_float_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_float_mixed_dim_gpu (c++ function)": [[10, "_CPPv440_fused8bitrowwise_to_float_mixed_dim_gpuRKN2at6TensorERKN2at6TensorEK7int64_t"]], "_fused8bitrowwise_to_half_gpu (c++ function)": [[10, "_CPPv429_fused8bitrowwise_to_half_gpuRKN2at6TensorE"]], "_fused8bitrowwise_to_single_or_half_precision_gpu (c++ function)": [[10, "_CPPv449_fused8bitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tKbKb"]], "_fusednbitrowwise_to_float_gpu (c++ function)": [[10, "_CPPv430_fusednbitrowwise_to_float_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_half_gpu (c++ function)": [[10, "_CPPv429_fusednbitrowwise_to_half_gpuRKN2at6TensorEK7int64_t"]], "_fusednbitrowwise_to_single_or_half_precision_gpu (c++ function)": [[10, "_CPPv449_fusednbitrowwise_to_single_or_half_precision_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_half_to_fused8bitrowwise_gpu (c++ function)": [[10, "_CPPv429_half_to_fused8bitrowwise_gpuRK6Tensor"]], "_half_to_fusednbitrowwise_gpu (c++ function)": [[10, "_CPPv429_half_to_fusednbitrowwise_gpuRKN2at6TensorEK7int64_t"]], "_hfp8_to_float_gpu (c++ function)": [[10, "_CPPv418_hfp8_to_float_gpuRKN2at6TensorEK7int64_tK7int64_t"]], "_msfp_to_float_gpu (c++ function)": [[10, "_CPPv418_msfp_to_float_gpuRKN2at6TensorEK7int64_tK7int64_tK7int64_t"]], "_paddedfp8rowwise_to_float_gpu (c++ function)": [[10, "_CPPv430_paddedFP8rowwise_to_float_gpuRKN2at6TensorEKbK7int64_tK7int64_tK7int64_t"]], "_single_or_half_precision_to_fused8bitrowwise_gpu (c++ function)": [[10, "_CPPv449_single_or_half_precision_to_fused8bitrowwise_gpuRK6Tensor"]], "_single_or_half_precision_to_fusednbitrowwise_gpu (c++ function)": [[10, "_CPPv449_single_or_half_precision_to_fusednbitrowwise_gpuRK6TensorK7int64_t"]], "float_or_half_to_fused8bitrowwise_cpu (c++ function)": [[10, "_CPPv437float_or_half_to_fused8bitrowwise_cpuRK6Tensor"]], "float_to_fp8rowwise_cpu (c++ function)": [[10, "_CPPv423float_to_FP8rowwise_cpuRK6Tensorb"]], "float_to_fused8bitrowwise_cpu (c++ function)": [[10, "_CPPv429float_to_fused8bitrowwise_cpuRK6Tensor"]], "fused8bitrowwise_to_float_cpu (c++ function)": [[10, "_CPPv429fused8bitrowwise_to_float_cpuRK6Tensor"]], "fused8bitrowwise_to_float_or_half_cpu (c++ function)": [[10, "_CPPv437fused8bitrowwise_to_float_or_half_cpuRK6TensorK7int64_tKbKb"]], "fused8bitrowwise_to_half_cpu (c++ function)": [[10, "_CPPv428fused8bitrowwise_to_half_cpuRK6Tensor"]], "fusednbitrowwise_sbfront_to_float_cpu (c++ function)": [[10, "_CPPv437fusednbitrowwise_sbfront_to_float_cpuRK6TensorK7int64_t"]], "fusednbitrowwise_to_float_cpu (c++ function)": [[10, "_CPPv429fusednbitrowwise_to_float_cpuRK6TensorK7int64_t"]], "fusednbitrowwise_to_float_or_half_cpu (c++ function)": [[10, "_CPPv437fusednbitrowwise_to_float_or_half_cpuRK6TensorK7int64_tK7int64_t"]], "fusednbitrowwise_to_half_cpu (c++ function)": [[10, "_CPPv428fusednbitrowwise_to_half_cpuRK6TensorK7int64_t"]], "half_to_fused8bitrowwise_cpu (c++ function)": [[10, "_CPPv428half_to_fused8bitrowwise_cpuRK6Tensor"]], "expand_into_jagged_permute_cuda (c++ function)": [[11, "_CPPv431expand_into_jagged_permute_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_t"]], "generic_histogram_binning_calibration_by_feature_cpu (c++ function)": [[11, "_CPPv452generic_histogram_binning_calibration_by_feature_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorE7int64_tRKN2at6TensorERKN2at6TensorERKN2at6TensorEd7int64_td"]], "histogram_binning_calibration_cpu (c++ function)": [[11, "_CPPv433histogram_binning_calibration_cpuRKN2at6TensorERKN2at6TensorERKN2at6TensorEddd7int64_td"]], "direct_mapped_lru_cache_populate_byte_cuda (c++ function)": [[12, "_CPPv442direct_mapped_lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE"]], "direct_mapped_lxu_cache_lookup_cuda (c++ function)": [[12, "_CPPv435direct_mapped_lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE"]], "get_unique_indices_cuda (c++ function)": [[12, "_CPPv423get_unique_indices_cudaRKN2at6TensorEK7int64_tKb"]], "get_unique_indices_with_inverse_cuda (c++ function)": [[12, "_CPPv436get_unique_indices_with_inverse_cudaRKN2at6TensorEK7int64_tKbKb"]], "host_lxu_cache_slot (c++ function)": [[12, "_CPPv419host_lxu_cache_slot7int64_t7int64_t"]], "lfu_cache_populate_byte_cuda (c++ function)": [[12, "_CPPv428lfu_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "lfu_cache_populate_cuda (c++ function)": [[12, "_CPPv423lfu_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEb"]], "linearize_cache_indices_cuda (c++ function)": [[12, "_CPPv428linearize_cache_indices_cudaRKN2at6TensorERKN2at6TensorERKN2at6TensorERKNSt8optionalIN2at6TensorEEEK7int64_tK7int64_t"]], "linearize_cache_indices_from_row_idx_cuda (c++ function)": [[12, "_CPPv441linearize_cache_indices_from_row_idx_cudaN2at6TensorEN2at6TensorEN2at6TensorE"]], "lru_cache_find_uncached_cuda (c++ function)": [[12, "_CPPv428lru_cache_find_uncached_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tN2at6TensorEbN2at6TensorEbN2at6TensorEKb"]], "lru_cache_populate_byte_cuda (c++ function)": [[12, "_CPPv428lru_cache_populate_byte_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEE"]], "lru_cache_populate_cuda (c++ function)": [[12, "_CPPv423lru_cache_populate_cudaN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEbbNSt8optionalIN2at6TensorEEEbNSt8optionalIN2at6TensorEEE"]], "lxu_cache_flush_cuda (c++ function)": [[12, "_CPPv420lxu_cache_flush_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_tN2at6TensorEN2at6TensorEb"]], "lxu_cache_locations_update_cuda (c++ function)": [[12, "_CPPv431lxu_cache_locations_update_cudaN2at6TensorEN2at6TensorENSt8optionalIN2at6TensorEEE"]], "lxu_cache_locking_counter_decrement_cuda (c++ function)": [[12, "_CPPv440lxu_cache_locking_counter_decrement_cudaN2at6TensorEN2at6TensorE"]], "lxu_cache_lookup_cuda (c++ function)": [[12, "_CPPv421lxu_cache_lookup_cudaN2at6TensorEN2at6TensorE7int64_tbNSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEENSt8optionalIN2at6TensorEEE"]], "reset_weight_momentum_cuda (c++ function)": [[12, "_CPPv426reset_weight_momentum_cudaN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorEN2at6TensorE7int64_t"]], "rocksdbwritemode (c++ enum)": [[13, "_CPPv416RocksdbWriteMode"]], "rocksdbwritemode::bwd_l1_cnflct_miss_write_back (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode29BWD_L1_CNFLCT_MISS_WRITE_BACKE"]], "rocksdbwritemode::flush (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode5FLUSHE"]], "rocksdbwritemode::fwd_l1_eviction (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode15FWD_L1_EVICTIONE"]], "rocksdbwritemode::fwd_rocksdb_read (c++ enumerator)": [[13, "_CPPv4N16RocksdbWriteMode16FWD_ROCKSDB_READE"]], "compact_indices_cuda (c++ function)": [[13, "_CPPv420compact_indices_cudaNSt6vectorI6TensorEE6TensorNSt6vectorI6TensorEE6Tensor6Tensor"]], "cuda_callback_func (c++ function)": [[13, "_CPPv418cuda_callback_func12cudaStream_t11cudaError_tPv"]], "hash_shard (c++ function)": [[13, "_CPPv410hash_shard7int64_t6size_t"]], "kv_db::cachecontext (c++ class)": [[13, "_CPPv4N5kv_db12CacheContextE"]], "kv_db::embeddingkvdb (c++ class)": [[13, "_CPPv4N5kv_db13EmbeddingKVDBE"]], "kv_db::queueitem (c++ struct)": [[13, "_CPPv4N5kv_db9QueueItemE"]], "l2_cache::cachelibcache (c++ class)": [[13, "_CPPv4N8l2_cache13CacheLibCacheE"]], "masked_index_put_cuda (c++ function)": [[13, "_CPPv421masked_index_put_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t"]], "masked_index_select_cuda (c++ function)": [[13, "_CPPv424masked_index_select_cuda6Tensor6Tensor6Tensor6TensorKbK7int64_t"]], "ps::embeddingparameterserver (c++ class)": [[13, "_CPPv4N2ps24EmbeddingParameterServerE"]], "ssd::embeddingrocksdb (c++ class)": [[13, "_CPPv4N3ssd16EmbeddingRocksDBE"]], "ssd_generate_row_addrs_cuda (c++ function)": [[13, "_CPPv427ssd_generate_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "ssd_update_row_addrs_cuda (c++ function)": [[13, "_CPPv425ssd_update_row_addrs_cudaRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6TensorRK6Tensor"]], "batched_dense_vec_jagged_2d_mul() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.batched_dense_vec_jagged_2d_mul"]], "dense_to_jagged() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.dense_to_jagged"]], "jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_1d_to_dense"]], "jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_2d_to_dense"]], "jagged_dense_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_add() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_elementwise_add"]], "jagged_dense_elementwise_add_jagged_output() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_elementwise_add_jagged_output"]], "jagged_dense_elementwise_mul() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_dense_elementwise_mul"]], "jagged_to_padded_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.jagged_to_padded_dense"]], "stacked_jagged_1d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.stacked_jagged_1d_to_dense"]], "stacked_jagged_2d_to_dense() (in module torch.ops.fbgemm)": [[18, "torch.ops.fbgemm.stacked_jagged_2d_to_dense"]], "splittablebatchedembeddingbagscodegen() (in module fbgemm_gpu.split_table_batched_embeddings_ops)": [[19, "fbgemm_gpu.split_table_batched_embeddings_ops.SplitTableBatchedEmbeddingBagsCodegen"]], "fbgemm_gpu": [[19, "module-fbgemm_gpu"]], "module": [[19, "module-fbgemm_gpu"]], "example_method (c++ function)": [[23, "_CPPv4I0_NSt6size_tEE14example_method7int32_t1Tf"]], "example_method() (in module fbgemm_gpu.docs.examples)": [[25, "fbgemm_gpu.docs.examples.example_method"]]}}) \ No newline at end of file