diff --git a/examples/Project.toml b/examples/Project.toml index af61e7e90..415c0ec18 100644 --- a/examples/Project.toml +++ b/examples/Project.toml @@ -1,5 +1,8 @@ [deps] AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Chairmarks = "0ca39b1e-fe0b-4e98-acfc-b1656634c4de" EinExprs = "b1794770-133b-4de1-afb4-526377e9f4c5" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" diff --git a/examples/cuda.ipynb b/examples/cuda.ipynb new file mode 100644 index 000000000..ba8b9c97d --- /dev/null +++ b/examples/cuda.ipynb @@ -0,0 +1,183 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CUDA tensor network contraction demo" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Requirements\n", + "- The system must have a CUDA GPU available." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "┌ Warning: You are using a non-official build of Julia. This may cause issues with CUDA.jl.\n", + "│ Please consider using an official build from https://julialang.org/downloads/.\n", + "└ @ CUDA /home/bsc/bsc021386/.julia/packages/CUDA/75aiI/src/initialization.jl:180\n" + ] + } + ], + "source": [ + "using Tenet\n", + "using EinExprs\n", + "using Adapt\n", + "using CUDA\n", + "using BenchmarkTools" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a random tensor network and find its contraction path:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SizedEinExpr{Symbol}(EinExpr{Symbol}(Symbol[], EinExpr{Symbol}[EinExpr{Symbol}([:P, :A, :c, :Y], EinExpr{Symbol}[]), EinExpr{Symbol}([:P, :A, :c, :Y], EinExpr{Symbol}[EinExpr{Symbol}([:H, :U, :F, :Z, :X, :O, :A, :c], EinExpr{Symbol}[]), EinExpr{Symbol}([:H, :P, :U, :F, :Z, :X, :O, :Y], EinExpr{Symbol}[EinExpr{Symbol}([:E, :K, :U, :V, :I, :a, :F, :Z, :b], EinExpr{Symbol}[]), EinExpr{Symbol}([:E, :H, :P, :K, :V, :I, :a, :b, :X, :O, :Y], EinExpr{Symbol}[EinExpr{Symbol}([:D, :E, :H, :M, :P], EinExpr{Symbol}[]), EinExpr{Symbol}([:D, :M, :K, :V, :I, :a, :b, :X, :O, :Y], EinExpr{Symbol}[EinExpr{Symbol}([:D, :J, :G, :a, :O, :T, :Y], EinExpr{Symbol}[EinExpr{Symbol}([:D, :J, :R], EinExpr{Symbol}[]), EinExpr{Symbol}([:R, :G, :a, :O, :T, :Y], EinExpr{Symbol}[])]), EinExpr{Symbol}([:J, :M, :G, :K, :V, :I, :b, :X, :T], EinExpr{Symbol}[EinExpr{Symbol}([:M, :G, :S, :K, :Q], EinExpr{Symbol}[]), EinExpr{Symbol}([:J, :S, :Q, :V, :I, :b, :X, :T], EinExpr{Symbol}[EinExpr{Symbol}([:V, :b, :N, :C, :L, :X, :B], EinExpr{Symbol}[]), EinExpr{Symbol}([:J, :S, :Q, :I, :N, :C, :L, :B, :T], EinExpr{Symbol}[EinExpr{Symbol}([:Q, :N, :C, :B, :d, :W], EinExpr{Symbol}[]), EinExpr{Symbol}([:J, :S, :I, :L, :d, :W, :T], EinExpr{Symbol}[])])])])])])])])]), Dict(:b => 9, :F => 2, :D => 7, :B => 3, :V => 2, :c => 9, :K => 4, :S => 8, :X => 9, :E => 5…))" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Initialize random tensor network\n", + "regularity = 6\n", + "ntensors = 10\n", + "tn = rand(TensorNetwork, ntensors, regularity)\n", + "path = einexpr(tn; optimizer=Exhaustive())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Transform the tensors' data types to `CuArray`s:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TensorNetwork (#tensors=10, #inds=30)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "cudatn = adapt(CuArray, tn)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Benchmark CUDA tensor network contraction:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BenchmarkTools.Trial: 1355 samples with 1 evaluation.\n", + " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m854.907 μs\u001b[22m\u001b[39m … \u001b[35m 9.324 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m0.00% … 36.81%\n", + " Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m 3.749 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m0.00%\n", + " Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m 3.673 ms\u001b[22m\u001b[39m ± \u001b[32m796.963 μs\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m0.85% ± 3.09%\n", + "\n", + " \u001b[39m▂\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[34m█\u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n", + " \u001b[39m█\u001b[39m▃\u001b[39m▁\u001b[39m▃\u001b[39m▁\u001b[39m▃\u001b[39m▃\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▃\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▃\u001b[39m▃\u001b[39m▄\u001b[39m▃\u001b[39m▇\u001b[39m▆\u001b[39m▁\u001b[39m▁\u001b[39m▆\u001b[34m█\u001b[39m\u001b[39m▃\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▇\u001b[39m \u001b[39m▇\n", + " 855 μs\u001b[90m \u001b[39m\u001b[90mHistogram: \u001b[39m\u001b[90m\u001b[1mlog(\u001b[22m\u001b[39m\u001b[90mfrequency\u001b[39m\u001b[90m\u001b[1m)\u001b[22m\u001b[39m\u001b[90m by time\u001b[39m 7.1 ms \u001b[0m\u001b[1m<\u001b[22m\n", + "\n", + " Memory estimate\u001b[90m: \u001b[39m\u001b[33m251.72 KiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m4344\u001b[39m." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "@benchmark contract(cudatn; path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Benchmark regular tensor network contraction:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "BenchmarkTools.Trial: 12 samples with 1 evaluation.\n", + " Range \u001b[90m(\u001b[39m\u001b[36m\u001b[1mmin\u001b[22m\u001b[39m … \u001b[35mmax\u001b[39m\u001b[90m): \u001b[39m\u001b[36m\u001b[1m325.174 ms\u001b[22m\u001b[39m … \u001b[35m966.103 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmin … max\u001b[90m): \u001b[39m 1.81% … 57.31%\n", + " Time \u001b[90m(\u001b[39m\u001b[34m\u001b[1mmedian\u001b[22m\u001b[39m\u001b[90m): \u001b[39m\u001b[34m\u001b[1m334.514 ms \u001b[22m\u001b[39m\u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmedian\u001b[90m): \u001b[39m 1.78%\n", + " Time \u001b[90m(\u001b[39m\u001b[32m\u001b[1mmean\u001b[22m\u001b[39m ± \u001b[32mσ\u001b[39m\u001b[90m): \u001b[39m\u001b[32m\u001b[1m433.969 ms\u001b[22m\u001b[39m ± \u001b[32m194.491 ms\u001b[39m \u001b[90m┊\u001b[39m GC \u001b[90m(\u001b[39mmean ± σ\u001b[90m): \u001b[39m19.64% ± 19.62%\n", + "\n", + " \u001b[39m█\u001b[34m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[32m \u001b[39m\u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \u001b[39m \n", + " \u001b[39m█\u001b[34m▄\u001b[39m\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▄\u001b[39m▁\u001b[39m▁\u001b[32m▁\u001b[39m\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▄\u001b[39m▁\u001b[39m▄\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▁\u001b[39m▄\u001b[39m \u001b[39m▁\n", + " 325 ms\u001b[90m Histogram: frequency by time\u001b[39m 966 ms \u001b[0m\u001b[1m<\u001b[22m\n", + "\n", + " Memory estimate\u001b[90m: \u001b[39m\u001b[33m926.79 MiB\u001b[39m, allocs estimate\u001b[90m: \u001b[39m\u001b[33m2786\u001b[39m." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "@benchmark contract(tn; path)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Julia 1.10.0", + "language": "julia", + "name": "julia-1.10" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}