From 2d761c2e08729d54dacacbbc3a653623be5dbbf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20=C5=9Amierzchalski?= <53766192+tomsmierz@users.noreply.github.com> Date: Wed, 3 Apr 2024 13:46:21 +0200 Subject: [PATCH] Master to be -> master (#16) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * clean up types * clean up types * rm AbstracDitcs * rm Abstract from Matrix * rm AbstracDitcs and add zeros * clean up tensors more * clean up sparse * clean up virtual * clean up linear alg ext * simplify lin alg ext * comments * transpose * corewct types * simplify more * transpose * clean up #1 * clean up #2 * clean up #3 * clean up #4 * add canonise.jl * formatt code * clean up #5 * remove trans * add new func * clean up * start cleaning gauges * clean up * add comments * output env variational * MpoTensor * structs * MpoTensor * fix issues with types * fix nothing type * length for MpoTensor to be verified * clean up * fix subtyping in tensors * clean up types * add N explicitte in some places * rm lenght * MpoTensor * transpose * clea up * ideas * mv ideas to attic * make MpoTensor external const * renaming * add tsvd example * zipper * improve bench * clean up exports * add comments * add basic tests for QMps and Canonise * clean up stuff * measure_memory * add measure_memory * add format_bytes * zipper * sparse zipper * add eltype for mps / mps * fix some issues * zipper psvd * zipper psvd * zipper sparse svd * tsvd * change svd_corner_matrix * correct Adjoint for CornerTenso * to cuda * device * device * edvice * diag * fix Diagona * fix device * fix array 2 * fix diag 3 * works * add comments * toward cuda * fix kwargs * cuda contractons * cuda central * cleanup cuda * patch array centraltensor * patching dense central tensor * vector memory * start cleaning * add GPU flags, clean up * clean up * zipper QMps on GPU * start working on qr * qr on GPU works * add basic tests for QMps * more basic tests * aux * change site * clean up * clean up CPU <-> GPU transfer * gauges on gpu * change types in gauges * reordering * clean up * fix bug * clean up * fix * rm unnecassry permusts * dense site * clean up * clean up * zipper central * virtual * test * central * cuproj * aux_saprse * add @view in site * clean up * add bench for multi gpu * add examples * add commenst * add @inbounds in some places * towards cpu contractions * to cpu * benchmarks for memoizations of cusparse * added comment explaining convention * SparseCSC * move stuff to attic * clean up a bit * clean types a bit * added measure_memory for memoization caches * added handling of sparse matrices * hotfix in sparse matix CSR * clean up * clean up 2 * fix types * sitetensor * SiteTensor Sparse * VirtualTensor * poolofprojectors * memory * copied from "leg-ordering" branch * start working on SCR problem * project_ket_on_bra virtual * add SparseArrays to test cuSparse * virtual update_env_left * add CUDA.:* (commented) * virtual update_env_right * cirtual update_reduced_env_right * add bench for CSR * add bench * site * info * reduced_env_right sitetensor * measure_spectrum * input mps is in the correct canonical form * test measure_spectrum * virtual * test measure spectrum * move_to_CPU * schmidt * view virtual * rand qmpo * toward allocation * allocate site * site working * speed up virtual (update_env_right) * virtual alloc * virtual no cusparse * virtual no cusparse * proj * site * test * working site * cleanup * new zipper * new zipper * stable zipper * building blocks of new 
zipper-variational * fix type * fix typo * start new zipper * new env * new zipper * new_zipper clear env * kill attic * fix new zipper * args in zipper * clean up * repeat psvd * sparse * corner matrix for virtual * virtualtest * restart virtual * env_levt_v1 * new virtual * my_batched_mul * virtual with 2 step projectors * WIP * cases without central tensor * fix update_reduced_env_right virtual * central batched_mul * measure_memory(EnvironmentMixed) * change fg to cl_h * PoolOfProjectors in clustered_hamiltonian * clean up tests * split long lines * add docs * docs * clean up toml * clean up * add the docs * add flag for RMF in zipper * add docs * add depth of sweep in zipper * Rename aux.jl to utils.jl, becouse aux.* is restricted filename in windows * Update SpinGlassTensors.jl, changed aux to utils * moved projectors.jl from SpinGlassNetworks * remove networks from Project * moved test * add projectors to tests * update julia * added projectors.jl to runtest * moved rank_reveal from SpinGlassNetworks * bugfix * update to CUDA 4.4.1, TensorOperations 4 and TensorCast 0.4. SEE README * reformat * fix ci * onlcy set self-hosted * fix nothing comparsion (#18) * Fixes creation of sparse matrices (#19) * change CUDA sparse to CSR, rename function createing sparse matrices * up * update ci runner * fix runs-on ci * fix ci * set proper rev * add flags * fix project in TransmuteDims * fix typo --------- Co-authored-by: bartekGardas Co-authored-by: marekrams Co-authored-by: annamariadziubyna Co-authored-by: annamariadziubyna <73058800+annamariadziubyna@users.noreply.github.com> Co-authored-by: Łukasz Pawela Co-authored-by: Łukasz Pawela <3093117+lpawela@users.noreply.github.com> --- .github/workflows/CI.yml | 16 +- Project.toml | 31 ++- README.md | 1 + bench_mm.jl | 29 ++ benchmark/args.jl | 12 + benchmark/cuda_matrix_mul.jl | 29 ++ benchmark/gpu_slicing.jl | 15 ++ benchmark/memoization_cusparse.jl | 46 ++++ benchmark/memoization_test.jl | 38 +++ benchmark/mulit_gpu.jl | 25 ++ benchmark/mulit_gpu2.jl | 10 + benchmark/psvd.jl | 86 ++++++ benchmark/qr.jl | 18 ++ benchmark/sparse_mul.jl | 24 ++ benchmark/sparse_mul_bench.jl | 31 +++ benchmark/svd.jl | 73 +++++ benchmark/svd2.jl | 102 +++++++ docs/make.jl | 16 +- docs/src/api.md | 27 +- docs/src/index.md | 4 +- docs/src/mpo.md | 5 + src/SpinGlassTensors.jl | 42 ++- src/base.jl | 318 ++++++++-------------- src/compressions.jl | 240 ----------------- src/contractions.jl | 151 ----------- src/contractions/central.jl | 111 ++++++++ src/contractions/dense.jl | 204 ++++++++++++++ src/contractions/diagonal.jl | 21 ++ src/contractions/site.jl | 195 ++++++++++++++ src/contractions/sparse.jl | 70 +++++ src/contractions/virtual.jl | 434 ++++++++++++++++++++++++++++++ src/environment.jl | 293 ++++++++++++++++++++ src/gauges.jl | 155 +++++++++++ src/identities.jl | 45 ---- src/linear_algebra_ext.jl | 85 +++--- src/mps/base.jl | 110 ++++++++ src/mps/canonise.jl | 98 +++++++ src/mps/dot.jl | 53 ++++ src/mps/identity.jl | 34 +++ src/mps/rand.jl | 45 ++++ src/mps/transpose.jl | 21 ++ src/mps/utils.jl | 67 +++++ src/projectors.jl | 140 ++++++++++ src/transfer.jl | 84 ++++++ src/utils/memory.jl | 47 ++++ src/utils/utils.jl | 108 ++++++++ src/variational.jl | 97 +++++++ src/zipper.jl | 278 +++++++++++++++++++ test/attic/canonise.jl | 25 ++ test/attic/compressions.jl | 35 +++ test/attic/contractions.jl | 61 +++++ test/attic/environment.jl | 10 + test/attic/linear_algebra_ext.jl | 46 ++++ test/{ => attic}/memoization.jl | 0 test/attic/mps.jl | 45 ++++ 
test/attic/runtests.jl | 20 ++ test/base.jl | 187 ------------- test/canonise.jl | 51 ++++ test/compressions.jl | 100 ------- test/contractions.jl | 111 -------- test/identities.jl | 80 ------ test/projectors.jl | 53 ++++ test/runtests.jl | 7 +- test/variational.jl | 52 ++++ 64 files changed, 3840 insertions(+), 1227 deletions(-) create mode 100755 bench_mm.jl create mode 100644 benchmark/args.jl create mode 100644 benchmark/cuda_matrix_mul.jl create mode 100644 benchmark/gpu_slicing.jl create mode 100644 benchmark/memoization_cusparse.jl create mode 100644 benchmark/memoization_test.jl create mode 100644 benchmark/mulit_gpu.jl create mode 100644 benchmark/mulit_gpu2.jl create mode 100644 benchmark/psvd.jl create mode 100644 benchmark/qr.jl create mode 100644 benchmark/sparse_mul.jl create mode 100644 benchmark/sparse_mul_bench.jl create mode 100644 benchmark/svd.jl create mode 100644 benchmark/svd2.jl create mode 100644 docs/src/mpo.md delete mode 100644 src/compressions.jl delete mode 100644 src/contractions.jl create mode 100644 src/contractions/central.jl create mode 100644 src/contractions/dense.jl create mode 100644 src/contractions/diagonal.jl create mode 100644 src/contractions/site.jl create mode 100644 src/contractions/sparse.jl create mode 100644 src/contractions/virtual.jl create mode 100644 src/environment.jl create mode 100644 src/gauges.jl delete mode 100644 src/identities.jl create mode 100644 src/mps/base.jl create mode 100644 src/mps/canonise.jl create mode 100644 src/mps/dot.jl create mode 100644 src/mps/identity.jl create mode 100644 src/mps/rand.jl create mode 100644 src/mps/transpose.jl create mode 100644 src/mps/utils.jl create mode 100644 src/projectors.jl create mode 100644 src/transfer.jl create mode 100644 src/utils/memory.jl create mode 100644 src/utils/utils.jl create mode 100644 src/variational.jl create mode 100644 src/zipper.jl create mode 100644 test/attic/canonise.jl create mode 100644 test/attic/compressions.jl create mode 100644 test/attic/contractions.jl create mode 100644 test/attic/environment.jl create mode 100644 test/attic/linear_algebra_ext.jl rename test/{ => attic}/memoization.jl (100%) create mode 100644 test/attic/mps.jl create mode 100644 test/attic/runtests.jl delete mode 100644 test/base.jl create mode 100644 test/canonise.jl delete mode 100644 test/compressions.jl delete mode 100644 test/contractions.jl delete mode 100644 test/identities.jl create mode 100644 test/projectors.jl create mode 100644 test/variational.jl diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 56ad45a..4c92a49 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -4,25 +4,21 @@ on: - pull_request jobs: test: - name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - runs-on: ${{ matrix.os }} + name: Julia ${{ matrix.version }} + runs-on: [self-hosted,titan,gpu] strategy: fail-fast: false matrix: version: - - '1.7' - - '1.8' - os: - - ubuntu-latest - - macOS-latest - arch: - - x64 + - '1.9' + - '1.10' steps: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.version }} - arch: ${{ matrix.arch }} + - name: Fix TransmuteDims + run: julia --project=@. 
--color=yes -e 'using Pkg; Pkg.add(name="TransmuteDims", rev="strided2")' - uses: julia-actions/julia-buildpkg@latest - uses: julia-actions/julia-runtest@latest env: diff --git a/Project.toml b/Project.toml index 4a57f53..8ade337 100644 --- a/Project.toml +++ b/Project.toml @@ -1,25 +1,34 @@ name = "SpinGlassTensors" uuid = "7584fc6a-5a23-4eeb-8277-827aab0146ea" -authors = [ - "Łukasz Pawela ", - "Konrad Jałowiecki ", - "Bartłomiej Gardas " - ] -version = "0.3.0" +authors = ["Anna Maria Dziubyna ", "Tomasz Śmierzchalski ", "Bartłomiej Gardas ", "Konrad Jałowiecki ", "Łukasz Pawela ", "Marek M. Rams "] +version = "1.0.0" [deps] +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" -Memoize = "c03570c3-d221-55d1-a50c-7939bbd78826" +LowRankApprox = "898213cb-b102-5a47-900c-97e73b919f73" +MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2" +Memoization = "6fafb56a-5788-4b4e-91ca-c0cea6611c73" +NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +TSVD = "9449cd9e-2762-5aa3-a617-5413e99d722e" TensorCast = "02d47bb6-7ce6-556a-be16-bb1710789e2b" TensorOperations = "6aa20fa7-93e2-5fca-9bc0-fbd0db3c71a2" +TransmuteDims = "24ddb15e-299a-5cc3-8414-dbddc482d9ca" +cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [compat] -DocStringExtensions = "0.8" -Memoize = "0.4" +CUDA = "4.4.1" +DocStringExtensions = "0.9.3" +LowRankApprox = "0.5.5" +MKL = "0.4.2" +Memoization = "0.2.1" +SparseArrays = "1.9" TensorCast = "0.4" -TensorOperations = "3.0.1" -julia = "1.7, 1.8" +TensorOperations = "4" +cuTENSOR = "1.1.0" +julia = "1.9, 1.10" [extras] Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" diff --git a/README.md b/README.md index 9f1d9a5..7623f30 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,3 @@ [![Coverage Status](https://coveralls.io/repos/github/iitis/SpinGlassTensors.jl/badge.svg?branch=master)](https://coveralls.io/github/iitis/SpinGlassTensors.jl?branch=master) # SpinGlassTensors.jl +This works with CUDA v4.4.1. You need to manually `]add TransmuteDims#strided2` \ No newline at end of file diff --git a/bench_mm.jl b/bench_mm.jl new file mode 100755 index 0000000..cf21ba5 --- /dev/null +++ b/bench_mm.jl @@ -0,0 +1,29 @@ +using TensorCast, TensorOperations +function time_mm() + M = rand(100, 100, 100) + L = rand(100, 100) + R = rand(100, 100) + @time begin + @matmul M1[x, σ, α] := sum(β) L[x, β] * M[β, σ, α] + @matmul MM[x, σ, y] := sum(α) M1[x, σ, α] * R[α, y] + end +end + +function time_tensor() + M = rand(100, 100, 100) + L = rand(100, 100) + R = rand(100, 100) + + @time begin + @tensor M̃[x, σ, y] := L[x, β] * M[β, σ, α] * R[α, y] order = (α, β) + # @cast B[(x, σ), y] |= M̃[x, σ, y] + end +end + +println("matmul") +time_mm() +time_mm() + +println("\n tensor") +time_tensor() +time_tensor() diff --git a/benchmark/args.jl b/benchmark/args.jl new file mode 100644 index 0000000..516cd3d --- /dev/null +++ b/benchmark/args.jl @@ -0,0 +1,12 @@ +using LinearAlgebra + +function my_svd(A; kwargs...) + svd(A; kwargs...) 
+end + + +T = Float64 +n = 2 +A = rand(T, 2, 2) + +my_svd(A, full = true) diff --git a/benchmark/cuda_matrix_mul.jl b/benchmark/cuda_matrix_mul.jl new file mode 100644 index 0000000..165cf5d --- /dev/null +++ b/benchmark/cuda_matrix_mul.jl @@ -0,0 +1,29 @@ +using CUDA +using LinearAlgebra + +CUDA.allowscalar(false) + +nnz = 100 +Val = CUDA.rand(Float64, nnz) +Ptr = CuArray(1:nnz+1) +Ind = CuArray(rand(1:100, nnz)) + +A = CUDA.CUSPARSE.CuSparseMatrixCSR(Ptr, Ind, Val, (100, 100)) +B = CUDA.rand(Float64, 100, 100) +C = CUDA.CUSPARSE.CuSparseMatrixCSC(Ptr, Ind, Val, (100, 100)) + +A * B # no scalar indexing +CUDA.@allowscalar B * A # scalar indexing + +C * B # no scalar indexing +CUDA.@allowscalar B * C # scalar indexing + +A' * B # no scalar indexing +CUDA.@allowscalar B * A' # scalar indexing + +transpose(A) * B # no scalar indexing +CUDA.@allowscalar B * transpose(A) # scalar indexing +# problem is when we multiply dense x sparse + +D = rand(Float64, (100, 100)) +CUDA.@allowscalar D * A # scalar indexing diff --git a/benchmark/gpu_slicing.jl b/benchmark/gpu_slicing.jl new file mode 100644 index 0000000..2e6f8c3 --- /dev/null +++ b/benchmark/gpu_slicing.jl @@ -0,0 +1,15 @@ +using CUDA + +T = Float64 +n = 10000 +k = 500 + +a = CUDA.rand(T, n, n) +p = reverse(collect(1:k)) +p_d = CuArray(p) + +@time A = a[:, p] +@time @inbounds A = a[:, p] +@time A = a[:, p_d] +@time @inbounds A = a[:, p_d] +nothing diff --git a/benchmark/memoization_cusparse.jl b/benchmark/memoization_cusparse.jl new file mode 100644 index 0000000..01f51b3 --- /dev/null +++ b/benchmark/memoization_cusparse.jl @@ -0,0 +1,46 @@ +using Memoization +using LinearAlgebra +using CUDA +using BenchmarkTools + +# Functions from constactions_cuda/sparse.jl which are not exported + +@memoize Dict function aux_cusparse(::Type{R}, n::Int64) where {R<:Real} + println("entering aux_cusparse function") + CuArray(1:n+1), CUDA.ones(R, n) +end + +@memoize Dict function CUDA.CUSPARSE.CuSparseMatrixCSC( + ::Type{R}, + p::Vector{Int}, +) where {R<:Real} + println("entering cusparse") + n = length(p) + cn, co = aux_cusparse(R, n) + CUDA.CUSPARSE.CuSparseMatrixCSC(cn, CuArray(p), co, (maximum(p), n)) +end + + +function CuSparseMatrixCSC_no_memo(::Type{R}, p::Vector{Int}) where {R<:Real} + println("entering no memo") + n = length(p) + cn, co = aux_cusparse(R, n) + CUDA.CUSPARSE.CuSparseMatrixCSC(cn, CuArray(p), co, (maximum(p), n)) +end + +# test of their memoization + +p = sort(rand(1:5000, 10000000)) +p2 = sort(rand(1:5000, 10000000)) +@time A = CuSparseMatrixCSC_no_memo(Float64, p) +@time B = CuSparseMatrixCSC_no_memo(Float64, p) + +@time C = CUDA.CUSPARSE.CuSparseMatrixCSC(Float64, p) # compilation time? 
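+# The call above pays compilation and construction cost. Below, D reuses the same `p`,
+# so the `@memoize Dict` lookup returns the cached CuSparseMatrixCSC almost instantly;
+# E sees the new vector `p2` and builds (and caches) a fresh matrix, and F hits that cache.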
+ +@time D = CUDA.CUSPARSE.CuSparseMatrixCSC(Float64, p) +@time E = CUDA.CUSPARSE.CuSparseMatrixCSC(Float64, p2) +@time F = CUDA.CUSPARSE.CuSparseMatrixCSC(Float64, p2) +CUDA.memory_status() +Memoization.empty_all_caches!() +CUDA.memory_status() +# clearing memoization caches doeas not free GPU memory diff --git a/benchmark/memoization_test.jl b/benchmark/memoization_test.jl new file mode 100644 index 0000000..750d6fb --- /dev/null +++ b/benchmark/memoization_test.jl @@ -0,0 +1,38 @@ +using SpinGlassTensors +using Memoization +using CUDA + + +@memoize Dict function example_cuda_array(::Type{R}, size::Int64) where {R<:Real} + CUDA.rand(R, (size, size)) +end + + +@memoize Dict function example_array(::Type{R}, size::Int64) where {R<:Real} + rand(R, size, size) +end + + +@memoize Dict function aux_cusparse(::Type{R}, n::Int64) where {R<:Real} + CuArray(1:n+1), CUDA.ones(R, n) +end + + +@memoize Dict function CUDA.CUSPARSE.CuSparseMatrixCSC( + ::Type{R}, + p::Vector{Int}, +) where {R<:Real} + n = length(p) + cn, co = aux_cusparse(R, n) + CUDA.CUSPARSE.CuSparseMatrixCSC(cn, CuArray(p), co, (maximum(p), n)) +end + + +A = example_cuda_array(Float64, 10000) +B = example_cuda_array(Float64, 1100) +C = example_array(Float64, 1000) +p = rand(1:5000, 100000000) +D = CUDA.CUSPARSE.CuSparseMatrixCSC(Float64, p) +CUDA.memory_status() +println("/n") +measure_memory(Memoization.caches) diff --git a/benchmark/mulit_gpu.jl b/benchmark/mulit_gpu.jl new file mode 100644 index 0000000..16748c7 --- /dev/null +++ b/benchmark/mulit_gpu.jl @@ -0,0 +1,25 @@ +using CUDA + +function move_to_CUDA(a::Array{T,N}) where {T,N} + buf_a = Mem.alloc(Mem.Unified, sizeof(a)) + d_a = unsafe_wrap(CuArray{T,N}, convert(CuPtr{T}, buf_a), size(a)) + finalizer(d_a) do _ + Mem.free(buf_a) + end + copyto!(d_a, a) + d_a +end + +T = Float64 +n = 100 +gpus = Int(length(devices())) + +a = rand(T, n, n, gpus) +a_d = move_to_CUDA(a) + +for (gpu, dev) ∈ enumerate(devices()) + device!(dev) + @views a_d[:, :, gpu] .= 2 .* a_d[:, :, gpu] +end + +a_d diff --git a/benchmark/mulit_gpu2.jl b/benchmark/mulit_gpu2.jl new file mode 100644 index 0000000..bbbcf8c --- /dev/null +++ b/benchmark/mulit_gpu2.jl @@ -0,0 +1,10 @@ +using CUDA + +T = Float64 +n = 100 +gpus = Int(length(devices())) + +a = rand(T, n, n, gpus) +a_d = cu(a, unified = true) + +a_d diff --git a/benchmark/psvd.jl b/benchmark/psvd.jl new file mode 100644 index 0000000..2f30108 --- /dev/null +++ b/benchmark/psvd.jl @@ -0,0 +1,86 @@ +using LinearAlgebra, MKL +using TensorOperations +using TensorCast +using TSVD +using LowRankApprox +using RandomizedLinAlg +using FameSVD + +N = 100 +cut = 8 + +mat = rand(100, 100); +U, S, V = svd(mat); +S = exp.(collect(0:N-1) * log(4 / 5)); + +mat = U * Diagonal(S) * V'; +U, S, V = svd(mat); + +U, S, V = U[:, 1:cut], S[1:cut], V[:, 1:cut] +mat1 = U * Diagonal(S) * V' +println(S[1:cut]) +println(norm(mat - mat1)) + +Up, Sp, Vp = psvd(mat, rank = 2 * cut) + +mat2 = Up[:, 1:cut] * Diagonal(Sp[1:cut]) * Vp[:, 1:cut]' + +println(Sp[1:cut]) +println(Sp[1:cut] - S[1:cut]) +println(norm(mat - mat2)) + +# Vp = V + +C = mat * Vp +println(size(C)) +Ut, _ = qr(C) +Ut = Ut[:, 1:cut] +println(size(Ut)) +C = Ut' * mat +Vp, _ = qr(C') +Vp = Vp[:, 1:cut] + + + +C = mat * Vp +Uf, Sf, Vf = svd(C); +Uf, Sf, Vf = Uf[:, 1:cut], Sf[1:cut], Vf[:, 1:cut] +mat3 = Uf * Diagonal(Sf) * Vf' * Vp' +println(Sf - S[1:cut]) +println(norm(mat - mat3)) + +nothing + + +iter = 5 +Up, Sp, Vp = [], [], [] +for i = 1:iter + Utemp, Stemp, Vtemp = psvd(mat, rank = 2 * cut) + push!(Up, Utemp) + 
push!(Sp, Stemp) + push!(Vp, Vtemp) +end + +Ups = hcat(Up...) +Vps = hcat(Vp...) +Sps = vcat(Sp...) / iter +println(size(Ups), " ", size(Vps), " ", size(Sps)) +println(size(Up[1]), " ", size(Vp[1]), " ", size(Sp[1])) + +Uq, Ur = qr(Ups) +Vq, Vr = qr(Vps) + +Ut, St, Vt = svd(Ur * Diagonal(Sps) * Vr') + +U2 = Uq * Ut[:, 1:cut] +V2 = Vq * Vt[:, 1:cut] +S2 = St[1:cut] +println(St) +println(S2) + +mat4 = U2 * Diagonal(S2) * V2' + + +println(norm(mat1 - mat2)) +println(norm(mat1 - mat3)) +println(norm(mat1 - mat4)) diff --git a/benchmark/qr.jl b/benchmark/qr.jl new file mode 100644 index 0000000..62b34c9 --- /dev/null +++ b/benchmark/qr.jl @@ -0,0 +1,18 @@ +using LinearAlgebra +using CUDA + + +T = Float64 +n, m = 10000, 10000 + +A = rand(T, n, m) +Ad = CuArray(A) + +@time q, r = qr(A); +@time qd, rd = qr(Ad); + +println(size(q), " ", size(r)) +println(size(qd), " ", size(rd)) + +@assert A ≈ q * r +@assert Ad ≈ qd * rd diff --git a/benchmark/sparse_mul.jl b/benchmark/sparse_mul.jl new file mode 100644 index 0000000..dec7c10 --- /dev/null +++ b/benchmark/sparse_mul.jl @@ -0,0 +1,24 @@ +using CUDA +using LinearAlgebra + +function CUDA.:*(Md::DenseCuMatrix{T}, Mcsc::CUSPARSE.CuSparseMatrixCSC{T}) where {T} + ret = CUDA.zeros(T, size(Mcsc, 1), size(Md, 2)) + CUSPARSE.mm!('N', 'N', one(T), Mcsc, Md, zero(T), ret, 'O') + ret +end + +T = Float64 +nnz = 100 +Val = CUDA.rand(T, nnz) +Ptr = CuArray(1:nnz+1) +Ind = CuArray(rand(1:nnz, nnz)) + +Mcsr = CUSPARSE.CuSparseMatrixCSR(Ptr, Ind, Val, (nnz, nnz)) +Md = CUDA.rand(T, nnz, nnz) +Mcsc = CUSPARSE.CuSparseMatrixCSC(Ptr, Ind, Val, (nnz, nnz)) + +X = Mcsr * Md +Y = Md * Mcsc + +@assert X ≈ CuArray(Mcsr) * Md +@assert Y ≈ Md * CuArray(Mcsc) diff --git a/benchmark/sparse_mul_bench.jl b/benchmark/sparse_mul_bench.jl new file mode 100644 index 0000000..79deb4b --- /dev/null +++ b/benchmark/sparse_mul_bench.jl @@ -0,0 +1,31 @@ +using CUDA +using LinearAlgebra +using SparseArrays + +function dense_x_CSC(Md::DenseCuMatrix{T}, Mcsc::CUSPARSE.CuSparseMatrixCSC{T}) where {T} + ret = CUDA.zeros(T, size(Md, 1), size(Mcsc, 2)) + CUSPARSE.mm!('N', 'N', one(T), Mcsc, Md, zero(T), ret, 'O') + ret +end + +T = Float64 +nnz = 2^14 +Val = CUDA.rand(T, nnz) +Ptr = CuArray(1:nnz+1) +Ind = CuArray(rand(1:nnz, nnz)) + +Mcsr = CUSPARSE.CuSparseMatrixCSR(Ptr, Ind, Val, (nnz, nnz)) +Md = CUDA.rand(T, nnz, nnz) +Mcsc = CUSPARSE.CuSparseMatrixCSC(Ptr, Ind, Val, (nnz, nnz)) + +@time CUDA.@sync X = Mcsr * Md +#@time CUDA.@sync Y = dense_x_CSC(Md, Mcsc) +@time CUDA.@sync Z = (Mcsc' * Md')' + +println() + +@time CUDA.@sync X = Mcsr * Md +#@time CUDA.@sync Y = dense_x_CSC(Md, Mcsc) +@time CUDA.@sync Z = (Mcsc' * Md')' + +nothing diff --git a/benchmark/svd.jl b/benchmark/svd.jl new file mode 100644 index 0000000..52cc593 --- /dev/null +++ b/benchmark/svd.jl @@ -0,0 +1,73 @@ +using LinearAlgebra, MKL +using TensorOperations +using TensorCast +using TSVD +using LowRankApprox +using RandomizedLinAlg +using FameSVD + + +# C = A * B +struct MyTensor{T<:Number} + A::Array{T,2} + B::Array{T,2} +end + +Base.Array(ten::MyTensor) = ten.A * ten.B + +# this is for tsvd to work +Base.eltype(ten::MyTensor{T}) where {T} = T +Base.size(ten::MyTensor) = (size(ten.A, 1), size(ten.B, 2)) +Base.size(ten::MyTensor, n::Int) = size(ten)[n] +Base.adjoint(ten::MyTensor{T}) where {T} = MyTensor{T}(adjoint(ten.B), adjoint(ten.A)) +Base.:(*)(ten::MyTensor{T}, v::Vector{T}) where {T} = (ten.A * (ten.B * v)) + +# this is for psvd to work +LinearAlgebra.ishermitian(ten::MyTensor) = ishermitian(ten.A) && ishermitian(ten.B) 
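+# Roughly speaking, psvd only needs `ishermitian` and `mul!` applied to (blocks of) vectors,
+# so when the operator is passed as a MyTensor the dense n×n product C = A * B never has to
+# be formed; each application costs two dense matrix-vector products instead.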
+LinearAlgebra.mul!(y, ten::MyTensor, x) = mul!(y, ten.B, ten.A * x) + +n = 2^12 +cut = 2^6 +T = Float64 + +ten = MyTensor(rand(T, n, n), rand(T, n, n)) + +println("MyTensor:") +println("tsvd:") +@time U, Σ, V = tsvd(ten, cut) + +println("psvd:") +@time U, Σ, V = psvd(ten, rank = cut) + +println("Array:") +println("tsvd:") +@time begin + C = Array(ten) + U, Σ, V = tsvd(C, cut) +end + +println("svd:") +@time begin + C = Array(ten) + U, Σ, V = svd(C) +end + +println("psvd:") +@time begin + C = Array(ten) + U, Σ, V = psvd(C, rank = cut) +end + +println("rsvd:") +@time begin + C = Array(ten) + U, Σ, V = rsvd(C, cut, 0) +end + +println("fsvd:") +@time begin + C = Array(ten) + U, Σ, V = fsvd(C) +end + +nothing diff --git a/benchmark/svd2.jl b/benchmark/svd2.jl new file mode 100644 index 0000000..84b0e20 --- /dev/null +++ b/benchmark/svd2.jl @@ -0,0 +1,102 @@ +using LinearAlgebra, MKL +using TensorOperations +using TensorCast +using TSVD +using LowRankApprox +using RandomizedLinAlg +using FameSVD + + +# C = A * B +struct MyTensor{T<:Number} + A::Array{T,2} + B::Array{T,2} +end + +struct AMyTensor{T<:Number} + A::Array{T,2} + B::Array{T,2} +end + + +Base.Array(ten::MyTensor) = kron(ten.A, ten.B) + +# this is for tsvd to work +Base.eltype(ten::MyTensor{T}) where {T} = T +Base.size(ten::MyTensor) = + (size(ten.A, 1) * size(ten.B, 1), size(ten.A, 2) * size(ten.B, 2)) +Base.size(ten::MyTensor, n::Int) = size(ten)[n] +# Base.adjoint(ten::MyTensor{T}) where T = MyTensor{T}(adjoint(ten.A), adjoint(ten.B)) + +# Base.:(*)(ten::MyTensor{T}, v::Vector{T}) where T = (kron(ten.A, ten.B) * v) + +Base.adjoint(ten::MyTensor{T}) where {T} = AMyTensor{T}(ten.A, ten.B) + + +function Base.:(*)(ten::MyTensor{T}, v::Vector{T}) where {T} + println("M") + vv = reshape(v, size(ten.A, 2), size(ten.B, 2)) + println(size(vv)) + @tensor x[x1, y1] := ten.A[x1, x2] * ten.B[y1, y2] * vv[x2, y2] + reshape(x, size(ten.A, 1) * size(ten.B, 1)) +end + +function Base.:(*)(ten::AMyTensor{T}, v::Vector{T}) where {T} + println("A") + vv = reshape(v, size(ten.A, 1), size(ten.B, 1)) + println(size(vv)) + + @tensor x[x1, y1] := ten.A[x2, x1] * ten.B[y2, y1] * vv[x2, y2] + reshape(x, size(ten.A, 2) * size(ten.B, 2)) +end + + +# this is for psvd to work +LinearAlgebra.ishermitian(ten::MyTensor) = false + + +function LinearAlgebra.mul!(y, ten::MyTensor, v) + println("K") + vv = reshape(v, size(ten.A, 2), size(ten.B, 2), :) + println(size(vv)) + @tensor x[x1, y1, z1] := ten.A[x1, x2] * ten.B[y1, y2] * vv[x2, y2, z1] + y[:, :] = reshape(x, size(ten.A, 1) * size(ten.B, 1), :) +end + +function LinearAlgebra.mul!(y, ten::AMyTensor, v) + println("L") + vv = reshape(v, size(ten.A, 1), size(ten.B, 1), :) + println(size(vv)) + @tensor x[x1, y1, z1] := ten.A[x2, x1] * ten.B[y2, y1] * vv[x2, y2, z1] + y[:, :] = reshape(x, size(ten.A, 2) * size(ten.B, 2), :) +end + + +n = 2^2 +cut = 2^1 +T = Float64 + +ten = MyTensor(rand(T, n + 1, n), rand(T, n + 2, n - 1)) + +println("tsvd:") +@time U, Σ1, V = tsvd(ten, cut) + +println("psvd:") +@time U, Σ2, V = psvd(ten, rank = cut) + +println("svd:") +@time begin + C = Array(ten) + U, Σ3, V = svd(C) +end + +# println("psvd:") +# @time begin +# C = Array(ten) +# U, Σ4, V = psvd(C, rank=cut) +# end + +println(Σ1) +println(Σ2) +println(Σ3[1:cut]) +# println(Σ4) diff --git a/docs/make.jl b/docs/make.jl index 3cc0eed..d417f52 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,20 +1,20 @@ using Documenter, SpinGlassTensors _pages = [ - "Introduction" => "index.md", - "API Reference" => "api.md" + "User guide" => "index.md", 
+ "Matrix Product States and Matrix Product Operations" => "mpo.md", + "API Reference" => "api.md", ] # ============================ -format = Documenter.HTML(edit_link = "master", - prettyurls = get(ENV, "CI", nothing) == "true", -) +format = + Documenter.HTML(edit_link = "master", prettyurls = get(ENV, "CI", nothing) == "true") # format = Documenter.LaTeX(platform="none") makedocs( - sitename="SpinGlassTensors.jl", + sitename = "SpinGlassTensors.jl", modules = [SpinGlassTensors], pages = _pages, - format = format - ) \ No newline at end of file + format = format, +) diff --git a/docs/src/api.md b/docs/src/api.md index 120ec4f..48c7ce6 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -6,29 +6,16 @@ CurrentModule = SpinGlassTensors ``` ## Additional methods for `Base` and `LinearAlgebra` ```@docs -dot -norm -randn -rank +left_nbrs_site +right_nbrs_site +project_ket_on_bra ``` - ## MPS -```@docs -MPS -is_left_normalized -is_right_normalized -physical_dim -verify_bonds -verify_physical_dims -``` -## Compresions and Contractions +## Compresions and Contractions ```@docs -canonise! -compress! -left_env -right_env -truncate! - +update_env_left +update_env_right +update_reduced_env_right ``` \ No newline at end of file diff --git a/docs/src/index.md b/docs/src/index.md index 27ca46e..2452d07 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -3,4 +3,6 @@ Part of [SpinGlassPEPS](https://github.com/euro-hpc-pl/SpinGlassPEPS.jl) package. It constitutes the basis for the preparation of tensors and operations on them. !!! info - We don't expect the user to interact with this package, as it is more of a "back-end" type. Nevertheless, we provide API references should the need arise. \ No newline at end of file + We don't expect the user to interact with this package, as it is more of a "back-end" type. Nevertheless, we provide API references should the need arise. + +This section of the package encompasses supplementary functionalities that serve as support for the main solver. It includes the creation and manipulation of tensors, with a particular emphasis on implementing Matrix Product States (MPS) and Matrix Product Operators (MPO). 
\ No newline at end of file diff --git a/docs/src/mpo.md b/docs/src/mpo.md new file mode 100644 index 0000000..66211e4 --- /dev/null +++ b/docs/src/mpo.md @@ -0,0 +1,5 @@ +# Matrix Product States and Matrix Product Operations + +```@docs +MpoTensor +``` \ No newline at end of file diff --git a/src/SpinGlassTensors.jl b/src/SpinGlassTensors.jl index 08b0ef1..f8d54ad 100644 --- a/src/SpinGlassTensors.jl +++ b/src/SpinGlassTensors.jl @@ -1,13 +1,43 @@ module SpinGlassTensors -using LinearAlgebra +using LinearAlgebra, MKL using TensorOperations, TensorCast -using Memoize - +using LowRankApprox, TSVD +using CUDA, CUDA.CUSPARSE +using cuTENSOR +using NNlib +using Memoization +using SparseArrays using DocStringExtensions +using Base.Cartesian + +import Base.Prehashed +# using SpinGlassNetworks + +CUDA.allowscalar(false) +include("projectors.jl") include("base.jl") include("linear_algebra_ext.jl") -include("compressions.jl") -include("identities.jl") -include("contractions.jl") +include("utils/utils.jl") +include("./mps/base.jl") +include("./mps/transpose.jl") +include("./mps/dot.jl") +include("./mps/identity.jl") +include("./mps/utils.jl") +include("./mps/rand.jl") +include("transfer.jl") +include("environment.jl") +include("utils/memory.jl") +include("./mps/canonise.jl") +include("variational.jl") +include("zipper.jl") +include("gauges.jl") +include("contractions/sparse.jl") +include("contractions/dense.jl") +include("contractions/central.jl") +include("contractions/diagonal.jl") +include("contractions/site.jl") +include("contractions/virtual.jl") + + end # module diff --git a/src/base.jl b/src/base.jl index c8d789e..8a0dc39 100644 --- a/src/base.jl +++ b/src/base.jl @@ -1,237 +1,131 @@ -export bond_dimension, is_left_normalized, is_right_normalized -export verify_bonds, verify_physical_dims, tensor, rank, physical_dim -export State, dropindices - -const State = Union{Vector,NTuple} - -abstract type AbstractTensorNetwork{T} end - -for (T, N) ∈ ((:PEPSRow, 5), (:MPO, 4), (:MPS, 3)) - AT = Symbol(:Abstract, T) - @eval begin - export $AT - export $T - - abstract type $AT{T} <: AbstractTensorNetwork{T} end - - struct $T{T<:Number} <: $AT{T} - tensors::Vector{Array{T,$N}} - end - - # consturctors - $T(::Type{T}, L::Int) where {T} = $T(Vector{Array{T,$N}}(undef, L)) - $T(L::Int) = $T(Float64, L) - - @inline Base.setindex!(a::$AT, A::AbstractArray{<:Number,$N}, i::Int) = - a.tensors[i] = A - @inline bond_dimension(a::$AT) = maximum(size.(a.tensors, $N)) - Base.hash(a::$T, h::UInt) = hash(a.tensors, h) - @inline Base.:(==)(a::$T, b::$T) = a.tensors == b.tensors - @inline Base.:(≈)(a::$T, b::$T) = a.tensors ≈ b.tensors - Base.copy(a::$T) = $T(copy(a.tensors)) - - @inline Base.eltype(::$AT{T}) where {T} = T +# base.jl: This file defines basic tensor structures to be used with SpinGlassEngine + +export Tensor, + SiteTensor, + VirtualTensor, + DiagonalTensor, + CentralTensor, + CentralOrDiagonal, + dense_central + +abstract type AbstractSparseTensor{T,N} end + +mutable struct SiteTensor{T<:Real,N} <: AbstractSparseTensor{T,N} + lp::PoolOfProjectors + loc_exp::AbstractVector{T} + projs::NTuple{4,Int} # pl, pt, pr, pb + dims::Dims{N} + + function SiteTensor(lp::PoolOfProjectors, loc_exp, projs::NTuple{4,Vector{Int}}) + T = eltype(loc_exp) + ks = Tuple(add_projector!(lp, p) for p ∈ projs) + dims = size.(Ref(lp), ks) + new{T,4}(lp, loc_exp, ks, dims) end -end - - -@inline Base.getindex(a::AbstractTensorNetwork, i) = getindex(a.tensors, i) -@inline Base.iterate(a::AbstractTensorNetwork) = iterate(a.tensors) 
-@inline Base.iterate(a::AbstractTensorNetwork, state) = iterate(a.tensors, state) -@inline Base.lastindex(a::AbstractTensorNetwork) = lastindex(a.tensors) -@inline Base.length(a::AbstractTensorNetwork) = length(a.tensors) -@inline Base.size(a::AbstractTensorNetwork) = (length(a.tensors),) -@inline Base.eachindex(a::AbstractTensorNetwork) = eachindex(a.tensors) - -""" - LinearAlgebra.rank(ψ::AbstractMPS) - -Returns rank of MPS tensors. -""" -@inline LinearAlgebra.rank(ψ::AbstractMPS) = Tuple(size(A, 2) for A ∈ ψ) - -""" - physical_dim(ψ::AbstractMPS, i::Int) - -Returns physical dimension of MPS tensors at given site i. -""" -@inline physical_dim(ψ::AbstractMPS, i::Int) = size(ψ[i], 2) - - - -@inline MPS(A::AbstractArray) = MPS(A, :right) - - - -""" - MPS(A::AbstractArray, s::Symbol, Dcut::Int = typemax(Int)) - -Construct a matrix product state (MPS) using the provided tensor array `A`. - -## Arguments - -- `A::AbstractArray`: The tensor array that defines the MPS. -- `s::Symbol`: The direction to canonically transform the MPS. Must be either `:left` or `:right`. -- `Dcut::Int`: The maximum bond dimension allowed during the truncation step. -## Returns - -- `ψ::AbstractMPS`: The constructed MPS. - -## Details - -This function constructs a matrix product state (MPS) using the provided tensor array `A`, -and then canonically transforms it in the direction specified by the `s` argument. If `s` is `:right`, -the MPS is right-canonized, while if `s` is `:left`, the MPS is left-canonized. -The `Dcut` argument determines the maximum bond dimension allowed during the truncation step. -If neither `Dcut` nor `s` is specified, it will construct right-canonized MPS with default Dcut value. - -## Example - -```@repl -A = rand(2, 3, 2) -ψ = MPS(A, :left, 2); -typeof(ψ) -length(ψ) -bond_dimension(ψ) -``` -""" -@inline function MPS(A::AbstractArray, s::Symbol, Dcut::Int = typemax(Int)) - @assert s ∈ (:left, :right) - if s == :right - ψ = _right_sweep(A) - _left_sweep!(ψ, Dcut) - else - ψ = _left_sweep(A) - _right_sweep!(ψ, Dcut) + function SiteTensor( + lp::PoolOfProjectors, + loc_exp, + projs::NTuple{4,Int}, + dims::NTuple{4,Int}, + ) + T = eltype(loc_exp) + new{T,4}(lp, loc_exp, projs, dims) end - ψ -end - -@inline dropindices(ψ::AbstractMPS, i::Int = 2) = (dropdims(A, dims = i) for A ∈ ψ) - - -""" - MPS(states::Vector{Vector{T}}) where {T<:Number} - -Create a matrix product state (MPS) object from a vector of states. -""" -function MPS(states::Vector{Vector{T}}) where {T<:Number} - state_arrays = [reshape(copy(v), (1, length(v), 1)) for v ∈ states] - MPS(state_arrays) -end - -function (::Type{T})(ψ::AbstractMPS) where {T<:AbstractMPO} - _verify_square(ψ) - T([@cast W[x, σ, y, η] |= A[x, (σ, η), y] (σ ∈ 1:isqrt(size(A, 2))) for A in ψ]) end -function (::Type{T})(O::AbstractMPO) where {T<:AbstractMPS} - T([@cast A[x, (σ, η), y] := W[x, σ, y, η] for W in O]) -end -""" - Base.randn(::Type{MPS{T}}, D::Int, rank::Union{Vector,NTuple}) where {T} - -Create random MPS.The argument `D` specifies the physical dimension of the MPS -(i.e. the dimension of the vectors at each site), `rank` specifies rank of each site. 
-""" -function Base.randn(::Type{MPS{T}}, D::Int, rank::Union{Vector,NTuple}) where {T} - MPS([ - randn(T, 1, first(rank), D), - randn.(T, D, rank[begin+1:end-1], D)..., - rand(T, D, last(rank), 1), - ]) +function mpo_transpose(ten::SiteTensor) + perm = [1, 4, 3, 2] + SiteTensor(ten.lp, ten.loc_exp, ten.projs[perm], ten.dims[perm]) end -function Base.randn(::Type{MPS{T}}, L::Int, D::Int, d::Int) where {T} - MPS([randn(T, 1, d, D), (randn(T, D, d, D) for _ = 2:L-1)..., randn(T, D, d, 1)]) +mutable struct CentralTensor{T<:Real,N} <: AbstractSparseTensor{T,N} + e11::AbstractMatrix{T} + e12::AbstractMatrix{T} + e21::AbstractMatrix{T} + e22::AbstractMatrix{T} + dims::Dims{N} + + function CentralTensor(e11, e12, e21, e22) + s11, s12, s21, s22 = size.((e11, e12, e21, e22)) + @assert s11[1] == s12[1] && s21[1] == s22[1] && s11[2] == s21[2] && s12[2] == s22[2] + dims = (s11[1] * s21[1], s11[2] * s12[2]) + T = promote_type(eltype.((e11, e12, e21, e22))...) + new{T,2}(e11, e12, e21, e22, dims) + end end +Base.adjoint(M::CentralTensor{R,2}) where {R<:Real} = + CentralTensor(M.e11', M.e21', M.e12', M.e22') -Base.randn(::Type{MPS}, args...) = randn(MPS{Float64}, args...) - -function Base.randn(::Type{MPO{T}}, L::Int, D::Int, d::Int) where {T} - MPO(randn(MPS{T}, L, D, d^2)) -end +mpo_transpose(ten::CentralTensor) = + CentralTensor(permutedims.((ten.e11, ten.e21, ten.e12, ten.e22), Ref((2, 1)))...) -function Base.randn(::Type{MPO{T}}, D::Int, rank::Union{Vector,NTuple}) where {T} - MPO(randn(MPS{T}, D, rank .^ 2)) -end +const MatOrCentral{T,N} = Union{AbstractMatrix{T},CentralTensor{T,N}} -""" - is_left_normalized(ψ::MPS) - -Check whether MPS is left normalized. -""" -is_left_normalized(ψ::MPS) = all( - I(size(A, 3)) ≈ @tensor Id[x, y] := conj(A[α, σ, x]) * A[α, σ, y] order = (α, σ) for - A ∈ ψ -) - -""" - is_right_normalized(ϕ::MPS) - -Check whether MPS is right normalized. -""" -is_right_normalized(ϕ::MPS) = all( - I(size(B, 1)) ≈ @tensor Id[x, y] := B[x, σ, α] * conj(B[y, σ, α]) order = (α, σ) for - B in ϕ -) - -function _verify_square(ψ::AbstractMPS) - dims = physical_dim.(Ref(ψ), eachindex(ψ)) - @assert isqrt.(dims) .^ 2 == dims "Incorrect MPS dimensions" +# TODO: to be removed eventually +function dense_central(ten::CentralTensor) + @cast V[(u1, u2), (d1, d2)] := + ten.e11[u1, d1] * ten.e21[u2, d1] * ten.e12[u1, d2] * ten.e22[u2, d2] + V ./ maximum(V) end +dense_central(ten::AbstractArray) = ten -""" - verify_physical_dims(ψ::AbstractMPS, dims::NTuple) +mutable struct DiagonalTensor{T<:Real,N} <: AbstractSparseTensor{T,N} + e1::MatOrCentral{T,N} + e2::MatOrCentral{T,N} + dims::Dims{N} -Check whether MPS has correct physical dimension at given site. -""" -function verify_physical_dims(ψ::AbstractMPS, dims::NTuple) - for i ∈ eachindex(ψ) - @assert physical_dim(ψ, i) == dims[i] "Incorrect physical dim at site $(i)." + function DiagonalTensor(e1, e2) + dims = (size(e1, 1) * size(e2, 1), size(e1, 2) * size(e2, 2)) + T = promote_type(eltype.((e1, e2))...) + new{T,2}(e1, e2, dims) end end -""" - verify_bonds(ψ::AbstractMPS) - -Check whether MPS has correct sizes. -""" -function verify_bonds(ψ::AbstractMPS) - L = length(ψ) - - @assert size(ψ[1], 1) == 1 "Incorrect size on the left boundary." - @assert size(ψ[end], 3) == 1 "Incorrect size on the right boundary." +mpo_transpose(ten::DiagonalTensor) = DiagonalTensor(mpo_transpose.((ten.e2, ten.e1))...) 
+ +mutable struct VirtualTensor{T<:Real,N} <: AbstractSparseTensor{T,N} + lp::PoolOfProjectors + con::MatOrCentral{T,2} + projs::NTuple{6,Int} # == (p_lb, p_l, p_lt, p_rb, p_r, p_rt) + dims::Dims{N} + + function VirtualTensor(lp::PoolOfProjectors, con, projs::NTuple{6,Vector{Int}}) + T = eltype(con) + ks = Tuple(add_projector!(lp, p) for p ∈ projs) + dims = ( + length(lp, ks[2]), + size(lp, ks[3]) * size(lp, ks[6]), + length(lp, ks[5]), + size(lp, ks[1]) * size(lp, ks[4]), + ) + new{T,4}(lp, con, ks, dims) + end - for i ∈ 1:L-1 - @assert size(ψ[i], 3) == size(ψ[i+1], 1) "Incorrect link between $i and $(i+1)." + function VirtualTensor( + lp::PoolOfProjectors, + con, + projs::NTuple{6,Int}, + dims::NTuple{4,Int}, + ) + T = eltype(con) + new{T,4}(lp, con, projs, dims) end end -function Base.show(io::IO, ψ::AbstractTensorNetwork) - L = length(ψ) - dims = [size(A) for A ∈ ψ] - - println(io, "Matrix product state on $L sites:") - _show_sizes(io, dims) - println(io, " ") -end +mpo_transpose(ten::VirtualTensor) = + VirtualTensor(ten.lp, ten.con, ten.projs[[3, 2, 1, 6, 5, 4]], ten.dims[[1, 4, 3, 2]]) +mpo_transpose(ten::AbstractArray{T,4}) where {T} = permutedims(ten, (1, 4, 3, 2)) +mpo_transpose(ten::AbstractArray{T,2}) where {T} = permutedims(ten, (2, 1)) +const SparseTensor{T,N} = + Union{SiteTensor{T,N},VirtualTensor{T,N},CentralTensor{T,N},DiagonalTensor{T,N}} +const Tensor{T,N} = Union{AbstractArray{T,N},SparseTensor{T,N}} +const CentralOrDiagonal{T,N} = Union{CentralTensor{T,N},DiagonalTensor{T,N}} -function _show_sizes(io::IO, dims::Vector, sep::String = " x ", Lcut::Int = 8) - L = length(dims) - if L > Lcut - for i ∈ 1:Lcut - print(io, " ", dims[i], sep) - end - print(io, " ... × ", dims[end]) - else - for i ∈ 1:(L-1) - print(io, dims[i], sep) - end - println(io, dims[end]) - end -end +Base.eltype(ten::Tensor{T,N}) where {T,N} = T +Base.ndims(ten::Tensor{T,N}) where {T,N} = N +Base.size(ten::SparseTensor, n::Int) = ten.dims[n] +Base.size(ten::SparseTensor) = ten.dims diff --git a/src/compressions.jl b/src/compressions.jl deleted file mode 100644 index 8a69d42..0000000 --- a/src/compressions.jl +++ /dev/null @@ -1,240 +0,0 @@ -export canonise!, truncate!, compress!, compress - - -# This is for backwards compatibility -function compress( - ϕ::AbstractMPS, - Dcut::Int, - tol::Number = 1E-8, - max_sweeps::Int = 4, - args..., -) - ψ = copy(ϕ) - compress!(ψ, Dcut, tol, max_sweeps, args...) - ψ -end - -""" - compress!( - ϕ::AbstractMPS, - Dcut::Int, - tol::Number = 1E-8, - max_sweeps::Int = 4, - args..., - ) - -# Arguments - -- `ϕ::AbstractMPS`: the input MPS to be compressed -- `Dcut::Int`: the maximum bond dimension of the compressed MPS -- `tol::Number = 1E-8`: the tolerance threshold for convergence of the iterative compression process (default value: 1E-8) -- `max_sweeps::Int = 4`: the maximum number of iterations allowed for the compression process (default value: 4) - -# Output -- `overlap`: The overlap of the compressed MPS with the original input MPS. -""" -function compress!( - ϕ::AbstractMPS, - Dcut::Int, - tol::Number = 1E-8, - max_sweeps::Int = 4, - args..., -) - # right canonise ϕ - _left_sweep!(ϕ, args...) - - # Initial guess - truncated ϕ - ψ = copy(ϕ) - _right_sweep!(ϕ, Dcut, args...) - - # Create environment - env = left_env(ϕ, ψ) - - # Variational compression - overlap = Inf - overlap_before = -Inf - - @info "Compressing state down to" Dcut - - for sweep ∈ 1:max_sweeps - _left_sweep_var!!(ϕ, env, ψ, args...) - overlap = _right_sweep_var!!(ϕ, env, ψ, args...) 
- - diff = abs(overlap_before - abs(overlap)) - @info "Convergence" diff - - if diff < tol - @info "Finished in $sweep sweeps of $(max_sweeps)." - return overlap - else - overlap_before = overlap - end - end - overlap -end - -""" - truncate!(ψ::AbstractMPS, s::Symbol, Dcut::Int = typemax(Int), args...) - -Truncate the bond dimension of a matrix product state (MPS) in either -the left or right canonical form, depending on the value of the `s` input argument. - -# Arguments - -- `ψ::AbstractMPS`: the input MPS to be truncated -- `s::Symbol`: determines whether to truncate the MPS in the left or right canonical form. Must be one of the following values: - - `:left`: truncate in left canonical form - - `:right`: truncate in right canonical form -- `Dcut::Int`: the maximum bond dimension to which the MPS should be truncated. - -""" -function truncate!(ψ::AbstractMPS, s::Symbol, Dcut::Int = typemax(Int), args...) - @assert s ∈ (:left, :right) - if s == :right - _right_sweep!(ψ, args...) - _left_sweep!(ψ, Dcut, args...) - else - _left_sweep!(ψ, args...) - _right_sweep!(ψ, Dcut, args...) - end -end - -""" - canonise!(ψ::AbstractMPS, s::Symbol) - -canonizes a matrix product state (MPS) in either the left or right canonical form, -depending on the value of the `s` input argument. Must be one of the following values: -- `:left`: canonize in left canonical form -- `:right`: canonize in right canonical form - -""" -canonise!(ψ::AbstractMPS, s::Symbol) = canonise!(ψ, Val(s)) - -canonise!(ψ::AbstractMPS, ::Val{:right}) = _left_sweep!(ψ, typemax(Int)) -canonise!(ψ::AbstractMPS, ::Val{:left}) = _right_sweep!(ψ, typemax(Int)) - - -function _right_sweep!(ψ::AbstractMPS, Dcut::Int = typemax(Int), args...) - R = ones(eltype(ψ), 1, 1) - for (i, A) ∈ enumerate(ψ) - @matmul M̃[(x, σ), y] := sum(α) R[x, α] * A[α, σ, y] - Q, R = qr_fact(M̃, Dcut, args...) - R = R ./ maximum(abs.(R)) - @cast A[x, σ, y] := Q[(x, σ), y] (σ ∈ 1:size(A, 2)) - ψ[i] = A - end -end - - -function _left_sweep!(ψ::AbstractMPS, Dcut::Int = typemax(Int), args...) - R = ones(eltype(ψ), 1, 1) - for i ∈ length(ψ):-1:1 - B = ψ[i] - @matmul M̃[x, (σ, y)] := sum(α) B[x, σ, α] * R[α, y] - R, Q = rq_fact(M̃, Dcut, args...) - R = R ./ maximum(abs.(R)) - @cast B[x, σ, y] := Q[x, (σ, y)] (σ ∈ 1:size(B, 2)) - ψ[i] = B - end -end - - -function _left_sweep_var!!( - ϕ::AbstractMPS, - env::Vector{<:AbstractMatrix}, - ψ::AbstractMPS, - args..., -) - env[end] = ones(eltype(ϕ), 1, 1) - - for i ∈ length(ψ):-1:1 - L, R = env[i], env[i+1] - - # optimize site - M = ψ[i] - @tensor MM[x, σ, α] := L[x, β] * M[β, σ, α] - @matmul MM[x, (σ, y)] := sum(α) MM[x, σ, α] * R[α, y] - - _, Q = rq_fact(MM, args...) - @cast B[x, σ, y] := Q[x, (σ, y)] (σ ∈ 1:size(M, 2)) - - # update ϕ and right environment - ϕ[i] = B - A = ψ[i] - - @tensor RR[x, y] := A[x, σ, α] * R[α, β] * conj(B)[y, σ, β] order = (β, α, σ) - env[i] = RR - end - env[1][1] -end - - -function _right_sweep_var!!( - ϕ::AbstractMPS, - env::Vector{<:AbstractMatrix}, - ψ::AbstractMPS, - args..., -) - env[1] = ones(eltype(ϕ), 1, 1) - - for (i, M) ∈ enumerate(ψ) - L, R = env[i], env[i+1] - - # optimize site - @tensor M̃[x, σ, α] := L[x, β] * M[β, σ, α] - @matmul B[(x, σ), y] := sum(α) M̃[x, σ, α] * R[α, y] - - Q, _ = qr_fact(B, args...) 
- @cast A[x, σ, y] := Q[(x, σ), y] (σ ∈ 1:size(M, 2)) - - # update ϕ and left environment - ϕ[i] = A - B = ψ[i] - - @tensor LL[x, y] := conj(A[β, σ, x]) * L[β, α] * B[α, σ, y] order = (α, β, σ) - env[i+1] = LL - end - env[end][1] -end - - -function _right_sweep( - A::AbstractArray, - Dcut::Int = typemax(Int), - args..., -) - rank = ndims(A) - ψ = MPS(eltype(A), rank) - R = reshape(copy(A), (1, length(A))) - - for i ∈ 1:rank - d = size(A, i) - @cast M[(x, σ), y] := R[x, (σ, y)] (σ ∈ 1:d) - Q, R = qr_fact(M, Dcut, args...) - R = R ./ maximum(abs.(R)) - @cast B[x, σ, y] := Q[(x, σ), y] (σ ∈ 1:d) - ψ[i] = B - end - ψ -end - - -function _left_sweep( - A::AbstractArray, - Dcut::Int = typemax(Int), - args..., -) - rank = ndims(A) - ψ = MPS(eltype(A), rank) - R = reshape(copy(A), (length(A), 1)) - - for i ∈ rank:-1:1 - d = size(A, i) - @cast M[x, (σ, y)] := R[(x, σ), y] (σ ∈ 1:d) - R, Q = rq_fact(M, Dcut, args...) - R = R ./ maximum(abs.(R)) - @cast B[x, σ, y] := Q[x, (σ, y)] (σ ∈ 1:d) - ψ[i] = B - end - ψ -end diff --git a/src/contractions.jl b/src/contractions.jl deleted file mode 100644 index 69813df..0000000 --- a/src/contractions.jl +++ /dev/null @@ -1,151 +0,0 @@ -export left_env, right_env, dot! - -# --------------------------- Conventions ----------------------- -# -# MPS MPS* MPO left env right env -# 2 2 2 - 1 2 - -# 1 - A - 3 1 - B - 3 1 - W - 3 L R -# 4 - 2 1 - -# --------------------------------------------------------------- -# - -function LinearAlgebra.dot(ϕ::AbstractMPS, ψ::AbstractMPS) - T = promote_type(eltype(ψ), eltype(ϕ)) - C = ones(T, 1, 1) - for (A, B) ∈ zip(ψ, ϕ) - @tensor C[x, y] := conj(B)[β, σ, x] * C[β, α] * A[α, σ, y] order = (α, β, σ) - end - tr(C) -end - -""" - left_env(ϕ::AbstractMPS, ψ::AbstractMPS) - -Creates left environment (ϕ - bra, ψ - ket) -""" -function left_env(ϕ::AbstractMPS, ψ::AbstractMPS) - T = promote_type(eltype(ψ), eltype(ϕ)) - S = typeof(similar(ψ[1], T, (1, 1))) - L = Vector{S}(undef, length(ψ) + 1) - L[1] = similar(ψ[1], T, (1, 1)) - L[1][1, 1] = one(T) - for (i, (A, B)) ∈ enumerate(zip(ψ, ϕ)) - C = L[i] - @tensor C[x, y] := conj(B)[β, σ, x] * C[β, α] * A[α, σ, y] order = (α, β, σ) - L[i+1] = C - end - L -end - - -# TODO: remove it (after SpinGlassEngine is updated) -@memoize Dict function left_env(ϕ::AbstractMPS, σ::Vector{Int}) - l = length(σ) - if l == 0 - return ones(eltype(ϕ), 1) - end - m = σ[l] - L̃ = left_env(ϕ, σ[1:l-1]) - M = ϕ[l] - @matmul L[x] := sum(α) L̃[α] * M[α, $m, x] - L -end - -""" - right_env(ϕ::AbstractMPS, ψ::AbstractMPS) - -Creates right environment (ϕ - bra, ψ - ket) -""" -function right_env(ϕ::AbstractMPS, ψ::AbstractMPS) - L = length(ψ) - T = promote_type(eltype(ψ), eltype(ϕ)) - S = typeof(similar(ψ[1], T, (1, 1))) - R = Vector{S}(undef, L + 1) - R[end] = similar(ψ[1], T, (1, 1)) - R[end][1, 1] = one(T) - for i ∈ L:-1:1 - M = ψ[i] - M̃ = ϕ[i] - D = R[i+1] - @tensor D[x, y] := M[x, σ, α] * D[α, β] * conj(M̃)[y, σ, β] order = (β, α, σ) - R[i] = D - end - R -end - -# TODO: remove it (after SpinGlassEngine is updated) -@memoize Dict function right_env( - ϕ::AbstractMPS{T}, - W::AbstractMPO{T}, - σ::Union{Vector,NTuple}, -) where {T} - l = length(σ) - if l == 0 - R = similar(ϕ[1], T, (1, 1)) - R[1, 1] = one(T) - return R - end - k = length(W) - R̃ = right_env(ϕ, W, σ[2:l]) - M = ϕ[k-l+1] - M̃ = W[k-l+1] - K = @view M̃[:, σ[1], :, :] - @tensor R[x, y] := K[y, β, γ] * M[x, γ, α] * R̃[α, β] order = (β, γ, α) - R -end - -""" -$(TYPEDSIGNATURES) - -Calculates the norm of an MPS \$\\ket{\\phi}\$ -""" 
-LinearAlgebra.norm(ψ::AbstractMPS) = sqrt(abs(dot(ψ, ψ))) - -""" -$(TYPEDSIGNATURES) - -Calculates \$\\bra{\\phi} O \\ket{\\psi}\$ - -# Details - -Calculates the matrix element of \$O\$ -```math -\\bra{\\phi} O \\ket{\\psi} -``` -in one pass, utlizing `TensorOperations`. -""" -function LinearAlgebra.dot(ϕ::AbstractMPS, O::Union{Vector,NTuple}, ψ::AbstractMPS) #where T <: AbstractMatrix - S = promote_type(eltype(ψ), eltype(ϕ), eltype(O[1])) - C = similar(ψ[1], S, (1, 1)) - C[1, 1] = one(S) - for (A, W, B) ∈ zip(ϕ, O, ψ) - @tensor C[x, y] := conj(A)[β, σ, x] * W[σ, η] * C[β, α] * B[α, η, y] order = - (α, η, β, σ) - end - tr(C) -end - -function LinearAlgebra.dot(O::AbstractMPO, ψ::AbstractMPS) - S = promote_type(eltype(ψ), eltype(O)) - T = typeof(ψ) - ϕ = T.name.wrapper(S, length(ψ)) - for (i, (A, B)) ∈ enumerate(zip(O, ψ)) - @matmul N[(x, a), σ, (y, b)] := sum(η) A[x, σ, y, η] * B[a, η, b] - ϕ[i] = N - end - ϕ -end - - -function LinearAlgebra.dot(O1::AbstractMPO, O2::AbstractMPO) - S = promote_type(eltype(O1), eltype(O2)) - T = typeof(O1) - O = T.name.wrapper(S, length(O1)) - for (i, (A, B)) ∈ enumerate(zip(O1, O2)) - @matmul V[(x, a), σ, (y, b), η] := sum(γ) A[x, σ, y, γ] * B[a, γ, b, η] - O[i] = V - end - O -end - -Base.:(*)(A::AbstractTensorNetwork, B::AbstractTensorNetwork) = dot(A, B) diff --git a/src/contractions/central.jl b/src/contractions/central.jl new file mode 100644 index 0000000..9397449 --- /dev/null +++ b/src/contractions/central.jl @@ -0,0 +1,111 @@ +# contractions with CentralTensor on CPU and CUDA + +export contract_tensor3_matrix, contract_matrix_tensor3, update_reduced_env_right +# my_batched_mul! + +function contract_tensor3_matrix(LE::Tensor{R,3}, M::CentralTensor{R,2}) where {R<:Real} + contract_tensor3_central(LE, M.e11, M.e12, M.e21, M.e22) +end + +function contract_matrix_tensor3(M::CentralTensor{R,2}, RE::Tensor{R,3}) where {R<:Real} + contract_tensor3_central(RE, M.e11', M.e21', M.e12', M.e22') +end + +function update_reduced_env_right(RR::Tensor{R,2}, M::CentralTensor{R,2}) where {R<:Real} + RR = reshape(RR, size(RR, 1), 1, size(RR, 2)) + dropdims(contract_matrix_tensor3(M, RR), dims = 2) +end + + +function contract_tensor3_central(LE, e11, e12, e21, e22) + sb, st = size(LE) + sbt = sb * st + sl1, sl2, sr1, sr2 = size(e11, 1), size(e22, 1), size(e11, 2), size(e22, 2) + sinter = sbt * max(sl1 * sl2 * min(sr1, sr2), sr1 * sr2 * min(sl1, sl2)) + if sl1 * sl2 * sr1 * sr2 < sinter + @cast E[(l1, l2), (r1, r2)] := e11[l1, r1] * e21[l2, r1] * e12[l1, r2] * e22[l2, r2] + return reshape(reshape(LE, (sbt, sl1 * sl2)) * E, (sb, st, sr1 * sr2)) + elseif sr1 <= sr2 && sl1 <= sl2 + LE = reshape(LE, sbt, sl1, 1, sl2) .* reshape(e21', 1, 1, sr1, sl2) # [tb, l1, r1, l2] + LE = reshape(reshape(LE, sbt * sl1 * sr1, sl2) * e22, (sbt, sl1, sr1, sr2)) # [tb, l1, r1, r2] + LE .*= reshape(e11, 1, sl1, sr1, 1) # [tb, l1, r1, r2] .* [:, l1, r1, :] + LE .*= reshape(e12, 1, sl1, 1, sr2) # [tb, l1, r1, r2] .* [:, l1, :, r2] + LE = sum(LE, dims = 2) + elseif sr1 <= sr2 && sl2 <= sl1 + LE = permutedims(reshape(LE, (sbt, sl1, sl2)), (1, 3, 2)) # [tb, l2, l1] + LE = reshape(LE, sbt, sl2, 1, sl1) .* reshape(e11', 1, 1, sr1, sl1) # [tb, l2, r1, l1] + LE = reshape(reshape(LE, sbt * sl2 * sr1, sl1) * e12, (sbt, sl2, sr1, sr2)) # [tb, l2, r1, r2] + LE .*= reshape(e21, 1, sl2, sr1, 1) # [tb, l2, r1, r2] .* [:, l2, r1, :] + LE .*= reshape(e22, 1, sl2, 1, sr2) # [tb, l2, r1, r2] .* [:, l2, :, r2] + LE = sum(LE, dims = 2) + elseif sr2 <= sr1 && sl1 <= sl2 + LE = reshape(LE, sbt, sl1, 1, sl2) .* 
reshape(e22', 1, 1, sr2, sl2) # [tb, l1, r2, l2] + LE = reshape(reshape(LE, sbt * sl1 * sr2, sl2) * e21, (sbt, sl1, sr2, sr1)) # [tb, l1, r2, r1] + LE .*= reshape(e11, 1, sl1, 1, sr1) # [tb, l1, r2, r1] .* [:, l1, :, r1] + LE .*= reshape(e12, 1, sl1, sr2, 1) # [tb, l1, r2, r1] .* [:, l1, r2, :] + LE = permutedims(dropdims(sum(LE, dims = 2), dims = 2), (1, 3, 2)) + else # sr2 <= sr1 && sl2 <= sl1 + LE = permutedims(reshape(LE, (sbt, sl1, sl2)), (1, 3, 2)) # [tb, l2, l1] + LE = reshape(LE, sbt, sl2, 1, sl1) .* reshape(e12', 1, 1, sr2, sl1) # [tb, l2, r2, l1] + LE = reshape(reshape(LE, sbt * sl2 * sr2, sl1) * e11, (sbt, sl2, sr2, sr1)) # [tb, l2, r2, r1] + LE .*= reshape(e21, 1, sl2, 1, sr1) # [tb, l2, r2, r1] .* [:, l2, :, r1] + LE .*= reshape(e22, 1, sl2, sr2, 1) # [tb, l2, r2, r1] .* [:, l2, r2, :] + LE = permutedims(dropdims(sum(LE, dims = 2), dims = 2), (1, 3, 2)) + end + reshape(LE, sb, st, sr1 * sr2) +end + +function batched_mul!( + newLE::Tensor{R,3}, + LE::Tensor{R,3}, + M::AbstractArray{R,2}, +) where {R<:Real} + N1, N2 = size(M) + new_M = CUDA.CuArray(M) # TODO: this is a hack to solve problem with types; + new_M = reshape(new_M, (N1, N2, 1)) + NNlib.batched_mul!(newLE, LE, new_M) +end + +function batched_mul!( + newLE::Tensor{R,3}, + LE::Tensor{R,3}, + M::CentralTensor{R,2}, +) where {R<:Real} + sb, _, st = size(LE) + sl1, sl2, sr1, sr2 = size(M.e11, 1), size(M.e22, 1), size(M.e11, 2), size(M.e22, 2) + sinter = sb * st * max(sl1 * sl2 * min(sr1, sr2), sr1 * sr2 * min(sl1, sl2)) + if sl1 * sl2 * sr1 * sr2 < sinter + @cast E[(l1, l2), (r1, r2)] := + M.e11[l1, r1] * M.e21[l2, r1] * M.e12[l1, r2] * M.e22[l2, r2] + E = reshape(E, (sl1 * sl2, sr1 * sr2, 1)) + NNlib.batched_mul!(newLE, LE, E) + elseif sr1 <= sr2 && sl1 <= sl2 + LE = reshape(LE, sb * sl1, 1, sl2, st) .* reshape(M.e21', 1, sr1, sl2, 1) # [b * l1, r1, l2, t] + LE = batched_mul(reshape(LE, sb * sl1 * sr1, sl2, st), M.e22) # [(b, l1, r1), r2, t] + LE = reshape(LE, (sb, sl1, sr1, sr2, st)) # [b, l1, r1, r2, t] + LE .*= reshape(M.e11, 1, sl1, sr1, 1, 1) # [b, l1, r1, r2, t] .* [:, l1, r1, :, :] + LE .*= reshape(M.e12, 1, sl1, 1, sr2, 1) # [b, l1, r1, r2, t] .* [:, l1, :, r2, :] + sum!(reshape(newLE, (sb, 1, sr1, sr2, st)), LE) + elseif sr1 <= sr2 && sl2 <= sl1 + LE = reshape(LE, sb, 1, sl1, sl2 * st) .* reshape(M.e11', 1, sr1, sl1, 1) # [b, r1, l1, l2, t] + LE = batched_mul(reshape(LE, sb * sr1, sl1, sl2 * st), M.e12) # [(b, r1), r2, (l2, t)] + LE = reshape(LE, (sb, sr1, sr2, sl2, st)) # [b, r1, r2, l2, t] + LE .*= reshape(M.e21', 1, sr1, 1, sl2, 1) # [b, r1, r2, l2, t] .* [:, r1, :, l2, :] + LE .*= reshape(M.e22', 1, 1, sr2, sl2, 1) # [b, r1, r2, l2, t] .* [:, :, r2, l2, :] + sum!(reshape(newLE, (sb, sr1, sr2, 1, st)), LE) + elseif sr2 <= sr1 && sl1 <= sl2 + LE = reshape(LE, sb * sl1, sl2, 1, st) .* reshape(M.e22, 1, sl2, sr2, 1) # [b, l1, l2, r2, t] + LE = batched_mul(reshape(LE, sb * sl1, sl2, sr2 * st), M.e21) # [(b, l1), r1, (r2, t)] + LE = reshape(LE, (sb, sl1, sr1, sr2, st)) # [b, l1, r1, r2, t] + LE .*= reshape(M.e11, 1, sl1, sr1, 1, 1) # [b, l1, r1, r2, t] .* [:, l1, r1, :, :] + LE .*= reshape(M.e12, 1, sl1, 1, sr2, 1) # [b, l1, r1, r2, t] .* [:, l1, :, r2, :] + sum!(reshape(newLE, (sb, 1, sr1, sr2, st)), LE) + else # sr2 <= sr1 && sl2 <= sl1 + LE = reshape(LE, sb, sl1, sl2, 1, st) .* reshape(M.e12, 1, sl1, 1, sr2, 1) # [b, l1, l2, r2, t] + LE = batched_mul(reshape(LE, sb, sl1, sl2 * sr2 * st), M.e11) # [b, r1, (l2, r2, t)] + LE = reshape(LE, (sb, sr1, sl2, sr2, st)) # [b, r1, l2, r2, t] + LE .*= reshape(M.e21', 1, 
sr1, sl2, 1, 1) # [b, r1, l2, r2, t] .* [:, l2, :, r1] + LE .*= reshape(M.e22, 1, 1, sl2, sr2, 1) # [b, r1, l2, r2, t] .* [:, :, l2, r2, :] + sum!(reshape(newLE, (sb, sr1, 1, sr2, st)), LE) + end +end diff --git a/src/contractions/dense.jl b/src/contractions/dense.jl new file mode 100644 index 0000000..faa6f97 --- /dev/null +++ b/src/contractions/dense.jl @@ -0,0 +1,204 @@ +# contractions of dense objects on CPU and CUDA +# export +# update_reduced_env_right2 + +const MatrixOrCuMatrix{R} = Union{ + CuMatrix{R}, + Matrix{R}, + Diagonal{R,CuArray{R,1,Mem.DeviceBuffer}}, + Diagonal{R,Vector{R}}, +} + +function contract_tensor3_matrix(A::Tensor{R,3}, M::MatrixOrCuMatrix{R}) where {R<:Real} + sl1, sl2, sl3 = size(A) + A = reshape(A, sl1 * sl2, sl3) + reshape(A * M, sl1, sl2, :) +end + +function contract_matrix_tensor3(M::MatrixOrCuMatrix{R}, A::Tensor{R,3}) where {R<:Real} + sl1, sl2, sl3 = size(A) + A = reshape(A, sl1 * sl2, sl3) + reshape(A * M', sl1, sl2, :) +end + +""" + -- A -- + | | + L = LE -- M -- + | | + -- B -- +""" +function update_env_left( + LE::S, + A::S, + M::T, + B::S, +) where {S<:Tensor{R,3},T<:Tensor{R,4}} where {R<:Real} + @tensor order = (ot, α, oc, β, ob) LE[nb, nt, nc] := + LE[ob, ot, oc] * A[ot, nt, α] * M[oc, α, nc, β] * B[ob, nb, β] # TODO: split the line +end + +""" + -- A -- + | | + L = LE | + | | + -- B -- +""" +function update_env_left( + LE::T, + A::S, + B::S, +) where {S<:Tensor{R,3},T<:Tensor{R,2}} where {R<:Real} + @tensor order = (ot, α, ob) LE[nb, nt] := LE[ob, ot] * A[ot, nt, α] * B[ob, nb, α] +end + +""" + -- A -- + | | + L = LE + | + +""" +function update_env_left(LE::T, A::S) where {S<:Tensor{R,3},T<:Tensor{R,2}} where {R<:Real} + @tensor A[nb, nt, nc] := LE[nb, ot] * A[ot, nt, nc] +end + +""" + -- A -- + | | + R = -- M -- RE + | | + -- B -- +""" +function update_env_right( + RE::S, + A::S, + M::T, + B::S, +) where {T<:Tensor{R,4},S<:Tensor{R,3}} where {R<:Real} + @tensor order = (ot, α, oc, β, ob) RE[nb, nt, nc] := + RE[ob, ot, oc] * A[nt, ot, α] * M[nc, α, oc, β] * B[nb, ob, β] +end + +""" + -- A -- + | | + R = | RE + | | + -- B -- +""" +function update_env_right( + RE::T, + A::S, + B::S, +) where {T<:Tensor{R,2},S<:Tensor{R,3}} where {R<:Real} + @tensor order = (ot, α, ob) RE[nb, nt] := RE[ob, ot] * A[nt, ot, α] * B[nb, ob, α] +end + +""" + -- A -- + | | + R = --- RE + | + +""" +function update_env_right(RE::S, C::S) where {S<:Tensor{R,3}} where {R<:Real} + @tensor order = (ot, oc) RR[nb, nt] := RE[nb, ot, oc] * C[nt, ot, oc] +end + +""" + | | | + LE -- M -- RE + | | | + -- B -- +""" +function project_ket_on_bra( + LE::S, + B::S, + M::T, + RE::S, +) where {T<:Tensor{R,4},S<:Tensor{R,3}} where {R<:Real} + @tensor order = (ol, lc, oc, or, rc) A[nl, nr, nc] := + LE[ol, nl, lc] * B[ol, or, oc] * M[lc, nc, rc, oc] * RE[or, nr, rc] +end + +""" + LE - - RE + | | | + -- B -- +""" +function project_ket_on_bra( + LE::T, + B::S, + RE::T, +) where {T<:Tensor{R,2},S<:Tensor{R,3}} where {R<:Real} + @tensor order = (ol, or) A[nl, nr, nc] := LE[ol, nl] * B[ol, or, nc] * RE[or, nr] +end + +""" + | | + LE ---- RE -- +""" +function project_ket_on_bra( + LE::T, + RE::S, +) where {T<:Tensor{R,2},S<:Tensor{R,3}} where {R<:Real} + @tensor A[nl, nr, nc] := LE[ol, nl] * RE[ol, nr, nc] +end + +""" + K + | + -- M -- RE + | | + -- B --- +""" +function update_reduced_env_right( + RE::Tensor{R,2}, + m::Int, + M::MpoTensor{R,4}, + B::Tensor{R,3}, +) where {R<:Real} + K = zeros(R, size(M, 2)) + K[m] = one(R) + if typeof(RE) <: CuArray + K = CuArray(K) + end + K = reshape(K, 1, 1, 
size(K, 1)) + for v ∈ M.top + K = contract_tensor3_matrix(K, v) + end + K = dropdims(K, dims = (1, 2)) + + for v ∈ reverse(M.bot) + B = contract_matrix_tensor3(v, B) # TODO do we ever enter here? in mpo layers that we have now, we don't + end + update_reduced_env_right(K, RE, M.ctr, B) +end + +function update_reduced_env_right( + K::Tensor{R,1}, + RE::Tensor{R,2}, + M::Tensor{R,4}, + B::Tensor{R,3}, +) where {R<:Real} + @tensor order = (d, β, γ, α) RE[x, y] := K[d] * M[y, d, β, γ] * B[x, α, γ] * RE[α, β] +end + +function update_reduced_env_right(RR::S, M0::S) where {S<:Tensor{<:Real,2}} + @tensor RR[x, y] := M0[y, z] * RR[x, z] +end + +function contract_tensors43(B::Tensor{R,4}, A::Tensor{R,3}) where {R<:Real} + @matmul C[(x, y), (b, a), z] := sum(σ) B[y, z, a, σ] * A[x, b, σ] +end + +function corner_matrix( + C::S, + M::T, + B::S, +) where {S<:Tensor{R,3},T<:Tensor{R,4}} where {R<:Real} + @tensor order = (rr, mb, mr) Cnew[ll, ml, tt, mt] := + M[ml, mt, mr, mb] * B[ll, rr, mb] * C[rr, tt, mr] +end diff --git a/src/contractions/diagonal.jl b/src/contractions/diagonal.jl new file mode 100644 index 0000000..ce41966 --- /dev/null +++ b/src/contractions/diagonal.jl @@ -0,0 +1,21 @@ +# diagonal.jl: contractions with DiagonalTensor on CPU and CUDA + +function contract_tensor3_matrix(B::Tensor{R,3}, C::DiagonalTensor{R}) where {R<:Real} + @cast B[l, (r, s1), s2] := B[l, r, (s1, s2)] (s2 ∈ 1:size(C.e2, 1)) + B = contract_tensor3_matrix(B, C.e2) + @cast B[l, r, s1, q2] := B[l, (r, s1), q2] (s1 ∈ 1:size(C.e1, 1)) + B = permutedims(B, (1, 2, 4, 3)) + @cast B[l, (r, q2), s1] := B[l, r, q2, s1] + B = contract_tensor3_matrix(B, C.e1) + @cast B[l, r, (q2, q1)] := B[l, (r, q2), q1] (q2 ∈ 1:size(C.e2, 2)) +end + +function contract_matrix_tensor3(C::DiagonalTensor{R}, B::Tensor{R,3}) where {R<:Real} + @cast B[l, (r, s2), s1] := B[l, r, (s2, s1)] (s1 ∈ 1:size(C.e1, 2)) + B = contract_matrix_tensor3(C.e1, B) + @cast B[l, r, s2, q1] := B[l, (r, s2), q1] (s2 ∈ 1:size(C.e2, 2)) + B = permutedims(B, (1, 2, 4, 3)) + @cast B[l, (r, q1), s2] := B[l, r, q1, s2] + B = contract_matrix_tensor3(C.e2, B) + @cast B[l, r, (q1, q2)] := B[l, (r, q1), q2] (q1 ∈ 1:size(C.e1, 1)) +end diff --git a/src/contractions/site.jl b/src/contractions/site.jl new file mode 100644 index 0000000..bae7c6c --- /dev/null +++ b/src/contractions/site.jl @@ -0,0 +1,195 @@ +# site.jl: contractions with SiteTensor on CPU and CUDA + +# TODO make sure slicing is done right, +# cf. https://discourse.julialang.org/t/correct-implementation-of-cuarrays-slicing-operations/90600 + +function contract_sparse_with_three( + lp, + X1::S, + X2::S, + X3::S, + loc_exp::T, + k1::Q, + k2::Q, + k3::Q, + kout::Q, +) where {S<:Tensor{R,3},T<:Tensor{R,1},Q<:Integer} where {R<:Real} + s1, s2, _ = size(X1) + s3, s4, _ = size(X3) + + device = typeof(loc_exp) <: CuArray ? :GPU : :CPU + p1 = get_projector!(lp, k1, device) + p2 = get_projector!(lp, k2, device) + p3 = get_projector!(lp, k3, device) + + total_memory = 2^32 # TODO add better handling for this; also depending on device + batch_size = max( + Int( + floor( + total_memory / + (8 * (s1 * s2 + s2 * s3 + s3 * s4 + s4 * s1 + min(s1 * s3, s2 * s4))), + ), + ), + 1, + ) + batch_size = Int(2^floor(log2(batch_size) + 1e-6)) + + total_size = length(p1) + batch_size = min(batch_size, total_size) + + onGPU = typeof(loc_exp) <: CuArray + out = onGPU ? 
CUDA.zeros(R, size(lp, kout), s1, s4) : zeros(R, size(lp, kout), s1, s4) + + from = 1 + while from <= total_size + to = min(total_size, from + batch_size - 1) + + vp1 = @view p1[from:to] + vp2 = @view p2[from:to] + vp3 = @view p3[from:to] + + X1p = X1[:, :, vp1] + X2p = X2[:, :, vp2] + X3p = X3[:, :, vp3] + + if s1 * s3 < s2 * s4 + Xtmp = batched_mul(X1p, X2p) + outp = batched_mul(Xtmp, X3p) + else + Xtmp = batched_mul(X2p, X3p) + outp = batched_mul(X1p, Xtmp) + end + + le = @view loc_exp[from:to] + outp .*= reshape(le, 1, 1, :) + outpp = reshape(outp, s1 * s4, :) + ipr, rf, rt = sparse(R, lp, kout, device; from, to) + @inbounds out[rf:rt, :, :] .+= reshape(ipr * outpp', :, s1, s4) + from = to + 1 + end + permutedims(out, (2, 3, 1)) +end + +function update_env_left( + LE::S, + A::S, + M::T, + B::S, +) where {S<:Tensor{R,3},T<:SiteTensor{R,4}} where {R<:Real} + contract_sparse_with_three( + M.lp, + permutedims(B, (2, 1, 3)), + LE, + A, + M.loc_exp, + M.projs[[4, 1, 2, 3]]..., + ) +end + +function update_env_right( + RE::S, + A::S, + M::SiteTensor{R,4}, + B::S, +) where {S<:Tensor{R,3}} where {R<:Real} + contract_sparse_with_three( + M.lp, + B, + RE, + permutedims(A, (2, 1, 3)), + M.loc_exp, + M.projs[[4, 3, 2, 1]]..., + ) +end + +function project_ket_on_bra( + LE::S, + B::S, + M::SiteTensor{R,4}, + RE::S, +) where {S<:Tensor{R,3}} where {R<:Real} + contract_sparse_with_three( + M.lp, + permutedims(LE, (2, 1, 3)), + B, + RE, + M.loc_exp, + M.projs[[1, 4, 3, 2]]..., + ) +end + +function update_reduced_env_right( + K::Tensor{R,1}, + RE::Tensor{R,2}, + M::SiteTensor{R,4}, + B::Tensor{R,3}, +) where {R<:Real} + device = typeof(M.loc_exp) <: CuArray ? :GPU : :CPU + s1, s2, _ = size(B) + + p2, p3, p4 = (get_projector!(M.lp, x, device) for x in M.projs[2:4]) + k1 = M.projs[1] + total_memory = 2^32 # TODO add better handling for this; also depending on device + + batch_size = max(Int(floor(total_memory / (8 * (s1 * s2 + s1 + s2 + 1)))), 1) + batch_size = Int(2^floor(log2(batch_size) + 1e-6)) + + out = + typeof(M.loc_exp) <: CuArray ? CUDA.zeros(R, size(M.lp, k1), s1) : + zeros(R, size(M.lp, k1), s1) + RE = reshape(RE, size(RE, 1), 1, size(RE, 2)) + + from = 1 + total_size = length(p4) + while from <= total_size + to = min(total_size, from + batch_size - 1) + vp2 = @view p2[from:to] + vp3 = @view p3[from:to] + vp4 = @view p4[from:to] + + @inbounds Kp = K[vp2] + @inbounds REp = RE[:, :, vp3] + @inbounds Bp = B[:, :, vp4] + le = @view M.loc_exp[from:to] + + outp = dropdims(Bp ⊠ REp, dims = 2) + outp .*= reshape(le .* Kp, 1, :) + + ipr, rf, rt = sparse(R, M.lp, k1, device; from, to) + @inbounds out[rf:rt, :] .+= ipr * outp' + from = to + 1 + end + permutedims(out, (2, 1)) +end + +function contract_tensors43(M::SiteTensor{R,4}, B::Tensor{R,3}) where {R<:Real} + device = typeof(M.loc_exp) <: CuArray ? :GPU : :CPU + p4 = get_projector!(M.lp, M.projs[4], device) + sb1, sb2, _ = size(B) + sm1, sm2, sm3 = size.(Ref(M.lp), M.projs[1:3]) + @inbounds Bp = B[:, :, p4] .* reshape(M.loc_exp, 1, 1, :) + @cast Bp[(x, y), z] := Bp[x, y, z] + ip123 = sparse(R, M.lp, M.projs[1], M.projs[2], M.projs[3], device) + out = reshape(ip123 * Bp', sm1, sm2, sm3, sb1, sb2) + out = permutedims(out, (4, 1, 5, 3, 2)) + reshape(out, sb1 * sm1, sb2 * sm3, sm2) +end + +function corner_matrix( + C::S, + M::T, + B::S, +) where {S<:Tensor{R,3},T<:SiteTensor{R,4}} where {R<:Real} + device = typeof(M.loc_exp) <: CuArray ? 
:GPU : :CPU + projs = [get_projector!(M.lp, x, device) for x in M.projs] + @inbounds Bp = B[:, :, projs[4]] + @inbounds Cp = C[:, :, projs[3]] + outp = Bp ⊠ Cp + outp .*= reshape(M.loc_exp, 1, 1, :) + @cast outp[(x, y), z] := outp[x, y, z] + sm1, sm2 = maximum(projs[1]), maximum(projs[2]) + @inbounds p12 = projs[1] .+ (projs[2] .- 1) .* sm1 + ip12 = sparse(R, p12; mp = sm1 * sm2) + out = reshape(ip12 * outp', sm1, maximum(projs[2]), size(B, 1), size(C, 2)) + permutedims(out, (3, 1, 4, 2)) +end diff --git a/src/contractions/sparse.jl b/src/contractions/sparse.jl new file mode 100644 index 0000000..786585e --- /dev/null +++ b/src/contractions/sparse.jl @@ -0,0 +1,70 @@ +#TODO add support for CuSparseMatrixCSR (cf. https://github.com/JuliaGPU/CUDA.jl/issues/1113) + +# TODO This function is a patch and may not provide any advantage - to be tested +#= +function CUDA.:*(Md::DenseCuMatrix{T}, Mcsr::CUSPARSE.CuSparseMatrixCSR{T}) where T + ret = CUDA.zeros(T, size(Md, 1), size(Mcsr, 2)) + CUSPARSE.mm!('T', 'T', one(T), Mcsr, Md, zero(T), ret, 'O') + ret' +end +=# +# +# TODO shouldn't we have CSR format instead? +function SparseArrays.sparse(::Type{R}, p::CuArray{Int64,1}; mp = nothing) where {R<:Real} + n = length(p) + if isnothing(mp) + mp = maximum(p) + end + cn = CuArray(1:n+1) # aux_cusparse(R, n) + co = CUDA.ones(R, n) + CuSparseMatrixCSR(CuSparseMatrixCSC(cn, p, co, (mp, n))) # TODO: Change when CUDA.jl is fixed +end + +function SparseArrays.sparse(::Type{R}, p::Vector{Int64}; mp = nothing) where {R<:Real} + n = length(p) + if isnothing(mp) + mp = maximum(p) + end + cn = collect(1:n) + co = ones(R, n) + sparse(p, cn, co, mp, n) +end + +@memoize Dict function SparseArrays.sparse( + ::Type{T}, + lp::PoolOfProjectors, + k1::R, + k2::R, + k3::R, + device::Symbol, +) where {T<:Real,R<:Int} + p1 = get_projector!(lp, k1) #, device) + p2 = get_projector!(lp, k2) #, device) + p3 = get_projector!(lp, k3) #, device) + @assert length(p1) == length(p2) == length(p3) + s1, s2, s3 = size(lp, k1), size(lp, k2), size(lp, k3) + p = p1 .+ s1 * (p2 .- 1) .+ s1 * s2 * (p3 .- 1) + if device == :GPU + p = CuArray(p) + end + sparse(T, p; mp = s1 * s2 * s3) +end + +@memoize Dict function SparseArrays.sparse( + ::Type{R}, + lp::PoolOfProjectors, + k::Int, + device::Symbol; + from::Int = 1, + to::Int = length(lp, k), +) where {R<:Real} + p = get_projector!(lp, k) + pp = @view p[from:to] + rf = minimum(pp) + rt = maximum(pp) + if device == :GPU + pp = CuArray(pp) + end + ipr = sparse(R, pp .- (rf - 1)) + (ipr, rf, rt) +end diff --git a/src/contractions/virtual.jl b/src/contractions/virtual.jl new file mode 100644 index 0000000..78d6f57 --- /dev/null +++ b/src/contractions/virtual.jl @@ -0,0 +1,434 @@ +# virtual.jl: contractions with VirtualTensor on CPU and CUDA +# export update_env_left2, update_env_right2, project_ket_on_bra2 + +# @memoize Dict +alloc_undef(R, onGPU, shape) = onGPU ? CuArray{R}(undef, shape) : Array{R}(undef, shape) +alloc_zeros(R, onGPU, shape) = onGPU ? 
CUDA.zeros(R, shape) : zeros(R, shape) + +function proj_out(lp, k1, k2, k3, device) + p1 = get_projector!(lp, k1, device) + p2 = get_projector!(lp, k2, device) + p3 = get_projector!(lp, k3, device) + @assert length(p1) == length(p2) == length(p3) + s1, s2 = size(lp, k1), size(lp, k2) + p1 .+ s1 * (p2 .- 1) .+ s1 * s2 * (p3 .- 1) +end + +function proj_2step_12(lp, (k1, k2), k3, device) + p1 = get_projector!(lp, k1, :CPU) + p2 = get_projector!(lp, k2, :CPU) + p3 = get_projector!(lp, k3, device) + @assert length(p1) == length(p2) == length(p3) + s1 = size(lp, k1) + + p12, transitions_matrix = rank_reveal(hcat(p1, p2), :PE) + (p1, p2) = Tuple(Array(t) for t ∈ eachcol(transitions_matrix)) + + s12 = maximum(p12) + + if device == :CPU + p12 = CuArray(p12) + p1 = CuArray(p1) + p2 = CuArray(p2) + end + + pf1 = p12 .+ s12 .* (p3 .- 1) + pf2 = p1 .+ s1 .* (p2 .- 1) + + pf1, pf2, s12 +end + +function proj_2step_23(lp, k1, (k2, k3), device) + p1 = get_projector!(lp, k1, device) + p2 = get_projector!(lp, k2, :CPU) + p3 = get_projector!(lp, k3, :CPU) + @assert length(p1) == length(p2) == length(p3) + + s1, s2 = size(lp, k1), size(lp, k2) + + p23, transitions_matrix = rank_reveal(hcat(p2, p3), :PE) + (p2, p3) = Tuple(Array(t) for t ∈ eachcol(transitions_matrix)) + + s23 = maximum(p23) + + if device == :CPU + p23 = CuArray(p23) + p2 = CuArray(p2) + p3 = CuArray(p3) + end + + pf1 = p1 .+ s1 .* (p23 .- 1) + pf2 = p2 .+ s2 .* (p3 .- 1) + + pf1, pf2, s23 +end + +function merge_projectors_inter(lp, p1, p2, p3, onGPU; order = "1_23") + s1 = size(lp, p1) + s2 = size(lp, p2) + device = onGPU ? :GPU : :CPU + p1 = get_projector!(lp, p1, device) + p2 = get_projector!(lp, p2, :CPU) + p3 = get_projector!(lp, p3, :CPU) + + p23, transitions_matrix = rank_reveal(hcat(p2, p3), :PE) + s23 = maximum(p23) + (p2, p3) = Tuple(Array(t) for t ∈ eachcol(transitions_matrix)) + if onGPU + p23 = CuArray(p23) + p2 = CuArray(p2) + p3 = CuArray(p3) + end + p2_3 = p2 .+ s2 .* (p3 .- 1) + p123 = order == "1_23" ? 
p1 .+ s1 .* (p23 .- 1) : p23 .+ s23 .* (p1 .- 1) # else "23_1" + p123, p2_3, s23 +end + +function update_env_left( + LE::S, + A::S, + M::VirtualTensor{R,4}, + B::S, +) where {S<:Tensor{R,3}} where {R<:Real} + p_lb, p_lc, p_lt, p_rb, p_rc, p_rt = M.projs + slb, srb = size(B, 1), size(B, 2) + slt, srt = size(A, 1), size(A, 2) + src = length(M.lp, p_rc) + + slpb, slpc, slpt = size(M.lp, p_lb), size(M.lp, p_lc), size(M.lp, p_lt) + srpb, srpc, srpt = size(M.lp, p_rb), size(M.lp, p_rc), size(M.lp, p_rt) + + onGPU = typeof(LE) <: CuArray + + A = reshape(A, (slt, srt, slpt, srpt)) + B = reshape(B, (slb, srb, slpb, srpb)) + Lout = alloc_zeros(R, onGPU, (srb, srt, src)) + + if slpb * srpt >= slpt * srpb + pl_b_ct, pl_c_t, slpct = + merge_projectors_inter(M.lp, p_lb, p_lc, p_lt, onGPU; order = "1_23") + pr_bc_t, pr_b_c, srpbc = + merge_projectors_inter(M.lp, p_rt, p_rb, p_rc, onGPU; order = "23_1") + + B2 = permutedims(B, (1, 3, 2, 4)) # [lb, lpb, rb, rpb] + B2 = reshape(B2, (slb * slpb, srb * srpb)) # [(lb, lpb), (rb, rpb)] + + tmp1 = alloc_zeros(R, onGPU, (slb, slpb * slpct)) + tmp2 = alloc_undef(R, onGPU, (srb * srpb, slpct)) + tmp3 = alloc_zeros(R, onGPU, (srb * srpb, slpt * slpc)) + tmp5 = alloc_undef(R, onGPU, (srb * srpb, srpc, slpt)) + tmp8 = alloc_undef(R, onGPU, (srb * srpbc, srpt)) + + for ilt ∈ 1:slt + tmp1[:, pl_b_ct] = (@view LE[:, ilt, :]) # [lb, (lpb, lpct)] + mul!(tmp2, B2', reshape(tmp1, (slb * slpb, slpct))) # [(rb, rpb), lpct] + tmp3[:, pl_c_t] = tmp2 # [(rb, rpb), (lpc, lpt)] + tmp4 = reshape(tmp3, (srb * srpb, slpc, slpt)) # [(rb, rpb), lpc, lpt] + batched_mul!(tmp5, tmp4, M.con) + tmp6 = reshape(tmp5, (srb, srpb * srpc, slpt)) # [rb, (rpb, rpc), lpt] + tmp7 = reshape(tmp6[:, pr_b_c, :], (srb * srpbc, slpt)) # [(rb, rpbc), lpt] + for irt ∈ 1:srt + mul!(tmp8, tmp7, (@view A[ilt, irt, :, :])) + tmp9 = reshape(tmp8, (srb, srpbc * srpt)) + Lout[:, irt, :] .+= tmp9[:, pr_bc_t] # [rb, rc] + end + end + else + pl_t_cb, pl_c_b, slpcb = + merge_projectors_inter(M.lp, p_lt, p_lc, p_lb, onGPU; order = "1_23") + pr_tc_b, pr_t_c, srptc = + merge_projectors_inter(M.lp, p_rb, p_rt, p_rc, onGPU; order = "23_1") + + A2 = permutedims(A, (1, 3, 2, 4)) # [lt, lpt, rt, rpt] + A2 = reshape(A2, (slt * slpt, srt * srpt)) # [(lt, lpt), (rt, rpt)] + + tmp1 = alloc_zeros(R, onGPU, (slt, slpt * slpcb)) + tmp2 = alloc_undef(R, onGPU, (srt * srpt, slpcb)) + tmp3 = alloc_zeros(R, onGPU, (srt * srpt, slpc * slpb)) + tmp5 = alloc_undef(R, onGPU, (srt * srpt, srpc, slpb)) + tmp8 = alloc_zeros(R, onGPU, (srt * srptc, srpb)) + + for ilb ∈ 1:slb + tmp1[:, pl_t_cb] = (@view LE[ilb, :, :]) # [lt, (lpt, lpcb)] + mul!(tmp2, A2', reshape(tmp1, (slt * slpt, slpcb))) # [(rt, rpt), lpcb] + tmp3[:, pl_c_b] = tmp2 + tmp4 = reshape(tmp3, (srt * srpt, slpc, slpb)) # [(rt, rpt), lpc, lpb] + batched_mul!(tmp5, tmp4, M.con) # [(rt, rpt), lpb, rpc] + tmp6 = reshape(tmp5, (srt, srpt * srpc, slpb)) # [(rt, rpt * rpc), lcb] + tmp7 = reshape(tmp6[:, pr_t_c, :], (srt * srptc, slpb)) # [(rt, rptc), lpb] + for irb ∈ 1:srb + mul!(tmp8, tmp7, (@view B[ilb, irb, :, :])) + tmp9 = reshape(tmp8, (srt, srptc * srpb)) + Lout[irb, :, :] .+= tmp9[:, pr_tc_b] # [rt, rc] + end + end + end + Lout +end + + +function project_ket_on_bra( + LE::S, + B::S, + M::VirtualTensor{R,4}, + RE::S, +) where {S<:Tensor{R,3}} where {R<:Real} + p_lb, p_lc, p_lt, p_rb, p_rc, p_rt = M.projs + slb, slt = size(LE, 1), size(LE, 2) + srb, srt = size(RE, 1), size(RE, 2) + slpb, slpc, slpt = size(M.lp, p_lb), size(M.lp, p_lc), size(M.lp, p_lt) + srpb, srpc, srpt = size(M.lp, 
p_rb), size(M.lp, p_rc), size(M.lp, p_rt) + + onGPU = typeof(LE) <: CuArray + + B = reshape(B, (slb, srb, slpb, srpb)) + B2 = permutedims(B, (1, 3, 2, 4)) # [lb, lpb, rb, rpb] + B2 = reshape(B2, (slb * slpb, srb * srpb)) # [(lb, lpb), (rb, rpb)] + LR = alloc_zeros(R, onGPU, (slt, srt, slpt, srpt)) + + if slpb >= srpb + pl_b_ct, pl_c_t, slpct = + merge_projectors_inter(M.lp, p_lb, p_lc, p_lt, onGPU; order = "1_23") + pr_bc_t, pr_b_c, srpbc = + merge_projectors_inter(M.lp, p_rt, p_rb, p_rc, onGPU; order = "23_1") + + tmp1 = alloc_zeros(R, onGPU, (slb, slpb * slpct)) + tmp2 = alloc_undef(R, onGPU, (srb * srpb, slpct)) + tmp3 = alloc_zeros(R, onGPU, (srb * srpb, slpc * slpt)) + tmp5 = alloc_undef(R, onGPU, (srb * srpb, srpc, slpt)) + tmp8 = alloc_zeros(R, onGPU, (srb, srpbc * srpt)) + for ilt ∈ 1:slt + tmp1[:, pl_b_ct] = (@view LE[:, ilt, :]) # [lb, (lpb, lpct)] + mul!(tmp2, B2', reshape(tmp1, (slb * slpb, slpct))) # [(rb, rpb), lpct] + tmp3[:, pl_c_t] = tmp2 # [(rb, rpb), (lpc, lpt)] + tmp4 = reshape(tmp3, (srb * srpb, slpc, slpt)) # [(rb, rpb), lpc, lpt] + batched_mul!(tmp5, tmp4, M.con) # [(rb, rpb), rpc, lpt] + tmp6 = reshape(tmp5, (srb, srpb * srpc, slpt)) # [rb, (rpb, rpc), lpt] + tmp7 = reshape(tmp6[:, pr_b_c, :], (srb * srpbc, slpt)) # [(rb, rpbc), lpt] + for irt ∈ 1:srt + tmp8[:, pr_bc_t] = (@view RE[:, irt, :]) # [rb, (rpbc, rpt)] + LR[ilt, irt, :, :] = tmp7' * reshape(tmp8, (srb * srpbc, srpt)) # [lpt, rpt] + end + end + else + pr_b_ct, pr_c_t, srpct = + merge_projectors_inter(M.lp, p_rb, p_rc, p_rt, onGPU; order = "1_23") + pl_bc_t, pl_b_c, slpbc = + merge_projectors_inter(M.lp, p_lt, p_lb, p_lc, onGPU; order = "23_1") + + tmp1 = alloc_zeros(R, onGPU, (srb, srpb * srpct)) + tmp2 = alloc_undef(R, onGPU, (slb * slpb, srpct)) + tmp3 = alloc_zeros(R, onGPU, (slb * slpb, srpc * srpt)) + tmp5 = alloc_undef(R, onGPU, (slb * slpb, slpc, srpt)) + tmp8 = alloc_zeros(R, onGPU, (slb, slpbc * slpt)) + for irt ∈ 1:srt + tmp1[:, pr_b_ct] = (@view RE[:, irt, :]) # [rb, (rpb, rpct)] + mul!(tmp2, B2, reshape(tmp1, (srb * srpb, srpct))) # [(lb, lpb), rpct] + tmp3[:, pr_c_t] = tmp2 # [(lb, lpb), (rpc, rpt)] + tmp4 = reshape(tmp3, (slb * slpb, srpc, srpt)) # [(lb, lpb), rpc, rpt] + batched_mul!(tmp5, tmp4, M.con') # [(lb, lpb), lpc, rpt] + tmp6 = reshape(tmp5, (slb, slpb * slpc, srpt)) # [lb, (lpb, lpc), rpt] + tmp7 = reshape(tmp6[:, pl_b_c, :], (slb * slpbc, srpt)) # [(lb, lpbc), rpt] + for ilt ∈ 1:slt + tmp8[:, pl_bc_t] = (@view LE[:, ilt, :]) # [lb, (lpbc, lpt)] + LR[ilt, irt, :, :] = reshape(tmp8, (slb * slpbc, slpt))' * tmp7 # [lct, rct] + end + end + end + reshape(LR, (slt, srt, slpt * srpt)) +end + + +function update_env_right( + RE::S, + A::S, + M::VirtualTensor{R,4}, + B::S, +) where {S<:Tensor{R,3}} where {R<:Real} + p_lb, p_lc, p_lt, p_rb, p_rc, p_rt = M.projs + slb, srb = size(B, 1), size(B, 2) + slt, srt = size(A, 1), size(A, 2) + slc = length(M.lp, p_lc) + + slpb, slpc, slpt = size(M.lp, p_lb), size(M.lp, p_lc), size(M.lp, p_lt) + srpb, srpc, srpt = size(M.lp, p_rb), size(M.lp, p_rc), size(M.lp, p_rt) + + onGPU = typeof(RE) <: CuArray + + A = reshape(A, (slt, srt, slpt, srpt)) + B = reshape(B, (slb, srb, slpb, srpb)) + Rout = alloc_zeros(R, onGPU, (slb, slt, slc)) + + if srpb * slpt >= srpt * slpb + B2 = permutedims(B, (1, 3, 2, 4)) # [lb, lpb, rb, rpb] + B2 = reshape(B2, (slb * slpb, srb * srpb)) # [(lb, lpb), (rb, rpb)] + + pr_b_ct, pr_c_t, srpct = + merge_projectors_inter(M.lp, p_rb, p_rc, p_rt, onGPU; order = "1_23") + pl_bc_t, pl_b_c, slpbc = + merge_projectors_inter(M.lp, p_lt, 
p_lb, p_lc, onGPU; order = "23_1") + + tmp1 = alloc_zeros(R, onGPU, (srb, srpb * srpct)) + tmp2 = alloc_undef(R, onGPU, (slb * slpb, srpct)) + tmp3 = alloc_zeros(R, onGPU, (slb * slpb, srpc, srpt)) + tmp5 = alloc_undef(R, onGPU, (slb * slpb, slpc, srpt)) + tmp8 = alloc_undef(R, onGPU, (slb * slpbc, slpt)) + + for irt ∈ 1:srt + tmp1[:, pr_b_ct] = (@view RE[:, irt, :]) # [rb, (rpb, rpct)] + mul!(tmp2, B2, reshape(tmp1, (srb * srpb, srpct))) # [(lb, lpb), rpct] + tmp3[:, pr_c_t] = tmp2 # [(lb, lpb), (rpc, rpt)] + tmp4 = reshape(tmp3, (slb * slpb, srpc, srpt)) # [(lb, lpb), rpc, rpt] + batched_mul!(tmp5, tmp4, M.con') + tmp6 = reshape(tmp5, (slb, slpb * slpc, srpt)) # [lb, (lpb, lpc), rpt] + tmp7 = reshape(tmp6[:, pl_b_c, :], (slb * slpbc, srpt)) # [(lb, lpbc), rpt] + for ilt ∈ 1:slt + mul!(tmp8, tmp7, (@view A[ilt, irt, :, :])') + tmp9 = reshape(tmp8, (slb, slpbc * slpt)) + Rout[:, ilt, :] .+= tmp9[:, pl_bc_t] + end + end + else + A2 = permutedims(A, (1, 3, 2, 4)) # [lt, lpt, rt, rpt] + A2 = reshape(A2, (slt * slpt, srt * srpt)) # [(lt, lpt), (rt, rpt)] + + pr_t_cb, pr_c_b, srpcb = + merge_projectors_inter(M.lp, p_rt, p_rc, p_rb, onGPU; order = "1_23") + pl_tc_b, pl_t_c, slptc = + merge_projectors_inter(M.lp, p_lb, p_lt, p_lc, onGPU; order = "23_1") + + tmp1 = alloc_zeros(R, onGPU, (srt, srpt * srpcb)) + tmp2 = alloc_undef(R, onGPU, (slt * slpt, srpcb)) + tmp3 = alloc_zeros(R, onGPU, (slt * slpt, srpc * srpb)) + tmp5 = alloc_undef(R, onGPU, (slt * slpt, slpc, srpb)) + tmp8 = alloc_undef(R, onGPU, (slt * slptc, slpb)) + for irb ∈ 1:srb + tmp1[:, pr_t_cb] = (@view RE[irb, :, :]) # [rt, (rpt, rpcb)] + mul!(tmp2, A2, reshape(tmp1, (srt * srpt, srpcb))) # [(lt, lpt), rpcb] + tmp3[:, pr_c_b] = tmp2 # [(lt, lpt), (rpc, rpb)] + tmp4 = reshape(tmp3, (slt * slpt, srpc, srpb)) # [(lt, lpt), rpc, rpb] + batched_mul!(tmp5, tmp4, M.con') # [(lt, lpt), lpc, rpb] + tmp6 = reshape(tmp5, (slt, slpt * slpc, srpb)) # [lt, (lpt, lpc), rpb] + tmp7 = reshape(tmp6[:, pl_t_c, :], (slt * slptc, srpb)) # [(lb, lptc), rpb] + for ilb ∈ 1:slb + mul!(tmp8, tmp7, (@view B[ilb, irb, :, :])') + tmp9 = reshape(tmp8, (slt, slptc * slpb)) + Rout[ilb, :, :] .+= tmp9[:, pl_tc_b] + end + end + end + Rout +end + + +function update_reduced_env_right( + K::Tensor{R,1}, + RE::Tensor{R,2}, + M::VirtualTensor{R,4}, + B::Tensor{R,3}, +) where {R<:Real} + p_lb, p_lc, p_lt, p_rb, p_rc, p_rt = M.projs + + slb, srb = size(B, 1), size(B, 2) + slpb, slpc, slpt = size(M.lp, p_lb), size(M.lp, p_lc), size(M.lp, p_lt) + srpb, srpc, srpt = size(M.lp, p_rb), size(M.lp, p_rc), size(M.lp, p_rt) + + onGPU = typeof(RE) <: CuArray + + K = reshape(K, (slpt, srpt)) # [lct, rct] + B = reshape(B, (slb, srb, slpb, srpb)) # [lb, rb, lpb, rpb] + B2 = permutedims(B, (1, 3, 2, 4)) # [lb, lpb, rb, rpb] + B2 = reshape(B2, (slb * slpb, srb * srpb)) # [(lb, lpb), (rb, rpb)] + + if srpb * slpt >= srpt * slpb + pr_b_ct, pr_c_t, srpct = + merge_projectors_inter(M.lp, p_rb, p_rc, p_rt, onGPU; order = "1_23") + pl_bc_t, pl_b_c, slpbc = + merge_projectors_inter(M.lp, p_lt, p_lb, p_lc, onGPU; order = "23_1") + + tmp1 = alloc_zeros(R, onGPU, (srb, srpb * srpct)) + tmp4 = alloc_zeros(R, onGPU, (slb * slpb, srpc * srpt)) + tmp6 = alloc_undef(R, onGPU, (slb * slpb, slpc, srpt)) + + tmp1[:, pr_b_ct] = RE # [rb, (rpb, rpct)] + tmp2 = reshape(tmp1, (srb * srpb, srpct)) # [(rb, rpb), rpct] + tmp3 = B2 * tmp2 # [(lb, lpb), rpct] + tmp4[:, pr_c_t] = tmp3 + tmp5 = reshape(tmp4, (slb * slpb, srpc, srpt)) + batched_mul!(tmp6, tmp5, M.con') # [(lb, lpb), lpc, rpt] + tmp7 = reshape(tmp6, 
(slb, slpb * slpc, srpt)) + tmp8 = tmp7[:, pl_b_c, :] # [lb, lpbc, rpt] + tmp9 = reshape(tmp8, (slb * slpbc, srpt)) * K' # [(lb, lpbc), lpt] + tmp10 = reshape(tmp9, (slb, slpbc * slpt)) + Rtemp = tmp10[:, pl_bc_t] + else + pr_bc_t, pr_b_c, srpbc = + merge_projectors_inter(M.lp, p_rt, p_rb, p_rc, onGPU; order = "23_1") + pl_b_ct, pl_c_t, slpct = + merge_projectors_inter(M.lp, p_lb, p_lc, p_lt, onGPU; order = "1_23") + + tmp1 = alloc_zeros(R, onGPU, (srb, srpbc * srpt)) + tmp4 = alloc_zeros(R, onGPU, (srb, srpb * srpc, slpt)) + tmp6 = alloc_undef(R, onGPU, (srb * srpb, slpc, slpt)) + + tmp1[:, pr_bc_t] = RE # [rb, (rpbc, rpt)] + tmp2 = reshape(tmp1, (srb * srpbc, srpt)) # [(rb, rpbc), rpt] + tmp3 = reshape(tmp2 * K', (srb, srpbc, slpt)) # [rb, rpbc, lpt] + tmp4[:, pr_b_c, :] = tmp3 + tmp5 = reshape(tmp4, (srb * srpb, srpc, slpt)) + batched_mul!(tmp6, tmp5, M.con') # [(rb, rpb), lpc, lpt] + tmp7 = reshape(tmp6, (srb * srpb, slpc * slpt)) # [(rb, rpb), (lpc, lpt)] + tmp8 = tmp7[:, pl_c_t] + tmp9 = B2 * tmp8 # [(lb, lpb), lpct] + tmp10 = reshape(tmp9, (slb, slpb * slpct)) + Rtemp = tmp10[:, pl_b_ct] + end + Rtemp +end + + +function contract_tensors43(M::VirtualTensor{R,4}, B::Tensor{R,3}) where {R<:Real} + p_lb, p_l, p_lt, p_rb, p_r, p_rt = M.projs + + slb, srb = size(B, 1), size(B, 2) + slcb, slc, slct = size(M.lp, p_lb), size(M.lp, p_l), size(M.lp, p_lt) + srcb, src, srct = size(M.lp, p_rb), size(M.lp, p_r), size(M.lp, p_rt) + slcp, srcp = length(M.lp, p_l), length(M.lp, p_r) + + B = reshape(B, (slb, srb, slcb, srcb)) + + pls = sparse(R, M.lp, p_lb, p_l, p_lt, :CPU) + pls = typeof(B) <: CuArray ? CuArray(pls) : Array(pls) + pls = reshape(pls, (slcb, slc, slct * slcp)) + pls = permutedims(pls, (3, 1, 2)) # [(slct, slcp), lcb, lc] + + prs = sparse(R, M.lp, p_rb, p_r, p_rt, :CPU) + prs = typeof(B) <: CuArray ? CuArray(prs) : Array(prs) + prs = reshape(prs, (srcb, src, srct * srcp)) + prs = permutedims(prs, (3, 1, 2)) # [(rct, rcp), rcb, rc] + + if size(M.con, 1) <= size(M.con, 2) + prs = contract_matrix_tensor3(M.con, prs) + else + pls = contract_tensor3_matrix(pls, M.con) + end + @tensor order = (lb, c, rb) MB[l, lt, r, rt] := + pls[lt, lb, c] * prs[rt, rb, c] * B[l, r, lb, rb] + MB = reshape(MB, slb, slct, slcp, srb, srct, srcp) + MB = permutedims(MB, (1, 3, 4, 6, 2, 5)) + reshape(MB, (slb * slcp, srb * srcp, slct * srct)) +end + +function corner_matrix( + C::S, + M::T, + B::S, +) where {S<:Tensor{R,3},T<:VirtualTensor{R,4}} where {R<:Real} + slb, srb = size(B, 1), size(B, 2) + srcc, stc = size(C, 2), size(C, 3) + V = contract_tensors43(M, B) + vl, vr, vt = size(V, 1), size(V, 2), size(V, 3) + V = reshape(V, (vl, srb, stc, vt)) + @tensor Cnew[vl, vt, vrr] := V[vl, srb, stc, vt] * C[srb, vrr, stc] + reshape(Cnew, (slb, :, srcc, vt)) +end diff --git a/src/environment.jl b/src/environment.jl new file mode 100644 index 0000000..b18caf5 --- /dev/null +++ b/src/environment.jl @@ -0,0 +1,293 @@ +export Environment, EnvironmentMixed, left_nbrs_site, right_nbrs_site + +abstract type AbstractEnvironment end + +mutable struct EnvironmentMixed{T<:Real} <: AbstractEnvironment + bra::QMps{T} # mps that is to be optimized + mpo::QMpo{T} + ket::QMps{T} + C::Tensor{T,3} + site::Any # position of C is at: site - epsilon ::Union(Sites, :central) + env::Dict + onGPU::Bool + + function EnvironmentMixed( + bra::QMps{T}, + C::Tensor{T,3}, + mpo::QMpo{T}, + ket::QMps{T}; + ) where {T<:Real} + onGPU = bra.onGPU && mpo.onGPU && ket.onGPU + @assert bra.sites == ket.sites && issubset(bra.sites, mpo.sites) + id3 = onGPU ? 
CUDA.ones(T, 1, 1, 1) : ones(T, 1, 1, 1) + id2 = onGPU ? CUDA.ones(T, 1, 1) : ones(T, 1, 1) + env0 = Dict{Any,Any}((bra.sites[1], :left) => id2, (bra.sites[end], :right) => id3) + env = new{T}(bra, mpo, ket, C, last(bra.sites) + 1, env0, onGPU) # + update_env_left!.(Ref(env), env.bra.sites) + env + end +end + +function clear_env_containing_site!(env::EnvironmentMixed, site) + if site == :central + delete!(env.env, (env.site, :left)) + delete!(env.env, (left_nbrs_site(env.site, env.ket.sites), :right)) + else + if site == env.site + delete!(env.env, (:central, :right)) + else + delete!(env.env, (left_nbrs_site(site, env.ket.sites), :right)) + end + rs = right_nbrs_site(site, env.ket.sites) + if rs == env.site + delete!(env.env, (:central, :left)) + else + delete!(env.env, (rs, :left)) + end + end +end + +mutable struct Environment{T<:Real} <: AbstractEnvironment + bra::QMps{T} # mps that is to be optimized + mpo::QMpo{T} + ket::QMps{T} + env::Dict + log_norms::Dict + + function Environment(bra::QMps{T}, mpo::QMpo{T}, ket::QMps{T}) where {T<:Real} + onGPU = bra.onGPU && mpo.onGPU && ket.onGPU + @assert bra.sites == ket.sites && issubset(bra.sites, mpo.sites) + id = onGPU ? CUDA.ones(T, 1, 1, 1) : ones(T, 1, 1, 1) + env0 = Dict((bra.sites[1], :left) => id, (bra.sites[end], :right) => id) + ln0 = Dict((bra.sites[1], :left) => zero(T), (bra.sites[end], :right) => zero(T)) + env = new{T}(bra, mpo, ket, env0, ln0) + update_env_left!.(Ref(env), env.bra.sites) + env + end +end + +function clear_env_containing_site!(env::Environment, site::Site) + delete!(env.env, (left_nbrs_site(site, env.ket.sites), :right)) + delete!(env.env, (right_nbrs_site(site, env.ket.sites), :left)) +end + +""" +Largest x in sites: x < site +""" +function left_nbrs_site(site::Site, sites) + ls = filter(i -> i < site, sites) + isempty(ls) && return -Inf + maximum(ls) +end + +""" +Smallest x in sites: x > site +""" +function right_nbrs_site(site::Site, sites) + ms = filter(i -> i > site, sites) + isempty(ms) && return Inf + minimum(ms) +end + +""" + -- A -- + | | + L = LE -- M -- + | | + -- B -- +""" +function update_env_left( + LE::S, + A::S, + M::T, + B::S, +) where {S<:AbstractArray{R,3},T<:MpoTensor{R,4}} where {R<:Real} + for v ∈ M.top + A = contract_tensor3_matrix(A, v) + end + for v ∈ reverse(M.bot) + B = contract_matrix_tensor3(v, B) + end + update_env_left(LE, A, M.ctr, B) +end + +function update_env_left!(env::Environment, site::Site) + site <= first(env.bra.sites) && return + ls = left_nbrs_site(site, env.bra.sites) + LL = update_env_left(env.env[(ls, :left)], env.bra[ls], env.mpo[ls], env.ket[ls]) + rs = right_nbrs_site(ls, env.mpo.sites) + while rs < site + LL = contract_tensor3_matrix(LL, env.mpo[rs]) + rs = right_nbrs_site(rs, env.mpo.sites) + end + nLL = maximum(abs.(LL)) + LL ./= nLL + push!(env.env, (site, :left) => LL) + nLL = env.log_norms[(ls, :left)] + log(nLL) + push!(env.log_norms, (site, :left) => nLL) +end + +function update_env_left!(env::EnvironmentMixed{T}, site) where {T} # site::Union(Sites, :central) + if site == first(env.bra.sites) + if env.site == first(env.bra.sites) + LL = env.onGPU ? CUDA.ones(T, 1, 1, 1) : ones(T, 1, 1, 1) + else + LL = env.onGPU ? CUDA.ones(T, 1, 1) : ones(T, 1, 1) + end + elseif site == :central + if env.site == first(env.bra.sites) + LL = env.onGPU ? 
CUDA.ones(T, 1, 1) : ones(T, 1, 1) + else + ls = left_nbrs_site(env.site, env.bra.sites) + LL = update_env_left(env.env[(ls, :left)], env.bra[ls], env.ket[ls]) + LL ./= maximum(abs.(LL)) + end + elseif site < env.site + ls = left_nbrs_site(site, env.bra.sites) + LL = update_env_left(env.env[(ls, :left)], env.bra[ls], env.ket[ls]) + LL ./= maximum(abs.(LL)) + elseif site == env.site + ls = left_nbrs_site(site, env.bra.sites) + LL = update_env_left(env.env[(:central, :left)], env.C) + LL ./= maximum(abs.(LL)) + else + ls = left_nbrs_site(site, env.bra.sites) + LL = update_env_left(env.env[(ls, :left)], env.bra[ls], env.mpo[ls], env.ket[ls]) + rs = right_nbrs_site(ls, env.mpo.sites) + while rs < site + LL = contract_tensor3_matrix(LL, env.mpo[rs]) + rs = right_nbrs_site(rs, env.mpo.sites) + end + LL ./= maximum(abs.(LL)) + end + push!(env.env, (site, :left) => LL) +end + + +""" + -- A -- + | | + R = -- M -- RE + | | + -- B -- +""" +function update_env_right( + RE::S, + A::S1, + M::T, + B::S, +) where {T<:MpoTensor{R,4},S<:AbstractArray{R,3},S1<:AbstractArray{R,3}} where {R<:Real} + for v ∈ M.top + A = contract_tensor3_matrix(A, v) + end + for v ∈ reverse(M.bot) + B = contract_matrix_tensor3(v, B) + end + update_env_right(RE, A, M.ctr, B) +end + +function update_env_right!(env::Environment, site::Site) + site >= last(env.bra.sites) && return + rs = right_nbrs_site(site, env.bra.sites) + RR = update_env_right(env.env[(rs, :right)], env.bra[rs], env.mpo[rs], env.ket[rs]) + ls = left_nbrs_site(rs, env.mpo.sites) + while ls > site + RR = contract_matrix_tensor3(env.mpo[ls], RR) + ls = left_nbrs_site(ls, env.mpo.sites) + end + nRR = maximum(abs.(RR)) + RR ./= nRR + push!(env.env, (site, :right) => RR) + nRR = env.log_norms[(rs, :right)] + log(nRR) + push!(env.log_norms, (site, :right) => nRR) +end + +function update_env_right!(env::EnvironmentMixed{T}, site) where {T} # site::Union(Sites, :central) + if site == last(env.bra.sites) + if env.site > last(env.bra.sites) + RR = env.onGPU ? CUDA.ones(T, 1, 1) : ones(T, 1, 1) + else + RR = env.onGPU ? 
CUDA.ones(T, 1, 1, 1) : ones(T, 1, 1, 1) + end + elseif site == :central + rs = env.site + RR = update_env_right(env.env[(rs, :right)], env.bra[rs], env.mpo[rs], env.ket[rs]) + elseif site >= env.site + rs = right_nbrs_site(site, env.bra.sites) + RR = update_env_right(env.env[(rs, :right)], env.bra[rs], env.mpo[rs], env.ket[rs]) + ls = left_nbrs_site(rs, env.mpo.sites) + while ls > site + RR = contract_matrix_tensor3(env.mpo[ls], RR) + ls = left_nbrs_site(ls, env.mpo.sites) + end + else + rs = right_nbrs_site(site, env.bra.sites) + if rs == env.site + RR = update_env_right(env.env[(:central, :right)], env.C) + else + RR = update_env_right(env.env[(rs, :right)], env.bra[rs], env.ket[rs]) + end + end + RR ./= maximum(abs.(RR)) + push!(env.env, (site, :right) => RR) +end + + +""" + | | | + LE -- M -- RE + | | | + -- B -- +""" +function project_ket_on_bra( + LE::S, + B::S, + M::T, + RE::S, +) where {S<:AbstractArray{R,3},T<:MpoTensor{R,4}} where {R<:Real} + for v ∈ reverse(M.bot) + B = contract_matrix_tensor3(v, B) + end + B = project_ket_on_bra(LE, B, M.ctr, RE) + for v ∈ reverse(M.top) + B = contract_matrix_tensor3(v, B) + end + B +end + + +project_ket_on_bra(env::Environment, site::Site) = project_ket_on_bra( + env.env[(site, :left)], + env.ket[site], + env.mpo[site], + env.env[(site, :right)], +) + +function project_ket_on_bra(env::EnvironmentMixed, site) + if site == :central + B = project_ket_on_bra(env.env[(site, :left)], env.env[(site, :right)]) + elseif site >= env.site + B = project_ket_on_bra( + env.env[(site, :left)], + env.ket[site], + env.mpo[site], + env.env[(site, :right)], + ) + else + B = project_ket_on_bra( + env.env[(site, :left)], + env.ket[site], + env.env[(site, :right)], + ) + end + B +end + +function measure_env(env::Environment, site::Site) + L = update_env_left(env.env[(site, :left)], env.bra[site], env.mpo[site], env.ket[site]) + R = env.env[(site, :right)] + overlap = @tensor L[b, t, c] * R[b, t, c] + negative = overlap < 0 + overlap *= sign(overlap) + (log(overlap) + env.log_norms[(site, :left)] + env.log_norms[(site, :right)], negative) +end diff --git a/src/gauges.jl b/src/gauges.jl new file mode 100644 index 0000000..c55cc78 --- /dev/null +++ b/src/gauges.jl @@ -0,0 +1,155 @@ + +# gauges.jl: This file provides basic functions to optimize gauges for the PEPS network. CUDA is supported. 
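+# Usage sketch (assumes `ψ_top` and `ψ_bot` are two QMps defined on the same sites;
+# the names are illustrative):
+#
+#   gauges = optimize_gauges_for_overlaps!!(ψ_top, ψ_bot, 1E-8, 4)
+#   gauges[site]   # accumulated diagonal gauge factors at `site`
+#
+# Each sweep compares the local density-matrix diagonals of the two states, rescales the
+# corresponding legs by X = (diag(ρ_bot) ./ diag(ρ_top)) .^ (1/4) on ψ_top and by 1 ./ X
+# on ψ_bot, and repeats until the relative change of the overlap drops below `tol`.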
+ +export optimize_gauges_for_overlaps!!, overlap_density_matrix + +function update_rq!(ψ::QMps{T}, AT::Array{T,3}, i::Site) where {T<:Real} + @cast ATR[x, (σ, y)] := AT[x, σ, y] + RT, QT = rq_fact(ATR) + RT ./= maximum(abs.(RT)) + @cast AT[x, σ, y] := QT[x, (σ, y)] (σ ∈ 1:size(AT, 2)) + ψ[i] = AT + RT +end + +function update_rq!(ψ::QMps{T}, AT::CuArray{T,3}, i::Site) where {T<:Real} + @cast ATR[x, (σ, y)] := AT[x, σ, y] + RT, QT = rq_fact(ATR) + RT ./= maximum(abs.(RT)) + @cast AT[x, σ, y] := QT[x, (σ, y)] (σ ∈ 1:size(AT, 2)) + ψ[i] = AT + RT +end + +function update_qr!(ψ::QMps{T}, AT::Array{T,3}, i::Site) where {T<:Real} + @cast ATR[(x, σ), y] := AT[x, σ, y] + QT, RT = qr_fact(ATR) + RT ./= maximum(abs.(RT)) + @cast AT[x, σ, y] := QT[(x, σ), y] (σ ∈ 1:size(AT, 2)) + ψ[i] = AT + RT +end + +function update_qr!(ψ::QMps{T}, AT::CuArray{T,3}, i::Site) where {T<:Real} + @cast ATR[(x, σ), y] := AT[x, σ, y] + QT, RT = qr_fact(ATR) + RT ./= maximum(abs.(RT)) + @cast AT[x, σ, y] := QT[(x, σ), y] (σ ∈ 1:size(AT, 2)) + ψ[i] = AT + RT +end + +function _gauges_right_sweep!!!( + ψ_top::QMps{R}, + ψ_bot::QMps{R}, + gauges::Dict; + tol = 1E-12, +) where {R<:Real} + RT = ψ_top.onGPU && ψ_bot.onGPU ? CUDA.ones(R, 1, 1) : ones(R, 1, 1) + RB = copy(RT) + for i ∈ ψ_top.sites + T, B = ψ_top[i], ψ_bot[i] + + @tensor T[a, b, c] := RT[a, s] * T[s, b, c] + @tensor B[a, b, c] := RB[a, s] * B[s, b, c] + @tensor ρ_t[r, s] := T[i, r, j] * conj(T)[i, s, j] + @tensor ρ_b[r, s] := B[i, r, j] * conj(B)[i, s, j] + + dρ_b, dρ_t = diag.((ρ_b, ρ_t)) + K = (dρ_b .< tol) .|| (dρ_t .< tol) + dρ_b[K] .= 1 + dρ_t[K] .= 1 + + X = (dρ_b ./ dρ_t) .^ (1 / 4) # optimize + X_inv = 1 ./ X + gauges[i] .*= X # update + + RT = update_qr!(ψ_top, T .* reshape(X, 1, :, 1), i) + RB = update_qr!(ψ_bot, B .* reshape(X_inv, 1, :, 1), i) + end +end + +function _gauges_left_sweep!!!( + ψ_top::QMps{R}, + ψ_bot::QMps{R}, + gauges::Dict; + tol = 1E-12, +) where {R<:Real} + RT = ψ_top.onGPU && ψ_bot.onGPU ? CUDA.ones(R, 1, 1) : ones(R, 1, 1) + RB = copy(RT) + for i ∈ reverse(ψ_top.sites) + T, B = ψ_top[i], ψ_bot[i] + + @tensor T[a, b, c] := T[a, b, s] * RT[s, c] + @tensor B[a, b, c] := B[a, b, s] * RB[s, c] + @tensor ρ_t[r, s] := T[i, r, j] * conj(T)[i, s, j] + @tensor ρ_b[r, s] := B[i, r, j] * conj(B)[i, s, j] + + dρ_b, dρ_t = diag.((ρ_b, ρ_t)) + K = (dρ_b .< tol) .|| (dρ_t .< tol) + dρ_b[K] .= 1 + dρ_t[K] .= 1 + + X = (dρ_b ./ dρ_t) .^ (1 / 4) # optimize + X_inv = 1 ./ X + gauges[i] .*= X # update + + RT = update_rq!(ψ_top, T .* reshape(X, 1, :, 1), i) + RB = update_rq!(ψ_bot, B .* reshape(X_inv, 1, :, 1), i) + end +end + +function optimize_gauges_for_overlaps!!( + ψ_top::QMps{T}, + ψ_bot::QMps{T}, + tol = 1E-8, + max_sweeps::Int = 4, +) where {T<:Real} + onGPU = ψ_top.onGPU && ψ_bot.onGPU + canonise!(ψ_top, :right) + canonise!(ψ_bot, :right) + overlap_old = dot(ψ_top, ψ_bot) + gauges = Dict(i => (onGPU ? 
CUDA.ones : ones)(T, size(ψ_top[i], 2)) for i ∈ ψ_top.sites) + #gauges = Dict(i => ones(T, size(ψ_top[i], 2)) for i ∈ ψ_top.sites) + for _ ∈ 1:max_sweeps + _gauges_right_sweep!!!(ψ_top, ψ_bot, gauges) + _gauges_left_sweep!!!(ψ_top, ψ_bot, gauges) + overlap_new = dot(ψ_top, ψ_bot) + Δ = overlap_new / overlap_old + overlap_old = overlap_new + if abs(Δ - one(T)) < tol + break + end + end + gauges +end + +function overlap_density_matrix(ϕ::QMps{T}, ψ::QMps{T}, k::Site) where {T<:Real} + @assert ψ.sites == ϕ.sites + C = _overlap_forward(ϕ, ψ, k) + D = _overlap_backwards(ϕ, ψ, k) + A, B = ψ[k], ϕ[k] + @tensor E[x, y] := C[b, a] * conj(B)[b, x, β] * A[a, y, α] * D[β, α] +end + +function _overlap_forward(ϕ::QMps{T}, ψ::QMps{T}, k::Site) where {T<:Real} + C = ϕ.onGPU && ψ.onGPU ? CUDA.ones(T, 1, 1) : ones(T, 1, 1) + i = ψ.sites[1] + while i < k + A, B = ψ[i], ϕ[i] + @tensor order = (α, β, σ) C[x, y] := conj(B)[β, σ, x] * C[β, α] * A[α, σ, y] + i += 1 + end + C +end + +function _overlap_backwards(ϕ::QMps{T}, ψ::QMps{T}, k::Site) where {T<:Real} + D = ϕ.onGPU && ψ.onGPU ? CUDA.ones(T, 1, 1) : ones(T, 1, 1) + i = ψ.sites[end] + while i > k + A, B = ψ[i], ϕ[i] + @tensor order = (α, β, σ) D[x, y] := conj(B)[x, σ, β] * D[β, α] * A[y, σ, α] + i -= 1 + end + D +end diff --git a/src/identities.jl b/src/identities.jl deleted file mode 100644 index a3077c0..0000000 --- a/src/identities.jl +++ /dev/null @@ -1,45 +0,0 @@ -export IdentityMPO, IdentityMPS -struct IdentityMPS{T<:Number,S<:AbstractArray} <: AbstractMPS{T} end -struct IdentityMPO{T<:Number,S<:AbstractArray} <: AbstractMPO{T} end -IdentityMPS() = IdentityMPS{Float64,Array}() -IdentityMPO() = IdentityMPO{Float64,Array}() - -IdentityMPS(::Type{T}) where {T<:AbstractArray} = IdentityMPS{Float64,T} -IdentityMPO(::Type{T}) where {T<:AbstractArray} = IdentityMPO{Float64,T} - -IdentityMPS(::Type{S}, ::Type{T}) where {S<:Number,T<:AbstractArray} = IdentityMPS{S,T} -IdentityMPO(::Type{S}, ::Type{T}) where {S<:Number,T<:AbstractArray} = IdentityMPO{S,T} - -const IdentityMPSorMPO = Union{IdentityMPO,IdentityMPS} - - -@inline function Base.getindex(::IdentityMPS{S,T}, ::Int) where {S,T} - ret = similar(T{S}, (1, 1, 1)) - ret[1] = one(S) - ret -end - - -@inline function Base.getindex(::IdentityMPO{S,T}, ::Int) where {S,T} - ret = similar(T{S}, (1, 1, 1, 1)) - ret[1] = one(S) - ret -end - - -LinearAlgebra.dot(O::AbstractMPO, ::IdentityMPO) = O -LinearAlgebra.dot(::IdentityMPO, O::AbstractMPO) = O -Base.length(::IdentityMPSorMPO) = Inf - - -LinearAlgebra.dot(O::AbstractMPO, ::IdentityMPS) = - MPS([dropdims(sum(A, dims = 4), dims = 4) for A ∈ O]) - - -LinearAlgebra.dot(::IdentityMPO, ψ::AbstractMPS) = ψ -LinearAlgebra.dot(ψ::AbstractMPS, ::IdentityMPO) = ψ - -function Base.show(io::IO, ::IdentityMPSorMPO) - println(io, "Trivial matrix product state") - println(io, " ") -end diff --git a/src/linear_algebra_ext.jl b/src/linear_algebra_ext.jl index ec7f65c..9055125 100644 --- a/src/linear_algebra_ext.jl +++ b/src/linear_algebra_ext.jl @@ -1,54 +1,59 @@ -export rq_fact, qr_fact +# linear_algebra_ext.jl: This file provides basic functions to perform custom SVD, and QR. +# Both are calculated on CPU, but can be transferd to GPU if need be. -function qr_fact(M::AbstractMatrix, Dcut::Int = typemax(Int), tol::Float64 = 1E-12, args...) - F = qr(M, args...) 
- q, r = _qr_fix(Array(F.Q), Array(F.R)) - if Dcut > size(q, 2) - return q, r - end - U, Σ, V = svd(r, Dcut, tol) - q * U, Diagonal(Σ) * V' -end +export rq_fact, qr_fact, svd_fact +@inline phase(d::T; atol = eps()) where {T<:Real} = + isapprox(d, zero(T), atol = atol) ? one(T) : d / abs(d) +@inline phase(d::AbstractArray; atol = eps()) = map(x -> phase(x; atol = atol), d) -function rq_fact(M::AbstractMatrix, Dcut::Int = typemax(Int), tol::Float64 = 1E-12, args...) - q, r = qr_fact(M', Dcut, tol, args...) - r', q' +function svd_fact( + A::AbstractMatrix{T}, + Dcut::Int = typemax(Int), + tol = eps(T); + kwargs..., +) where {T<:Real} + U, Σ, V = svd(A; kwargs...) + δ = min(Dcut, sum(Σ .> Σ[1] * max(eps(), tol))) + U, Σ, V = U[:, 1:δ], Σ[1:δ], V[:, 1:δ] + Σ ./= sqrt(sum(Σ .^ 2)) + ϕ = reshape(phase(diag(U); atol = tol), 1, :) + U .* ϕ, Σ, V .* ϕ end -function _qr_fix(Q::T, R::AbstractMatrix) where {T<:AbstractMatrix} - d = diag(R) - for i ∈ eachindex(d) - @inbounds d[i] = ifelse(isapprox(d[i], 0, atol = 1e-14), 1, d[i]) +function qr_fact( + M::AbstractMatrix{T}, + Dcut::Int = typemax(Int), + tol::T = eps(); + toGPU::Bool = true, + kwargs..., +) where {T<:Real} + q, r = qr_fix(qr(Array(M); kwargs...)) + if Dcut >= size(q, 2) + toGPU && return CuArray.((q, r)) + return q, r end - ph = d ./ abs.(d) - Q * Diagonal(ph), Diagonal(ph) * R + U, Σ, V = svd_fact(r, Dcut, tol, kwargs...) + toGPU && return CuArray.((q * U, Σ .* V')) + q * U, Σ .* V' end -function LinearAlgebra.svd( - A::AbstractMatrix, +function rq_fact( + M::AbstractMatrix{T}, Dcut::Int = typemax(Int), - tol::Float64 = 1E-12, - args..., -) - - U, Σ, V = svd(A, args...) - - tol = Σ[1] * max(eps(), tol) - δ = min(Dcut, sum(Σ .> tol)) - - U = U[:, 1:δ] - Σ = Σ[1:δ] - Σ ./ sum(Σ .^ 2) - V = V[:, 1:δ] + tol::T = eps(); + toGPU::Bool = true, + kwargs..., +) where {T<:Real} + q, r = qr_fact(M', Dcut, tol; toGPU = toGPU, kwargs...) + toGPU && return CuArray.((r', q')) + r', q' +end - d = diag(U) - for i ∈ eachindex(d) - @inbounds d[i] = ifelse(isapprox(d[i], 0, atol = 1e-14), 1, d[i]) - end - ph = d ./ abs.(d) - U * Diagonal(ph), Σ, V * Diagonal(ph) +function qr_fix(QR_fact; tol::T = eps()) where {T<:Real} + ϕ = phase(diag(QR_fact.R); atol = tol) + QR_fact.Q * Diagonal(ϕ), ϕ .* QR_fact.R end diff --git a/src/mps/base.jl b/src/mps/base.jl new file mode 100644 index 0000000..fe72822 --- /dev/null +++ b/src/mps/base.jl @@ -0,0 +1,110 @@ +# ./mps/base.jl: This file provides basic definitions of custom Matrix Product States / Operators. + +export Site, Sites, AbstractTensorNetwork, MpoTensor, QMpsOrMpo + +abstract type AbstractTensorNetwork{T} end + +const Site = Union{Int,Rational{Int}} +const Sites = NTuple{N,Site} where {N} +const TensorMap{T} = Dict{Site,Union{Tensor{T,2},Tensor{T,3},Tensor{T,4}}} # 2 and 4 - mpo; 3 - mps + +""" +A mutable struct representing a Matrix Product Operator (MPO) tensor in a tensor network. + +## Fields +- `top::Vector{Tensor{T, 2}}`: Vector of tensors representing the top tensor of the MPO. Empty if `N == 2`. +- `ctr::Union{Tensor{T, N}, Nothing}`: Central tensor of the MPO. `Nothing` if not present. +- `bot::Vector{Tensor{T, 2}}`: Vector of tensors representing the bottom tensor of the MPO. Empty if `N == 2`. +- `dims::Dims{N}`: Dimensions of the MPO tensor. + +## Description +`MpoTensor{T, N}` is a mutable struct that represents a Matrix Product Operator tensor in a tensor network. +The MPO tensor is characterized by its top and bottom tensors, a central tensor (`ctr`), and dimensions (`dims`). 
+The top and bottom legs are vectors of two-dimensional tensors (`Tensor{T, 2}`). +The central tensor is of type `Tensor{T, N}` or `Nothing` if not present. +The dimensions of the MPO tensor are specified by `dims`. +""" +mutable struct MpoTensor{T<:Real,N} + top::Vector{Tensor{T,2}} # N == 2 top = [] + ctr::Union{Tensor{T,N},Nothing} + bot::Vector{Tensor{T,2}} # N == 2 bot = [] + dims::Dims{N} +end + +# """ +# Constructor function for creating a Matrix Product Operator (MPO) tensor from a `TensorMap`. + +# ## Arguments +# - `ten::TensorMap{T}`: A dictionary mapping `Site` indices to tensors of type `Tensor{T, 2}`, `Tensor{T, 3}`, or `Tensor{T, 4}`. The key `0` represents the central tensor (`ctr`) of the MPO. + +# ## Returns +# - An instance of `MpoTensor{T, nn}` representing the Matrix Product Operator. + +# ## Description +# The `MpoTensor` function constructs a Matrix Product Operator tensor from a `TensorMap`, +# where the keys are `Site` indices and the values are tensors of appropriate dimensions. +# The construction process involves sorting the tensor dictionary based on the site indices +# and separating tensors into the top, central, and bottom parts. +# The central tensor is identified by the key `0`. +# The resulting `MpoTensor` encapsulates the tensors along with their dimensions. + +# ## Exceptions +# - Throws a `DomainError` if the central tensor (`ctr`) has dimensions other than 2 or 4. +# """ +function MpoTensor(ten::TensorMap{T}) where {T} + sk = sort(collect(keys(ten))) + top = [ten[k] for k ∈ sk if k < 0] + bot = [ten[k] for k ∈ sk if k > 0] + ctr = get(ten, 0, nothing) + + if isnothing(ctr) + top_bot = vcat(top, bot) + dims = (0, size(top_bot[1], 1), 0, size(top_bot[end], 2)) + nn = 4 + else + nn = ndims(ctr) + if nn == 2 + @assert isempty(top) && isempty(bot) "Both top and bot should be empty" + dims = size(ctr) + elseif nn == 4 + dims = ( + size(ctr, 1), + isempty(top) ? size(ctr, 2) : size(top[1], 1), + size(ctr, 3), + isempty(bot) ? size(ctr, 4) : size(bot[end], 2), + ) + else + throw(DomainError(ndims(ctr), "MpoTensor should have ndims 2 or 4")) + end + end + MpoTensor{T,nn}(top, ctr, bot, dims) +end + +Base.eltype(ten::MpoTensor{T,N}) where {T,N} = T +Base.ndims(ten::MpoTensor{T,N}) where {T,N} = N +Base.size(ten::MpoTensor, n::Int) = ten.dims[n] +Base.size(ten::MpoTensor) = ten.dims + +const MpoTensorMap{T} = Dict{Site,MpoTensor{T}} + +for (S, M) ∈ ((:QMpo, :MpoTensorMap), (:QMps, :TensorMap)) + @eval begin + export $S, $M + mutable struct $S{F<:Real} <: AbstractTensorNetwork{F} + tensors::$M{F} + sites::Vector{Site} + onGPU::Bool + + function $S(ten::$M{F}; onGPU::Bool = false) where {F} + new{F}(ten, sort(collect(keys(ten))), onGPU) + end + end + end +end + +const QMpsOrMpo{T} = Union{QMpo{T},QMps{T}} + +@inline Base.getindex(ψ::QMpsOrMpo, i) = getindex(ψ.tensors, i) +@inline Base.setindex!(ψ::QMpsOrMpo, A, i::Site) = ψ.tensors[i] = A +@inline Base.eltype(ψ::QMpsOrMpo{T}) where {T} = T +@inline Base.copy(ψ::QMps) = QMps(copy(ψ.tensors), onGPU = ψ.onGPU) diff --git a/src/mps/canonise.jl b/src/mps/canonise.jl new file mode 100644 index 0000000..ab0a381 --- /dev/null +++ b/src/mps/canonise.jl @@ -0,0 +1,98 @@ + +# canonise.jl: This file provides basic function to left / right truncate / canonise MPS. CUDA is supported. 
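+# Usage sketch (assumes `ψ` is a QMps; `Dcut` and `tolS` are illustrative names for the
+# bond-dimension cutoff and the singular-value tolerance):
+#
+#   canonise!(ψ, :left)                        # left-canonical form via QR sweeps
+#   canonise_truncate!(ψ, :right, Dcut, tolS)  # right-canonical form with bonds truncated to Dcut
+#   measure_spectrum(ψ)                        # Schmidt spectra per site; expects a left-canonical ψ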
+ +export canonise!, truncate!, canonise_truncate!, measure_spectrum + + +function measure_spectrum(ψ::QMps{T}) where {T<:Real} + # Assume that ψ is left_canonical + @assert is_left_normalized(ψ) + R = ones(T, 1, 1) + schmidt = Dict() # {Site =>AbstractArray} + for i ∈ reverse(ψ.sites) + B = permutedims(Array(ψ[i]), (1, 3, 2)) # [x, σ, α] + @matmul M[x, σ, y] := sum(α) B[x, σ, α] * R[α, y] + @cast M[x, (σ, y)] := M[x, σ, y] + Dcut, tolS = 100000, 0.0 + U, S, _ = svd_fact(Array(M), Dcut, tolS) + push!(schmidt, i => S) + R = U * Diagonal(S) + end + schmidt +end + + + +function truncate!( + ψ::QMps{T}, + s::Symbol, + Dcut::Int = typemax(Int), + tolS::T = eps(); + kwargs..., +) where {T<:Real} + @assert s ∈ (:left, :right) + if s == :right + _right_sweep!(ψ; kwargs...) + _left_sweep!(ψ, Dcut, tolS; kwargs...) + else + _left_sweep!(ψ, args...) + _right_sweep!(ψ, Dcut, tolS; kwargs...) + end +end + +canonise!(ψ::QMps, s::Symbol) = canonise!(ψ, Val(s)) +canonise!(ψ::QMps, ::Val{:right}) = _left_sweep!(ψ, typemax(Int)) +canonise!(ψ::QMps, ::Val{:left}) = _right_sweep!(ψ, typemax(Int)) + +function canonise_truncate!( + ψ::QMps, + type::Symbol, + Dcut::Int = typemax(Int), + tolS = eps(); + kwargs..., +) + if type == :right + _left_sweep!(ψ, Dcut, tolS; kwargs...) + elseif type == :left + _right_sweep!(ψ, Dcut, tolS; kwargs...) + else + throw(ArgumentError("Wrong canonization type $type")) + end +end + +function _right_sweep!( + ψ::QMps{T}, + Dcut::Int = typemax(Int), + tolS::T = eps(T); + kwargs..., +) where {T<:Real} + R = ψ.onGPU ? CUDA.ones(T, 1, 1) : ones(T, 1, 1) + for i ∈ ψ.sites + A = ψ[i] + @matmul M[x, y, σ] := sum(α) R[x, α] * A[α, y, σ] + M = permutedims(M, (3, 1, 2)) # [σ, x, y] + @cast M[(σ, x), y] := M[σ, x, y] + Q, R = qr_fact(M, Dcut, tolS; toGPU = ψ.onGPU, kwargs...) + R ./= maximum(abs.(R)) + @cast A[σ, x, y] := Q[(σ, x), y] (σ ∈ 1:size(A, 3)) + ψ[i] = permutedims(A, (2, 3, 1)) # [x, y, σ] + end +end + +function _left_sweep!( + ψ::QMps{T}, + Dcut::Int = typemax(Int), + tolS::T = eps(T); + kwargs..., +) where {T<:Real} + R = ψ.onGPU ? CUDA.ones(T, 1, 1) : ones(T, 1, 1) + for i ∈ reverse(ψ.sites) + B = permutedims(ψ[i], (1, 3, 2)) # [x, σ, α] + @matmul M[x, σ, y] := sum(α) B[x, σ, α] * R[α, y] + @cast M[x, (σ, y)] := M[x, σ, y] + R, Q = rq_fact(M, Dcut, tolS; toGPU = ψ.onGPU, kwargs...) + R ./= maximum(abs.(R)) + @cast B[x, σ, y] := Q[x, (σ, y)] (σ ∈ 1:size(B, 2)) + ψ[i] = permutedims(B, (1, 3, 2)) + end +end diff --git a/src/mps/dot.jl b/src/mps/dot.jl new file mode 100644 index 0000000..8cefc22 --- /dev/null +++ b/src/mps/dot.jl @@ -0,0 +1,53 @@ +# ./mps/dot.jl: This file provides basic functionality to compute the dot product between MPS +# Other functions to contract MPS with other tensors are also provided. + +LinearAlgebra.norm(ψ::QMps) = sqrt(abs(dot(ψ, ψ))) + +Base.:(*)(ϕ::QMps, ψ::QMps) = dot(ϕ, ψ) +Base.:(*)(W::QMpo, ψ::QMps) = dot(W, ψ) + +function LinearAlgebra.dot(ψ::QMps{T}, ϕ::QMps{T}) where {T<:Real} + @assert ψ.sites == ϕ.sites + C = ψ.onGPU && ϕ.onGPU ? 
CUDA.ones(T, 1, 1) : ones(T, 1, 1) + for i ∈ ϕ.sites + A, B = ϕ[i], ψ[i] + @tensor order = (α, β, σ) C[x, y] := conj(B)[β, x, σ] * C[β, α] * A[α, y, σ] + end + tr(C) +end + +function LinearAlgebra.dot(ψ::QMpo{R}, ϕ::QMps{R}) where {R<:Real} + D = TensorMap{R}() + for i ∈ reverse(ϕ.sites) + M, B = ψ[i], ϕ[i] + for v ∈ reverse(M.bot) + B = contract_matrix_tensor3(v, B) + end + B = contract_tensors43(M.ctr, B) + for v ∈ reverse(M.top) + B = contract_matrix_tensor3(v, B) + end + + mps_li = left_nbrs_site(i, ϕ.sites) + mpo_li = left_nbrs_site(i, ψ.sites) + + while mpo_li > mps_li + st = size(B, 3) + sl2 = size(ψ[mpo_li], 2) + @cast B[l1, l2, (r, t)] := B[(l1, l2), r, t] (l2 ∈ 1:sl2) + B = permutedims(B, (1, 3, 2)) + B = contract_matrix_tensor3(ψ[mpo_li], B) + B = permutedims(B, (1, 3, 2)) + @cast B[(l1, l2), r, t] := B[l1, l2, (r, t)] (t ∈ 1:st) + mpo_li = left_nbrs_site(mpo_li, ψ.sites) + end + push!(D, i => B) + end + QMps(D; onGPU = ψ.onGPU && ϕ.onGPU) +end + +contract_tensor3_matrix(B::AbstractArray{T,3}, M::MpoTensor{T,2}) where {T<:Real} = + contract_tensor3_matrix(B, M.ctr) +contract_matrix_tensor3(M::MpoTensor{T,2}, B::AbstractArray{T,3}) where {T<:Real} = + contract_matrix_tensor3(M.ctr, B) +contract_tensors43(B::Nothing, A::AbstractArray{T,3}) where {T<:Real} = A diff --git a/src/mps/identity.jl b/src/mps/identity.jl new file mode 100644 index 0000000..c07c7bc --- /dev/null +++ b/src/mps/identity.jl @@ -0,0 +1,34 @@ +# ./mps/identity.jl: This file provides custom MPS Identity. Note, this approach is easier than +# trying to overload the universal identity operator, I, from LinearAlgebra. + +export local_dims, IdentityQMps + +function IdentityQMps( + ::Type{T}, + loc_dims::Dict, + Dmax::Int = 1; + onGPU = true, +) where {T<:Real} + _zeros = onGPU ? CUDA.zeros : zeros + id = TensorMap{T}(keys(loc_dims) .=> _zeros.(T, Dmax, Dmax, values(loc_dims))) + + site_min, ld_min = minimum(loc_dims) + site_max, ld_max = maximum(loc_dims) + if site_min == site_max + id[site_min] = _zeros(T, 1, 1, ld_min) + else + id[site_min] = _zeros(T, 1, Dmax, ld_min) + id[site_max] = _zeros(T, Dmax, 1, ld_max) + end + + for (site, ld) ∈ loc_dims + id[site][1, 1, :] .= 1 / sqrt(ld) + end + QMps(id; onGPU = onGPU) +end + +function local_dims(mpo::QMpo, dir::Symbol) + @assert dir ∈ (:down, :up) + dim = dir == :down ? 4 : 2 + Dict{Site,Int}(k => size(mpo[k], dim) for k ∈ mpo.sites if ndims(mpo[k]) == 4) +end diff --git a/src/mps/rand.jl b/src/mps/rand.jl new file mode 100644 index 0000000..b96ab9d --- /dev/null +++ b/src/mps/rand.jl @@ -0,0 +1,45 @@ + +# ./mps/rand.jl: This file provides methods to generate random MPS / MPO + +function Base.rand( + ::Type{QMps{T}}, + loc_dims::Dict, + Dmax::Int = 1; + onGPU = false, +) where {T<:Real} + id = TensorMap{T}(keys(loc_dims) .=> rand.(T, Dmax, Dmax, values(loc_dims))) + site_min, ld_min = minimum(loc_dims) + site_max, ld_max = maximum(loc_dims) + if site_min == site_max + id[site_min] = rand(T, 1, 1, ld_min) + else + id[site_min] = rand(T, 1, Dmax, ld_min) + id[site_max] = rand(T, Dmax, 1, ld_max) + end + onGPU ? 
move_to_CUDA!(QMps(id)) : QMps(id) +end + +function Base.rand(::Type{CentralTensor{T}}, s::Vector{Int}) where {T<:Real} + CentralTensor( + Real.(rand(s[1], s[5])), + Real.(rand(s[2], s[6])), + Real.(rand(s[3], s[7])), + Real.(rand(s[4], s[8])), + ) +end + +function Base.rand( + ::Type{SiteTensor{T}}, + lp::PoolOfProjectors, + l::Int, + D::NTuple, +) where {T<:Real} + loc_exp = rand(l) + projs = D + + SiteTensor(lp, loc_exp, projs) +end + +function Base.rand(::Type{QMpo{T}}, loc_dims::Dict; onGPU::Bool = false) where {T<:Real} + QMpo(MpoTensorMap{T}(loc_dims)) +end diff --git a/src/mps/transpose.jl b/src/mps/transpose.jl new file mode 100644 index 0000000..dbb1ab1 --- /dev/null +++ b/src/mps/transpose.jl @@ -0,0 +1,21 @@ + +# ./mps/transpose.jl: This file defines what it means to transpse MPO. Note, this should not be +# done by overloading Base.transpose for QMpo to avoid overloading (Array)'. + +function Base.transpose(ψ::QMpo{T}) where {T<:Real} + QMpo( + MpoTensorMap{T}(keys(ψ.tensors) .=> mpo_transpose.(values(ψ.tensors))); + onGPU = ψ.onGPU, + ) +end + +mpo_transpose(M::MpoTensor{T,2}) where {T<:Real} = M + +function mpo_transpose(M::MpoTensor{T,4}) where {T<:Real} + MpoTensor{T,4}( + mpo_transpose.(reverse(M.bot)), + mpo_transpose(M.ctr), + mpo_transpose.(reverse(M.top)), + M.dims[[1, 4, 3, 2]], + ) +end diff --git a/src/mps/utils.jl b/src/mps/utils.jl new file mode 100644 index 0000000..5ab360d --- /dev/null +++ b/src/mps/utils.jl @@ -0,0 +1,67 @@ +# ./mps/aux.jl: This file provides auxiliary functions to verify various MPS properties. + +export bond_dimension, + bond_dimensions, is_consistent, is_left_normalized, is_right_normalized, length, size + +@inline bond_dimension(ψ::QMpsOrMpo) = maximum(size.(values(ψ.tensors), 1)) +@inline bond_dimensions(ψ::QMpsOrMpo) = [size(ψ.tensors[n]) for n ∈ ψ.sites] +@inline Base.length(ψ::QMpsOrMpo) = maximum(ψ.sites) +@inline Base.size(ψ::QMpsOrMpo) = (maximum(ψ.sites),) + +function is_consistent(ψ::QMps) + site_min = minimum(ψ.sites) + site_max = maximum(ψ.sites) + @assert size(ψ.tensors[site_min], 1) == 1 "Incorrect size on the left boundary." + @assert size(ψ.tensors[site_max], 2) == 1 "Incorrect size on the right boundary." + for (s1, s2) ∈ zip(ψ.sites[begin:end-1], ψ.sites[begin+1:end]) + @assert size(ψ.tensors[s1], 2) == size(ψ.tensors[s2], 1) "Incorrect link between $i and $(i+1)." 
+ end + dev = which_device(ψ) + if ψ.onGPU + @assert :GPU ∈ dev && :CPU ∉ dev + end + if !ψ.onGPU + @assert :GPU ∉ dev && :CPU ∈ dev + end + true +end + +function eye(::Type{T}, dim; toGPU::Bool = false) where {T} + v = ones(T, dim) + toGPU && return cu(spdiagm(v)) + Diagonal(v) +end + +function is_left_normalized(ψ::QMps, ::Val{false}) + all( + eye(eltype(ψ), size(A, 2); toGPU = false) ≈ + @tensor(Id[x, y] := A[α, x, σ] * A[α, y, σ]; order = (α, σ)) for + A ∈ values(ψ.tensors) # TODO: split the line + ) +end + +function is_left_normalized(ψ::QMps, ::Val{true}) + all( + eye(eltype(ψ), size(A, 2); toGPU = true) ≈ + @cutensor(Id[x, y] := A[α, x, σ] * A[α, y, σ]) for A ∈ values(ψ.tensors) # TODO: split the line + ) +end + +is_left_normalized(ψ::QMps) = is_left_normalized(ψ, Val(ψ.onGPU)) + +function is_right_normalized(ψ::QMps, ::Val{false}) + all( + eye(eltype(ψ), size(B, 1); toGPU = false) ≈ + @tensor(Id[x, y] := B[x, α, σ] * B[y, α, σ]; order = (α, σ)) for + B ∈ values(ψ.tensors) # TODO: split the line + ) +end + +function is_right_normalized(ψ::QMps, ::Val{true}) + all( + eye(eltype(ψ), size(B, 1); toGPU = true) ≈ + @cutensor(Id[x, y] := B[x, α, σ] * B[y, α, σ]) for B ∈ values(ψ.tensors) # TODO: split the line + ) +end + +is_right_normalized(ψ::QMps) = is_right_normalized(ψ, Val(ψ.onGPU)) diff --git a/src/projectors.jl b/src/projectors.jl new file mode 100644 index 0000000..fa21437 --- /dev/null +++ b/src/projectors.jl @@ -0,0 +1,140 @@ +export PoolOfProjectors, get_projector!, add_projector!, empty! + +const Proj{T} = Union{Vector{T},CuArray{T,1}} + +""" +$(TYPEDSIGNATURES) + +`PoolOfProjectors` is a data structure for managing projectors associated with Ising model sites. +It allows efficient storage and retrieval of projectors based on their indices and provides support for different computational devices. + +# Fields: +- `data::Dict{Symbol, Dict{Int, Proj{T}}}`: A dictionary that stores projectors associated with different +computational devices (`:CPU`, `:GPU`, etc.). The inner dictionary maps site indices to projectors. +- `default_device::Symbol`: A symbol representing the default computational device for projectors in the pool. +- `sizes::Dict{Int, Int}`: A dictionary that maps site indices to the maximum projector size for each site. + +# Constructors: +- `PoolOfProjectors(data::Dict{Int, Dict{Int, Vector{T}}}) where T`: Create a `PoolOfProjectors` with initial data for projectors. +The data is provided as a dictionary that maps site indices to projectors stored in different computational devices. +The `sizes` dictionary is automatically populated based on the maximum projector size for each site. +- `PoolOfProjectors{T}() where T`: Create an empty `PoolOfProjectors` with no projectors initially stored. +""" +struct PoolOfProjectors{T<:Integer} + data::Dict{Symbol,Dict{Int,Proj{T}}} + default_device::Symbol + sizes::Dict{Int,Int} + + PoolOfProjectors(data::Dict{Int,Dict{Int,Vector{T}}}) where {T} = + new{T}(Dict(:CPU => data), :CPU, Dict{Int,Int}(k => maximum(v) for (k, v) ∈ data)) + PoolOfProjectors{T}() where {T} = + new{T}(Dict(:CPU => Dict{Int,Proj{T}}()), :CPU, Dict{Int,Int}()) +end + + +Base.eltype(lp::PoolOfProjectors{T}) where {T} = T +Base.length(lp::PoolOfProjectors) = length(lp.data[lp.default_device]) +Base.length(lp::PoolOfProjectors, device::Symbol) = length(lp.data[device]) + +""" +$(TYPEDSIGNATURES) + +Empty the pool of projectors associated with a specific computational device. 
+ +This function removes all projectors stored on the specified computational device, freeing up memory resources. + +# Arguments: +- `lp::PoolOfProjectors`: The `PoolOfProjectors` object containing projectors. +- `device::Symbol`: The computational device for which projectors should be emptied (e.g., `:CPU`, `:GPU`). +""" +function Base.empty!(lp::PoolOfProjectors, device::Symbol) + if device ∈ keys(lp.data) + empty!(lp.data[device]) + end +end + +Base.length(lp::PoolOfProjectors, index::Int) = length(lp.data[lp.default_device][index]) +Base.size(lp::PoolOfProjectors, index::Int) = lp.sizes[index] + +get_projector!(lp::PoolOfProjectors, index::Int) = + get_projector!(lp, index, lp.default_device) + +""" +$(TYPEDSIGNATURES) + +TODO This is version for only one GPU + +Retrieve or create a projector from the `PoolOfProjectors` associated with a specific device. + +This function retrieves a projector from the `PoolOfProjectors` if it already exists. +If the projector does not exist in the pool, it creates a new one and stores it for future use on the specified computational device. + +# Arguments: +- `lp::PoolOfProjectors{T}`: The `PoolOfProjectors` object containing projectors. +- `index::Int`: The index of the projector to retrieve or create. +- `device::Symbol`: The computational device on which the projector should be stored or retrieved (e.g., `:CPU`, `:GPU`). + +# Returns: +- `Proj{T}`: The projector of type `T` associated with the specified index and device. +""" +function get_projector!( + lp::PoolOfProjectors{T}, + index::Int, + device::Symbol, +) where {T<:Integer} + if device ∉ keys(lp.data) + push!(lp.data, device => Dict{Int,Proj{T}}()) + end + + if index ∉ keys(lp.data[device]) + if device == :GPU + p = CuArray{T}(lp.data[lp.default_device][index]) + elseif device == :CPU + p = Array{T}(lp.data[lp.default_device][index]) + else + throw(ArgumentError("device should be :CPU or :GPU")) + end + push!(lp.data[device], index => p) + end + lp.data[device][index] +end + +""" +$(TYPEDSIGNATURES) + +Add a projector to the `PoolOfProjectors` and associate it with an index. + +This function adds a projector `p` to the `PoolOfProjectors`. +The `PoolOfProjectors` stores projectors based on their computational device (e.g., CPU or GPU) and assigns a unique index to each projector. +The index can be used to retrieve the projector later using `get_projector!`. + +# Arguments: +- `lp::PoolOfProjectors{T}`: The `PoolOfProjectors` object to which the projector should be added. +- `p::Proj`: The projector to be added to the pool. The type of the projector `Proj` should match the type `T` specified in the `PoolOfProjectors`. + +# Returns: +- `Int`: The unique index associated with the added projector in the pool. 
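+
+# Example:
+An illustrative sketch, mirroring the behaviour exercised in `test/projectors.jl`:
+```julia
+lp = PoolOfProjectors{Int64}()
+add_projector!(lp, [1, 1, 2, 2, 3, 3])   # returns 1: first projector stored in the pool
+add_projector!(lp, [1, 1, 2, 2, 3, 3])   # returns 1 again: duplicates reuse the existing index
+k = add_projector!(lp, [1, 2, 1, 3])     # returns 2: a distinct projector gets the next index
+get_projector!(lp, k) == [1, 2, 1, 3]    # true (retrieved from the default device)
+```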
+""" +function add_projector!(lp::PoolOfProjectors{T}, p::Proj) where {T<:Integer} + if lp.default_device == :CPU + p = Array{T}(p) + elseif lp.default_device == :GPU + p = CuArray{T}(p) + else + throw(ArgumentError("default_device should be :CPU or :GPU")) + end + if p in values(lp.data[lp.default_device]) + key = -1 + for guess in keys(lp.data[lp.default_device]) + if lp.data[lp.default_device][guess] == p + key = guess + break + end + end + else + key = length(lp.data[lp.default_device]) + 1 + push!(lp.data[lp.default_device], key => p) + push!(lp.sizes, key => maximum(p)) + end + key +end diff --git a/src/transfer.jl b/src/transfer.jl new file mode 100644 index 0000000..994266e --- /dev/null +++ b/src/transfer.jl @@ -0,0 +1,84 @@ + +# transfer.jl: This file provides rules of how to transfer tensors to GPU. Note, NOT all of +# tensor's coponents are moved from CPU to GPU and most tensors are generated +# on CPU due to the size of clustered Hamiltonian. +export which_device, move_to_CUDA!, move_to_CPU! + +move_to_CUDA!(ten::Array{T,N}) where {T,N} = CuArray(ten) #cu(ten, unified=true) + + +move_to_CUDA!(ten::Union{CuArray{T,N},Nothing}) where {T,N} = ten +move_to_CUDA!(ten::Diagonal) = Diagonal(move_to_CUDA!(diag(ten))) + +function move_to_CUDA!(ten::CentralTensor) + ten.e11 = move_to_CUDA!(ten.e11) + ten.e12 = move_to_CUDA!(ten.e12) + ten.e21 = move_to_CUDA!(ten.e21) + ten.e22 = move_to_CUDA!(ten.e22) + ten +end + +function move_to_CUDA!(ten::DiagonalTensor) + ten.e1 = move_to_CUDA!(ten.e1) + ten.e2 = move_to_CUDA!(ten.e2) + ten +end + +function move_to_CUDA!(ten::VirtualTensor) + ten.con = move_to_CUDA!(ten.con) + # ten.projs = move_to_CUDA!.(ten.projs) # TODO 1) is this necessary ? + ten +end + +function move_to_CUDA!(ten::SiteTensor) + ten.loc_exp = move_to_CUDA!(ten.loc_exp) + # ten.projs = move_to_CUDA!.(ten.projs) # TODO 2) is this necessary ? + ten +end + +function move_to_CUDA!(ten::MpoTensor) + for i ∈ 1:length(ten.top) + ten.top[i] = move_to_CUDA!(ten.top[i]) + end + for i ∈ 1:length(ten.bot) + ten.bot[i] = move_to_CUDA!(ten.bot[i]) + end + ten.ctr = move_to_CUDA!(ten.ctr) + ten +end + +function move_to_CUDA!(ψ::Union{QMpo{T},QMps{T}}) where {T} + for k ∈ keys(ψ.tensors) + ψ[k] = move_to_CUDA!(ψ[k]) + end + ψ.onGPU = true + ψ +end + +move_to_CPU!(ten::CuArray{T,N}) where {T,N} = Array(ten) +move_to_CPU!(ten::Union{Array{T,N},Nothing}) where {T,N} = ten +move_to_CPU!(ten::Diagonal) = Diagonal(move_to_CPU!(diag(ten))) + +function move_to_CPU!(ψ::QMps{T}) where {T} + for k ∈ keys(ψ.tensors) + ψ[k] = move_to_CPU!(ψ[k]) + end + ψ.onGPU = false + ψ +end + + + +which_device(::Nothing) = Set() +which_device(ψ::Union{QMpo{T},QMps{T}}) where {T} = + union(which_device.(values(ψ.tensors))...) +which_device(ten::MpoTensor) = + union(which_device(ten.ctr), which_device.(ten.top)..., which_device.(ten.bot)...) +which_device(ten::DiagonalTensor) = union(which_device.((ten.e1, ten.e2))...) +which_device(ten::VirtualTensor) = union(which_device.((ten.con,))...) # TODO cf. 1) ten.projs +which_device(ten::CentralTensor) = + union(which_device.((ten.e11, ten.e12, ten.e21, ten.e22))...) +which_device(ten::SiteTensor) = union(which_device.((ten.loc_exp,))...) # TODO cf. 
2) ten.projs +which_device(ten::Array{T,N}) where {T,N} = Set((:CPU,)) +which_device(ten::CuArray{T,N}) where {T,N} = Set((:GPU,)) +which_device(ten::Diagonal) = which_device(diag(ten)) diff --git a/src/utils/memory.jl b/src/utils/memory.jl new file mode 100644 index 0000000..06f33c5 --- /dev/null +++ b/src/utils/memory.jl @@ -0,0 +1,47 @@ +export measure_memory, format_bytes + +measure_memory(ten::AbstractArray) = [Base.summarysize(ten), 0] # [CPU_memory, GPU_memory] +measure_memory(ten::CuArray) = [0, prod(size(ten)) * sizeof(eltype(ten))] +measure_memory(ten::SparseMatrixCSC) = + sum(measure_memory.([ten.colptr, ten.rowval, ten.nzval])) +measure_memory(ten::CuSparseMatrixCSC) = + sum(measure_memory.([ten.colPtr, ten.rowVal, ten.nzVal])) +measure_memory(ten::CuSparseMatrixCSR) = + sum(measure_memory.([ten.rowPtr, ten.colVal, ten.nzVal])) +measure_memory(ten::Diagonal) = measure_memory(diag(ten)) +measure_memory(ten::SiteTensor) = sum(measure_memory.([ten.loc_exp])) # ten.projs...])) +measure_memory(ten::CentralTensor) = + sum(measure_memory.([ten.e11, ten.e12, ten.e21, ten.e22])) +measure_memory(ten::DiagonalTensor) = sum(measure_memory.([ten.e1, ten.e2])) +measure_memory(ten::VirtualTensor) = sum(measure_memory.([ten.con])) # ten.projs...])) +measure_memory(ten::MpoTensor) = sum(measure_memory.([ten.top..., ten.ctr, ten.bot...])) +measure_memory(ten::Union{QMps,QMpo}) = sum(measure_memory.(values(ten.tensors))) +measure_memory(env::Environment) = sum(measure_memory.(values(env.env))) +measure_memory(env::EnvironmentMixed) = sum(measure_memory.(values(env.env))) +measure_memory(lp::PoolOfProjectors) = sum([measure_memory(da) for da ∈ values(lp.data)]) +measure_memory(dict::Dict) = isempty(dict) ? [0, 0] : sum(measure_memory.(values(dict))) +measure_memory(tuple::Tuple) = sum(measure_memory.(tuple)) +measure_memory(ten::Int) = [sizeof(ten), 0] +measure_memory(::Nothing) = [0, 0] + + +function format_bytes(bytes, decimals::Int = 2, k::Int = 1024) + bytes == 0 && return "0 Bytes" + dm = decimals < 0 ? 0 : decimals + sizes = ["Bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"] + i = convert(Int, floor(log(bytes) / log(k))) + string(round((bytes / ^(k, i)), digits = dm)) * " " * sizes[i+1] +end + +function measure_memory(caches::IdDict{Any,Any}, bytes::Bool = true) + memoization_memory = bytes ? Dict{Any,Vector{String}}() : Dict{Any,Vector{Int64}}() + for key in keys(caches) + push!( + memoization_memory, + key => + bytes ? format_bytes.(measure_memory(caches[key])) : + measure_memory(caches[key]), + ) + end + memoization_memory +end diff --git a/src/utils/utils.jl b/src/utils/utils.jl new file mode 100644 index 0000000..f498204 --- /dev/null +++ b/src/utils/utils.jl @@ -0,0 +1,108 @@ +export rank_reveal, unique_dims + +import Base.Prehashed +""" +$(TYPEDSIGNATURES) + +Reveal ranks and energies in a specified order. + +This function calculates and reveals the ranks and energies of a set of states in either the +'PE' (Projector Energy) or 'EP' (Energy Projector) order. + +# Arguments: +- `energy`: The energy values of states. +- `order::Symbol`: The order in which to reveal the ranks and energies. +It can be either `:PE` for 'Projector Energy)' order (default) or `:EP` for 'Energy Projector' order. + +# Returns: +- If `order` is `:PE`, the function returns a tuple `(P, E)` where: + - `P`: A permutation matrix representing projectors. + - `E`: An array of energy values. +- If `order` is `:EP`, the function returns a tuple `(E, P)` where: + - `E`: An array of energy values. 
+ - `P`: A permutation matrix representing projectors. +""" +function rank_reveal(energy, order = :PE) #TODO: add type + @assert order ∈ (:PE, :EP) + dim = order == :PE ? 1 : 2 + E, idx = unique_dims(energy, dim) + P = identity.(idx) + order == :PE ? (P, E) : (E, P) +end + +@generated function unique_dims(A::AbstractArray{T,N}, dim::Integer) where {T,N} + quote + 1 <= dim <= $N || return copy(A) + hashes = zeros(UInt, axes(A, dim)) + + # Compute hash for each row + k = 0 + @nloops $N i A d -> ( + if d == dim + k = i_d + end + ) begin + @inbounds hashes[k] = hash(hashes[k], hash((@nref $N A i))) + end + + # Collect index of first row for each hash + uniquerow = similar(Array{Int}, axes(A, dim)) + firstrow = Dict{Prehashed,Int}() + for k in axes(A, dim) + uniquerow[k] = get!(firstrow, Prehashed(hashes[k]), k) + end + uniquerows = collect(values(firstrow)) + + # Check for collisions + collided = falses(axes(A, dim)) + @inbounds begin + @nloops $N i A d -> ( + if d == dim + k = i_d + j_d = uniquerow[k] + else + j_d = i_d + end + ) begin + if (@nref $N A j) != (@nref $N A i) + collided[k] = true + end + end + end + + if any(collided) + nowcollided = similar(BitArray, axes(A, dim)) + while any(collided) + # Collect index of first row for each collided hash + empty!(firstrow) + for j in axes(A, dim) + collided[j] || continue + uniquerow[j] = get!(firstrow, Prehashed(hashes[j]), j) + end + for v ∈ values(firstrow) + push!(uniquerows, v) + end + + # Check for collisions + fill!(nowcollided, false) + @nloops $N i A d -> begin + if d == dim + k = i_d + j_d = uniquerow[k] + (!collided[k] || j_d == k) && continue + else + j_d = i_d + end + end begin + if (@nref $N A j) != (@nref $N A i) + nowcollided[k] = true + end + end + (collided, nowcollided) = (nowcollided, collided) + end + end + + (@nref $N A d -> d == dim ? sort!(uniquerows) : (axes(A, d))), + indexin(uniquerow, uniquerows) + end +end diff --git a/src/variational.jl b/src/variational.jl new file mode 100644 index 0000000..9af479f --- /dev/null +++ b/src/variational.jl @@ -0,0 +1,97 @@ + + +# variational.jl: This file provides basic functions to perform variational compression for MPS. +# If the MPS is moved to the GPU, its compression will be performed on the device. + +export variational_compress!, variational_sweep! + +function variational_compress!( + bra::QMps{T}, + mpo::QMpo{T}, + ket::QMps{T}, + tol = 1E-10, + max_sweeps::Int = 4, + kwargs..., +) where {T<:Real} + @assert is_left_normalized(bra) + env = Environment(bra, mpo, ket) + overlap = Inf + overlap_0, negative = measure_env(env, last(env.bra.sites)) + if negative + env.bra[last(env.bra.sites)] .*= -1 + end + + for sweep ∈ 1:max_sweeps + _left_sweep_var!(env; kwargs...) + _right_sweep_var!(env; kwargs...) + overlap, negative = measure_env(env, last(env.bra.sites)) + if negative + env.bra[last(env.bra.sites)] .*= -1 + end + Δ = abs(overlap_0 - overlap) + @info "Convergence" Δ + if Δ < tol + return overlap, env + else + overlap_0 = overlap + end + end + overlap, env +end + +function _left_sweep_var!(env::Environment; kwargs...) + for site ∈ reverse(env.bra.sites) + _left_sweep_var_site!(env, site; kwargs...) + end +end + +function _left_sweep_var_site!(env::Environment, site::Site; kwargs...) + toGPU = env.ket.onGPU && env.mpo.onGPU && env.bra.onGPU + update_env_right!(env, site) + A = project_ket_on_bra(env, site) + @cast B[l, (r, t)] := A[l, r, t] + _, Q = rq_fact(B; toGPU = toGPU, kwargs...) 
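+    # Keep only the orthogonal factor Q: reshaped below, it leaves the updated bra tensor
+    # right-normalized at this site; the R factor is dropped, since the neighbouring site is
+    # recomputed from the environments in the next step of the sweep.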
+ @cast C[l, r, t] := Q[l, (r, t)] (t ∈ 1:size(A, 3)) + env.bra[site] = C + clear_env_containing_site!(env, site) +end + +function _right_sweep_var!(env::Environment; kwargs...) + for site ∈ env.bra.sites + _right_sweep_var_site!(env, site; kwargs...) + end +end + +function _right_sweep_var_site!(env::Environment, site::Site; kwargs...) + toGPU = env.ket.onGPU && env.mpo.onGPU && env.bra.onGPU + update_env_left!(env, site) + A = project_ket_on_bra(env, site) + B = permutedims(A, (1, 3, 2)) # [l, t, r] + @cast B[(l, t), r] := B[l, t, r] + Q, _ = qr_fact(B; toGPU = toGPU, kwargs...) + @cast C[l, t, r] := Q[(l, t), r] (t ∈ 1:size(A, 3)) + C = permutedims(C, (1, 3, 2)) # [l, r, t] + env.bra[site] = C + clear_env_containing_site!(env, site) +end + +# TODO those 2 functions are to be removed eventually +function variational_sweep!( + bra::QMps{T}, + mpo::QMpo{T}, + ket::QMps{T}, + ::Val{:left}; + kwargs..., +) where {T<:Real} + _right_sweep_var!(Environment(bra, mpo, ket); kwargs...) +end + +function variational_sweep!( + bra::QMps{T}, + mpo::QMpo{T}, + ket::QMps{T}, + ::Val{:right}; + kwargs..., +) where {T<:Real} + _left_sweep_var!(Environment(bra, mpo, ket); kwargs...) +end diff --git a/src/zipper.jl b/src/zipper.jl new file mode 100644 index 0000000..658092d --- /dev/null +++ b/src/zipper.jl @@ -0,0 +1,278 @@ +export zipper, corner_matrix, CornerTensor + +struct CornerTensor{T<:Real} + C::Tensor{T,3} + M::MpoTensor{T,4} + B::Tensor{T,3} + + function CornerTensor(C, M, B) + T = promote_type(eltype.((C, M, B))...) + new{T}(C, M, B) + end +end + +struct Adjoint{T,S<:CornerTensor} + parent::S + + function Adjoint{T}(ten::CornerTensor{S}) where {T,S} + F = promote_type(T, S) + new{F,CornerTensor{F}}(ten) + end +end + +function zipper( + ψ::QMpo{R}, + ϕ::QMps{R}; + method::Symbol = :svd, + Dcut::Int = typemax(Int), + tol = eps(), + iters_rand = 3, + iters_svd = 1, + iters_var = 1, + Dtemp_multiplier = 2, + depth::Int = 0, + kwargs..., +) where {R<:Real} + onGPU = ψ.onGPU && ϕ.onGPU + @assert is_left_normalized(ϕ) + + C = onGPU ? CUDA.ones(R, 1, 1, 1) : ones(R, 1, 1, 1) + mpo_li = last(ψ.sites) + + d = (depth == 0) ? mpo_li : depth + + Dtemp = Dtemp_multiplier * Dcut + out = copy(ϕ) + env = EnvironmentMixed(out, C, ψ, ϕ) + + for i ∈ reverse(ϕ.sites) + while mpo_li > i + C = contract_matrix_tensor3(ψ[mpo_li], C) + mpo_li = left_nbrs_site(mpo_li, ψ.sites) + end + @assert mpo_li == i "Mismatch between QMpo and QMps sites." + mpo_li = left_nbrs_site(mpo_li, ψ.sites) + + if i > ϕ.sites[1] + CM = CornerTensor(C, ψ[i], out[i]) + + Urs, Srs, Vrs = [], [], [] + for i = 1:iters_rand + Utemp, Stemp, Vtemp = + svd_corner_matrix(CM, method, Dtemp, tol; toGPU = false, kwargs...) + push!(Urs, Utemp) + push!(Srs, Stemp) + push!(Vrs, Vtemp) + end + + Ur = hcat(Urs...) + Vr = hcat(Vrs...) + Sr = vcat(Srs...) ./ iters_rand + QU, RU = qr_fact(Ur, Dtemp * iters_rand, 0.0; toGPU = false, kwargs...) + QV, RV = qr_fact(Vr, Dtemp * iters_rand, 0.0; toGPU = false, kwargs...) + Ur, Sr, Vr = svd_fact(RU * Diagonal(Sr) * RV', Dtemp, tol; kwargs...) + # Ur = QU * Ur + Vr = QV * Vr + + if onGPU + Vr = CuArray(Vr) + end + + for _ = 1:iters_svd + # CM * Vr + x = reshape(Vr, size(CM.C, 2), size(CM.M, 2), :) + x = permutedims(x, (3, 1, 2)) + x = update_env_right(CM.C, x, CM.M, CM.B) + CCC = reshape(permutedims(x, (1, 3, 2)), size(CM.B, 1) * size(CM.M, 1), :) + + Ut, _ = qr_fact(CCC, Dtemp, 0.0; toGPU = ψ.onGPU, kwargs...) 
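+            # One half-step of orthogonal (power) iteration: CM applied to the current right
+            # basis Vr is re-orthonormalized into Ut, approximating the leading left singular
+            # subspace of CM; the adjoint pass below refines Vr in the same way.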
+ + # CM' * Ut + x = reshape(Ut, size(CM.B, 1), size(CM.M, 1), :) + x = permutedims(x, (1, 3, 2)) + yp = project_ket_on_bra(x, CM.B, CM.M, CM.C) + CCC = reshape(permutedims(yp, (2, 3, 1)), size(CM.C, 2) * size(CM.M, 2), :) + + Vr, _ = qr_fact(CCC, Dtemp, 0.0; toGPU = ψ.onGPU, kwargs...) + end + + # CM * Vr + x = reshape(Vr, size(CM.C, 2), size(CM.M, 2), :) + x = permutedims(x, (3, 1, 2)) + x = update_env_right(CM.C, x, CM.M, CM.B) + CCC = reshape(permutedims(x, (1, 3, 2)), size(CM.B, 1) * size(CM.M, 1), :) + CCC ./= norm(CCC) + + V, CCC = qr_fact(CCC', Dcut, tol; toGPU = ψ.onGPU, kwargs...) + V = V' * Vr' + s1, s2 = size(ψ[i]) + @cast CCC[z, x, y] := CCC[z, (x, y)] (y ∈ 1:s1) + C = permutedims(CCC, (2, 1, 3)) + @cast V[x, y, z] := V[x, (y, z)] (z ∈ 1:s2) + out[i] = V + else + L = onGPU ? CUDA.ones(R, 1, 1, 1) : ones(R, 1, 1, 1) + V = project_ket_on_bra(L, out[i], ψ[i], C) + V ./= norm(V) + out[i] = V + C = onGPU ? CUDA.ones(R, 1, 1, 1) : ones(R, 1, 1, 1) + end + + for _ = 1:iters_var + env.site = i + update_env_right!(env, i) + env.C = C + update_env_left!(env, :central) + _left_sweep_var_site!(env, :central; kwargs...) + for k in reverse(ϕ.sites) + if (i - d) <= k < i + _left_sweep_var_site!(env, k; kwargs...) + end + end + for k in ϕ.sites + if (i - d) <= k < i + _right_sweep_var_site!(env, k; kwargs...) + end + end + _right_sweep_var_site!(env, :central; kwargs...) + + for k in ϕ.sites + if (i + d) >= k >= i + _right_sweep_var_site!(env, k; kwargs...) + end + end + for k in reverse(ϕ.sites) + if (i + d) >= k >= i + _left_sweep_var_site!(env, k; kwargs...) + end + end + update_env_right!(env, :central) + C = project_ket_on_bra(env, :central) + end + end + out +end + +function _left_sweep_var_site!(env::EnvironmentMixed, site; kwargs...) # site: Union(Sites, :central) + update_env_right!(env, site) + A = project_ket_on_bra(env, site) + @cast B[l, (r, t)] := A[l, r, t] + _, Q = rq_fact(B; toGPU = env.onGPU, kwargs...) + @cast C[l, r, t] := Q[l, (r, t)] (t ∈ 1:size(A, 3)) + if site == :central + env.C = C + else + env.bra[site] = C + end + clear_env_containing_site!(env, site) +end + +function _right_sweep_var_site!(env::EnvironmentMixed, site; kwargs...) + update_env_left!(env, site) + A = project_ket_on_bra(env, site) + B = permutedims(A, (1, 3, 2)) # [l, t, r] + @cast B[(l, t), r] := B[l, t, r] + Q, _ = qr_fact(B; toGPU = env.onGPU, kwargs...) + @cast C[l, t, r] := Q[(l, t), r] (t ∈ 1:size(A, 3)) + C = permutedims(C, (1, 3, 2)) # [l, r, t] + if site == :central + env.C = C + else + env.bra[site] = C + end + clear_env_containing_site!(env, site) +end + +function Base.Array(CM::CornerTensor) # change name, or be happy with "psvd(Array(Array(CM))" + B, M, C = CM.B, CM.M, CM.C + for v ∈ reverse(M.bot) + B = contract_matrix_tensor3(v, B) + end + Cnew = corner_matrix(C, M.ctr, B) + @cast Cnew[(t1, t2), t3, t4] := Cnew[t1, t2, t3, t4] + for v ∈ reverse(M.top) + Cnew = contract_matrix_tensor3(v, Cnew) + end + @cast Cnew[t12, (t3, t4)] := Cnew[t12, t3, t4] +end + +Base.Array(CM::Adjoint{T,CornerTensor{T}}) where {T} = adjoint(Array(CM.ten)) + +# TODO rethink this function +function svd_corner_matrix( + CM, + method::Symbol, + Dcut::Int, + tol::Real; + toGPU::Bool = true, + kwargs..., +) + if method == :svd + U, Σ, V = svd_fact(Array(Array(CM)), Dcut, tol; kwargs...) + elseif method == :psvd + U, Σ, V = psvd(Array(Array(CM)), rank = Dcut) + elseif method == :psvd_sparse + U, Σ, V = psvd(CM, rank = Dcut) + elseif method == :tsvd + U, Σ, V = tsvd(Array(CM), Dcut; kwargs...) 
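+    # The :tsvd_sparse branch below avoids materializing CM as a dense matrix: it relies on
+    # the mul!, size, eltype and adjoint methods defined for CornerTensor further down in this file.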
+ elseif method == :tsvd_sparse + v0 = 2 .* rand(eltype(CM), size(CM, 1)) .- 1 + U, Σ, V = tsvd(CM, Dcut, initvec = v0; kwargs...) + else + throw(ArgumentError("Wrong method $method")) + end + toGPU && return CuArray.((U, Σ, V)) + U, Σ, V +end + +# this is for psvd to work +LinearAlgebra.ishermitian(ten::CornerTensor) = false +Base.size(ten::CornerTensor) = + (size(ten.B, 1) * size(ten.M, 1), size(ten.C, 2) * size(ten.M, 2)) +Base.size(ten::CornerTensor, n::Int) = size(ten)[n] +Base.eltype(ten::CornerTensor{T}) where {T} = T +Base.adjoint(ten::CornerTensor{T}) where {T} = Adjoint{T}(ten) + +CuArrayifneeded(ten::CornerTensor, x) = typeof(ten.B) <: CuArray ? CuArray(x) : x +CuArrayifneeded(ten::Adjoint{T,CornerTensor{T}}, x) where {T} = + CuArrayifneeded(ten.parent, x) + + +function LinearAlgebra.mul!(y, ten::CornerTensor, x) + x = CuArrayifneeded(ten, x) # CuArray(x) # TODO this an ugly patch + x = reshape(x, size(ten.C, 2), size(ten.M, 2), :) + x = permutedims(x, (3, 1, 2)) + yp = update_env_right(ten.C, x, ten.M, ten.B) + y[:, :] .= + Array(reshape(permutedims(yp, (1, 3, 2)), size(ten.B, 1) * size(ten.M, 1), :)) +end + +function LinearAlgebra.mul!(y, ten::Adjoint{T,CornerTensor{T}}, x) where {T<:Real} + x = CuArrayifneeded(ten, x) # CuArray(x) # TODO this an ugly patch + x = reshape(x, size(ten.parent.B, 1), size(ten.parent.M, 1), :) + x = permutedims(x, (1, 3, 2)) + yp = project_ket_on_bra(x, ten.parent.B, ten.parent.M, ten.parent.C) + y[:, :] .= Array( + reshape( + permutedims(yp, (2, 3, 1)), + size(ten.parent.C, 2) * size(ten.parent.M, 2), + :, + ), + ) +end + +function Base.:(*)(ten::CornerTensor{T}, x) where {T} + x = CuArrayifneeded(ten, x) # CuArray(x) # TODO this an ugly patch + x = reshape(x, 1, size(ten.C, 2), size(ten.M, 2)) + yp = update_env_right(ten.C, x, ten.M, ten.B) + out = reshape(yp, size(ten.B, 1) * size(ten.M, 1)) + Array(out) # TODO this an ugly patch +end + +function Base.:(*)(ten::Adjoint{T,CornerTensor{T}}, x) where {T<:Real} + x = CuArrayifneeded(ten, x) # CuArray(x) # TODO this an ugly patch + x = reshape(x, size(ten.parent.B, 1), 1, size(ten.parent.M, 1)) + yp = project_ket_on_bra(x, ten.parent.B, ten.parent.M, ten.parent.C) + out = reshape(yp, size(ten.parent.C, 2) * size(ten.parent.M, 2)) + Array(out) # TODO this an ugly patch +end diff --git a/test/attic/canonise.jl b/test/attic/canonise.jl new file mode 100644 index 0000000..24330a8 --- /dev/null +++ b/test/attic/canonise.jl @@ -0,0 +1,25 @@ +T = Float64 +D = 16 + +sites = [1, 3 // 2, 2, 5 // 2, 3, 7 // 2, 4] +d = [1, 2, 2, 2, 4, 2, 2] + +id = Dict(j => d[i] for (i, j) in enumerate(sites)) + +@testset "Random QMps" begin + ψ = rand(QMps{T}, id, D) + ϕ = rand(QMps{T}, id, D) + + ψ = move_to_CUDA!(ψ) + ϕ = move_to_CUDA!(ϕ) + + @testset "is left normalized" begin + canonise!(ψ, :left) + #@test is_left_normalized(ψ) + end + + @testset "is right normalized" begin + canonise!(ϕ, :right) + #@test is_right_normalized(ϕ) + end +end diff --git a/test/attic/compressions.jl b/test/attic/compressions.jl new file mode 100644 index 0000000..a73e460 --- /dev/null +++ b/test/attic/compressions.jl @@ -0,0 +1,35 @@ + +@testset "Compressions for sparse mps and mpo works" begin + D = 16 + d = 2 + sites = collect(1:4) + T = Float64 + + Dcut = 8 + max_sweeps = 100 + tol = 1E-10 + + ψ = rand(QMps{T}, sites, D, d) + W = rand(QMpo{T}, [1, 2, 3, 4], 2, 4) + + bra = ψ + ket = ψ + mpo = W + + @testset "Two mps representations are compressed to the same state" begin + χ = W * ψ + @test is_left_normalized(χ) + + ϕ = copy(ψ) + @test 
bond_dimension(bra) == max(D, d) + @test bond_dimensions(bra) == [(1, d, D), (D, d, D), (D, d, D), (D, d, 1)] + canonise!(bra, :left) + bra = QMps(ψ) + + @time overlap, env = variational_compress!(bra, mpo, ket, tol, max_sweeps) + + ϕ = MPS(bra) + @time is_right_normalized(ϕ) + @test norm(χ) ≈ norm(bra) ≈ 1 + end +end diff --git a/test/attic/contractions.jl b/test/attic/contractions.jl new file mode 100644 index 0000000..4f5a3ea --- /dev/null +++ b/test/attic/contractions.jl @@ -0,0 +1,61 @@ +@testset "Contraction" begin + D = 2 + d = 2 + sites = collect(1:2) + T = Float64 + + ψ = random_QMps(sites, D, d) + ϕ = random_QMps(sites, D, d) + O1 = random_QMpo(sites, D, d) + + @testset "dot products of MPS" begin + @testset "is equal to itself" begin + @test dot(ψ, ψ) ≈ dot(ψ, ψ) + end + + @testset "change of arguments results in conjugation" begin + @test dot(ψ, ϕ) ≈ conj(dot(ϕ, ψ)) + end + + @testset "norm is 2-norm" begin + @test norm(ψ) ≈ sqrt(abs(dot(ψ, ψ))) + end + + @testset "renormalizations" begin + ψ.tensors[ψ.sites[end]] *= 1 / norm(ψ) + @test dot(ψ, ψ) ≈ 1 + + ϕ.tensors[ψ.sites[1]] *= 1 / norm(ϕ) + @test dot(ϕ, ϕ) ≈ 1 + end + end + + @testset "dot product of MPS with MPO" begin + B = randn(Float64, 4, 2, 3) + A = randn(Float64, 2, 2) + C = randn(Float64, 2, 2, 2, 2) + O2 = random_QMpo(sites, D, d) + + @testset "contract_left gives correct sizes" begin + @test size(contract_left(B, A)) == (4, 2, 3) + end + + @testset "contract_tensors43 gives correct sizes" begin + @test size(contract_tensors43(C, B)) == (8, 2, 6) + end + + # @testset "dot product of QMpo and QMps" begin + # D = dot(O2, ψ) + # E = dot(O1, ψ) + # @test size(D[1]) == size(E[1]) == (1, 2, 4) + # @test size(D[2]) == size(E[2]) == (4, 2, 1) + # end + + @testset "dot product of QMps and QMpo" begin + F = dot(ψ, O2) + G = dot(ψ, O1) + @test size(F[1]) == size(G[1]) == (1, 2, 4) + @test size(F[2]) == size(G[2]) == (4, 1, 2) + end + end +end diff --git a/test/attic/environment.jl b/test/attic/environment.jl new file mode 100644 index 0000000..d8557cd --- /dev/null +++ b/test/attic/environment.jl @@ -0,0 +1,10 @@ +@testset "Environment" begin + sites = [1, 1 // 2, 2, 3, 7 // 2, 4, 5, 6] + site = 3 + @testset "left_nbrs_site gives correct left neighbor of a given site" begin + @test left_nbrs_site(site, sites) == 2 + end + @testset "left_nbrs_site gives correct right neighbor of a given site" begin + @test right_nbrs_site(site, sites) == 7 // 2 + end +end diff --git a/test/attic/linear_algebra_ext.jl b/test/attic/linear_algebra_ext.jl new file mode 100644 index 0000000..7bbefce --- /dev/null +++ b/test/attic/linear_algebra_ext.jl @@ -0,0 +1,46 @@ +using LowRankApprox + +@testset "Truncation with standard SVD works correctly" begin + D = 100 + Dcut = D - 1 + tol = 1E-8 + + a = rand(D, D) + + U1, Σ1, V1 = svd(a) + + δ = min(Dcut, size(Σ1)...) + U1 = U1[:, 1:δ] + Σ1 = Σ1[1:δ] + V1 = V1[:, 1:δ] + + U2, Σ2, V2 = svd(a) + + δ = min(Dcut, size(Σ2)...) 
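+    # Truncate the second factorization to the same rank δ so that both reconstructions
+    # are compared on equal footing.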
+ U2 = U2[:, 1:δ] + Σ2 = Σ2[1:δ] + V2 = V2[:, 1:δ] + + r1 = U1 * Diagonal(Σ1) * V1' + r2 = U2 * Diagonal(Σ2) * V2' + + @test norm(r1 - r2) < tol +end + + +@testset "Truncation with with random SVD works correctly" begin + + D = 100 + Dcut = D - 1 + tol = 1E-8 + + a = rand(D, D) + + U1, Σ1, V1 = psvd(a, rank = Dcut, atol = 1E-16, rtol = 1E-16) + U2, Σ2, V2 = psvd(a, rank = Dcut, atol = 1E-16, rtol = 1E-16) + + r1 = U1 * Diagonal(Σ1) * V1' + r2 = U2 * Diagonal(Σ2) * V2' + + @test norm(r1 - r2) < tol +end diff --git a/test/memoization.jl b/test/attic/memoization.jl similarity index 100% rename from test/memoization.jl rename to test/attic/memoization.jl diff --git a/test/attic/mps.jl b/test/attic/mps.jl new file mode 100644 index 0000000..f00a778 --- /dev/null +++ b/test/attic/mps.jl @@ -0,0 +1,45 @@ +@testset "QMps" begin + + T = Float64 + D = 16 + + sites = [1, 3 // 2, 2, 5 // 2, 3, 7 // 2, 4] + d = [1, 2, 2, 2, 4, 2, 2] + + id = Dict(j => d[i] for (i, j) in enumerate(sites)) + + @testset "Random QMps with varying physical dimension" begin + ψ = rand(QMps{T}, id, D) + + @testset "has correct number of sites" begin + @test length(ψ) == maximum(sites) + @test size(ψ) == (maximum(sites),) + end + + @testset "has correct type" begin + @test eltype(ψ) == T + end + + @testset "has correct rank" begin + @test rank(ψ) == Tuple(d) + end + + @testset "has correct bonds" begin + @test bond_dimension(ψ) ≈ D + @test bond_dimensions(ψ) == [ + (1, d[1], D), + (D, d[2], D), + (D, d[3], D), + (D, d[4], D), + (D, d[5], D), + (D, d[6], D), + (D, d[7], 1), + ] + @test verify_bonds(ψ) === nothing + end + + @testset "is equal to itself" begin + @test ψ == ψ + end + end +end diff --git a/test/attic/runtests.jl b/test/attic/runtests.jl new file mode 100644 index 0000000..f43ee30 --- /dev/null +++ b/test/attic/runtests.jl @@ -0,0 +1,20 @@ +using SpinGlassTensors +using TensorOperations +using TensorCast +using Logging +using LinearAlgebra + +disable_logging(LogLevel(1)) + +using Test + +my_tests = [ + #"mps.jl", + "canonise.jl", + #"environment.jl" +] + + +for my_test in my_tests + include(my_test) +end diff --git a/test/base.jl b/test/base.jl deleted file mode 100644 index 1f5a36f..0000000 --- a/test/base.jl +++ /dev/null @@ -1,187 +0,0 @@ -@testset "MPS" begin - - D = 10 - d = 4 - sites = 5 - T = ComplexF64 - - @testset "Random MPS with the same physical dimension" begin - - ψ = randn(MPS{T}, sites, D, d) - - @testset "has correct number of sites" begin - @test length(ψ) == sites - @test size(ψ) == (sites,) - end - - @testset "has correct type" begin - @test eltype(ψ) == T - end - - @testset "has correct rank" begin - @test rank(ψ) == Tuple(fill(d, sites)) - end - - @testset "has correct bonds" begin - @test bond_dimension(ψ) ≈ D - @test verify_bonds(ψ) === nothing - end - - @testset "is equal to itself" begin - @test ψ == ψ - @test ψ ≈ ψ - end - - @testset "is equal to its copy" begin - ϕ = copy(ψ) - @test ϕ == ψ - @test ϕ ≈ ψ - end - end - - @testset "Random MPS with varying physical dimension" begin - - dims = (3, 2, 5, 4) - ψ = randn(MPS{T}, D, dims) - - @testset "has correct number of sites" begin - n = length(dims) - @test length(ψ) == n - @test size(ψ) == (n,) - end - - @testset "has correct type" begin - @test eltype(ψ) == T - end - - @testset "has correct rank" begin - @test rank(ψ) == dims - end - - @testset "has correct bonds" begin - @test bond_dimension(ψ) ≈ D - @test verify_bonds(ψ) === nothing - end - - @testset "is equal to itself" begin - @test ψ == ψ - @test ψ ≈ ψ - end - - @testset "is equal to 
its copy" begin - ϕ = copy(ψ) - @test ϕ == ψ - @test ϕ ≈ ψ - end - end - - @testset "Random MPO with the same physical dimension" begin - - W = randn(MPO{T}, sites, D, d) - - @testset "has correct number of sites" begin - @test length(W) == sites - @test size(W) == (sites,) - end - - @testset "has correct type" begin - @test eltype(W) == T - end - - @testset "is equal to itself" begin - @test W == W - @test W ≈ W - end - - @testset "is equal to its copy" begin - U = copy(W) - @test U == W - @test U ≈ W - end - end - - @testset "Random MPO with varying physical dimension" begin - - dims = (3, 2, 5, 4) - W = randn(MPO{T}, D, dims) - - @testset "has correct number of sites" begin - n = length(dims) - @test length(W) == n - @test size(W) == (n,) - end - - @testset "has correct type" begin - @test eltype(W) == T - end - - @testset "is equal to itself" begin - @test W == W - @test W ≈ W - end - - @testset "is equal to its copy" begin - U = copy(W) - @test U == W - @test U ≈ W - end - end - - @testset "MPS from tensor" begin - ϵ = 1E-14 - - dims = (2, 3, 4, 3, 5) - sites = length(dims) - A = randn(T, dims) - - ψ = MPS(A, :right) - - @test norm(ψ) ≈ 1 - @test_nowarn verify_bonds(ψ) - @test_nowarn verify_physical_dims(ψ, dims) - @test is_right_normalized(ψ) - - B = randn(T, dims...) - ϕ = MPS(B, :left) - - @test norm(ϕ) ≈ 1 - @test_nowarn verify_bonds(ϕ) - @test_nowarn verify_physical_dims(ϕ, dims) - @test is_left_normalized(ϕ) - - χ = MPS(A, :left) - - @test norm(χ) ≈ 1 - @test abs(1 - abs(dot(ψ, χ))) < ϵ - end - -end - - -@testset "Objects with equal tensors have the same hash" begin - D = 10 - d = 4 - sites = 5 - T = ComplexF64 - - ψ = randn(MPS{T}, sites, D, d) - ϕ = copy(ψ) - - W = randn(MPO{T}, sites, D, d) - V = copy(W) - - @testset "Equal MPSs have the same hash" begin - @test hash(ψ) == hash(ϕ) - end - - @testset "Equal MPOs have the same hash" begin - @test hash(W) == hash(V) - end - - @testset "Equal tuples with MPS and MPO have the same hash" begin - tuple_1 = (ψ, W, [1, 2, 3]) - tuple_2 = (ϕ, V, [1, 2, 3]) - @test tuple_1 == tuple_2 - @test hash(tuple_1) == hash(tuple_2) - end -end diff --git a/test/canonise.jl b/test/canonise.jl new file mode 100644 index 0000000..89e44d7 --- /dev/null +++ b/test/canonise.jl @@ -0,0 +1,51 @@ +D = 16 + +sites = [1, 3 // 2, 2, 5 // 2, 3, 7 // 2, 4] +d = [1, 2, 2, 2, 4, 2, 2] + +id = Dict(j => d[i] for (i, j) in enumerate(sites)) + +@testset "Random QMps ($T)" for T in (Float32, Float64) + for toCUDA ∈ (true, false) + ψ = rand(QMps{T}, id, D) + ϕ = rand(QMps{T}, id, D) + @test is_consistent(ψ) + @test is_consistent(ϕ) + + if toCUDA + ψ = move_to_CUDA!(ψ) + ϕ = move_to_CUDA!(ϕ) + @test is_consistent(ψ) + @test is_consistent(ϕ) + end + + @testset "is left normalized" begin + canonise!(ψ, :left) + @test is_consistent(ψ) + @test is_left_normalized(ψ) + @test dot(ψ, ψ) ≈ one(T) + end + + @testset "is right normalized" begin + canonise!(ϕ, :right) + @test is_consistent(ϕ) + @test is_right_normalized(ϕ) + @test dot(ϕ, ϕ) ≈ one(T) + end + end +end + +@testset "Measure spectrum($T)" for T in (Float32, Float64) + svd_mps = TensorMap{T}( + 1 => T[ + -0.694389933025747 -0.7195989305943268;;; + 0.7195989305943268 -0.6943899330257469 + ], + 2 => T[0.7071067811865477; 0.0;;; -7.850461536237973e-17; 0.7071067811865477], + ) + ψ = QMps(svd_mps) + @test is_left_normalized(ψ) + A = measure_spectrum(ψ) + @test A[1] ≈ [1.0] + @test A[2] ≈ [0.7071067811865476, 0.7071067811865475] +end diff --git a/test/compressions.jl b/test/compressions.jl deleted file mode 100644 index 
fa36f9e..0000000 --- a/test/compressions.jl +++ /dev/null @@ -1,100 +0,0 @@ -@testset "Canonisation and Compression" begin - - D = 32 - Dcut = 16 - - d = 2 - sites = 100 - - T = Float64 - - var_tol = 1E-10 - var_max_sweeps = 100 - - ψ = randn(MPS{T}, sites, D, d) - ϕ = randn(MPS{T}, sites, D, d) - χ = randn(MPS{T}, sites, D, d) - Φ = randn(MPS{T}, sites, D, d) - - - @testset "Canonisation (left)" begin - b = canonise!(ψ, :left) - @test is_left_normalized(ψ) - @test dot(ψ, ψ) ≈ 1 - end - - - @testset "Canonisation (right)" begin - b = canonise!(ϕ, :right) - @test is_right_normalized(ϕ) - @test dot(ϕ, ϕ) ≈ 1 - end - - @testset "Copy and truncate twice" begin - ψ̃ = copy(ψ) - @test ψ̃ == ψ - for (direction, predicate) ∈ - ((:left, is_left_normalized), (:right, is_right_normalized)) - truncate!(ψ, direction, Dcut) - truncate!(ψ̃, direction, Dcut) - - @test predicate(ψ) - @test predicate(ψ̃) - @test bond_dimension(ψ̃) == bond_dimension(ψ) - @test all(size(A) == size(B) for (A, B) ∈ zip(ψ, ψ̃)) - @test typeof(ψ̃) == typeof(ψ) - @test norm(ψ) ≈ norm(ψ̃) ≈ 1 - @test abs(ψ * ψ̃) ≈ abs(ψ̃ * ψ) ≈ 1 - end - end - - @testset "Cauchy-Schwarz inequality (after truncation)" begin - @test abs(dot(ϕ, ψ)) <= norm(ϕ) * norm(ψ) - end - - @testset "Truncation (SVD, right)" begin - truncate!(ψ, :right, Dcut) - @test is_right_normalized(ψ) - @test norm(ψ) ≈ 1 - end - - @testset "Truncation (SVD, left)" begin - truncate!(ψ, :left, Dcut) - @test is_left_normalized(ψ) - @test norm(ψ) ≈ 1 - end - - - @testset "" begin - ϵ = 1E-10 - ψ = randn(MPS{T}, sites, D, d) - - l = copy(ψ) - r = copy(ψ) - - canonise!(l, :left) - @test is_left_normalized(l) - - canonise!(r, :right) - @test is_right_normalized(r) - - @test dot(l, l) ≈ 1 - @test dot(r, r) ≈ 1 - - @test abs(1 - abs(dot(l, r))) < ϵ - end - - - @testset "Variational compression" begin - Ψ = copy(Φ) - canonise!(Ψ, :left) - - overlap = compress!(Φ, Dcut, var_tol, var_max_sweeps) - #println(overlap) - - @test norm(Φ) ≈ 1 - @test is_left_normalized(Φ) - @test is_right_normalized(Φ) == false - end - -end diff --git a/test/contractions.jl b/test/contractions.jl deleted file mode 100644 index 1079819..0000000 --- a/test/contractions.jl +++ /dev/null @@ -1,111 +0,0 @@ -@testset "contractions" begin - - D = 10 - d = 3 - sites = 5 - T = ComplexF64 - - ψ = randn(MPS{T}, sites, D, d) - ϕ = randn(MPS{T}, sites, D, d) - mpo_ψ = randn(MPO{T}, sites, D, d) - mpo = randn(MPO{T}, 2, 2, 2) - - - Id = fill(I(d), length(ψ)) - - Id_m = MPO(fill(ones(1, 1, 1, d), length(ϕ))) - - @testset "dot products" begin - @testset "is equal to itself" begin - @test dot(ψ, ψ) ≈ dot(ψ, ψ) - end - - @testset "change of arguments results in conjugation" begin - @test dot(ψ, ϕ) ≈ conj(dot(ϕ, ψ)) - @test dot(ψ, Id, ϕ) ≈ conj(dot(ϕ, Id, ψ)) - end - - @testset "dot with identity equal to dot of two MPS" begin - @test dot(ψ, Id, ϕ) ≈ dot(ψ, ϕ) - end - - @testset "norm is 2-norm" begin - @test norm(ψ) ≈ sqrt(abs(dot(ψ, ψ))) - end - - @testset "renormalizations" begin - ψ[end] *= 1 / norm(ψ) - @test dot(ψ, ψ) ≈ 1 - - ϕ[1] *= 1 / norm(ϕ) - @test dot(ϕ, ϕ) ≈ 1 - end - - @testset "dot products of MPO" begin - mpo1 = dot(mpo, mpo) - - @testset "has correct sisze" begin - @test size(mpo1[1]) == (1, 2, 4, 2) - @test size(mpo1[2]) == (4, 2, 1, 2) - end - end - - end - - @testset "left environment returns correct overlap" begin - L = left_env(ϕ, ψ) - @test L[end][1] ≈ dot(ϕ, ψ) - end - - - @testset "right environment returns correct overlap" begin - R = right_env(ϕ, ψ) - @test R[1][end] ≈ dot(ϕ, ψ) - end - - - @testset 
"Cauchy-Schwarz inequality of MPS is OK" begin - @test abs(dot(ϕ, ψ)) <= norm(ϕ) * norm(ψ) - end - - - @testset "left_env correctly contracts MPS for a given configuration" begin - D = 10 - d = 2 - sites = 5 - T = ComplexF64 - - ψ = randn(MPS{T}, sites, D, d) - state = 2 * (rand(sites) .< 0.5) .- 1 - - C = I - for (A, σ) ∈ zip(ψ, state) - C *= A[:, idx(σ), :] - end - - @test tr(C) ≈ left_env(ψ, map(idx, state))[] - end - - - @testset "right_env correctly contracts MPO with MPS for a given configuration" begin - D = 10 - d = 2 - sites = 5 - T = Float64 - - ψ = randn(MPS{T}, sites, D, d) - W = randn(MPO{T}, sites, D, d) - - σ = 2 * (rand(sites) .< 0.5) .- 1 - - ϕ = MPS(T, sites) - for (i, A) ∈ enumerate(W) - m = idx(σ[i]) - @cast B[x, s, y] := A[x, $m, y, s] - ϕ[i] = B - end - - @test dot(ψ, ϕ) ≈ right_env(ψ, W, map(idx, σ))[] - end - -end diff --git a/test/identities.jl b/test/identities.jl deleted file mode 100644 index 7290ca4..0000000 --- a/test/identities.jl +++ /dev/null @@ -1,80 +0,0 @@ -using Random - - -ψ = randn(MPS{Float64}, 4, 3, 2) -O = randn(MPO{Float64}, 4, 3, 2) - -IMPS = IdentityMPS() -IMPO = IdentityMPO() - -@testset "multiplication of IdentityMPO" begin - - @testset "mutlitplication with MPS ψ returns ψ" begin - @test IMPO * ψ == ψ - @test ψ * IMPO == ψ - end - - @testset "mutlitplication with MPO O returns O" begin - @test IMPO * O == O - end -end - -@testset "Multiplication of IdentityMPS by an MPO O" begin - ϕ = O * IMPS - - @testset "result has the correct type" begin - @test typeof(ϕ) == MPS{Float64} - end - - @testset "length of result is the same as O" begin - @test length(ϕ) == length(O) - end - - @testset "the multiplication drops the correct dims" begin - for i ∈ eachindex(O) - @test ϕ[i] == dropdims(sum(O[i], dims = 4), dims = 4) - end - end -end - -@testset "Identities are singletons" begin - @test IMPO === IdentityMPO() - @test IMPS === IdentityMPS() -end - -@testset "Identities have infinite length" begin - @test length(IMPS) == Inf - @test length(IMPO) == Inf -end - -@testset "Indexing identities returns trivial tensors" begin - @testset "Indexing IdentityMPS" begin - A = IMPS[42] - @test length(A) == 1 - @test ndims(A) == 3 - @test norm(A) == 1 - end - - @testset "Indexing IdentityMPO" begin - B = IMPO[666] - @test length(B) == 1 - @test ndims(B) == 4 - @test norm(B) == 1 - end -end - -@testset "IdentityMPS is only equal to itself" begin - @test IdentityMPS() == IdentityMPS() - - true_identity = IdentityMPS() - tensors = [true_identity[i] for i = 1:4] - - @test IdentityMPS() != MPS(tensors) - @test MPS(tensors) != IdentityMPS() - - Random.seed!(123) - another_mps = randn(MPS{Float64}, 5, 3, 4) - - @test IdentityMPS() != another_mps - @test another_mps != IdentityMPS() -end diff --git a/test/projectors.jl b/test/projectors.jl new file mode 100644 index 0000000..d255328 --- /dev/null +++ b/test/projectors.jl @@ -0,0 +1,53 @@ + +@testset "Add and get from pool of projectors" begin + @testset "Start with empty pool and add elements to it" begin + lp = PoolOfProjectors{Int64}() + @test length(lp) == 0 + + p1 = [1, 1, 2, 2, 3, 3] + p2 = [1, 2, 1, 3] + k = add_projector!(lp, p1) + @test k == 1 + @test length(lp) == 1 + + k = add_projector!(lp, p1) + @test k == 1 + @test length(lp) == 1 + + k = add_projector!(lp, p2) + @test k == 2 + @test length(lp) == 2 + + @test get_projector!(lp, 1) == p1 + @test get_projector!(lp, 2) == p2 + + @test length(lp, 1) == 6 + @test length(lp, 2) == 4 + @test size(lp, 1) == 3 + @test size(lp, 2) == 3 + + empty!(lp, lp.default_device) 
+ @test length(lp) == 0 + end + + @testset "Different devices" begin + for T ∈ [Int16, Int32, Int64] + lp = PoolOfProjectors{T}() + p1 = Vector{T}([1, 1, 2, 2, 3, 3]) + p2 = Vector{T}([1, 2, 1, 3]) + k = add_projector!(lp, p1) + k = add_projector!(lp, p2) + + @test typeof(get_projector!(lp, 2, :CPU)) <: Array{T,1} + @test typeof(get_projector!(lp, 1, :GPU)) <: CuArray{T,1} + @test length(lp, :GPU) == 1 + @test length(lp, :CPU) == 2 + + @test typeof(get_projector!(lp, 1, :GPU)) <: CuArray{T,1} + @test length(lp, :GPU) == 1 + + @test typeof(get_projector!(lp, 2, :GPU)) <: CuArray{T,1} + @test length(lp, :GPU) == 2 + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 6a2186e..fb5d8cf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,16 +3,13 @@ using TensorOperations using TensorCast using Logging using LinearAlgebra +using CUDA disable_logging(LogLevel(1)) using Test -idx(σ::Int) = (σ == -1) ? 1 : σ + 1 - - -my_tests = - ["base.jl", "memoization.jl", "contractions.jl", "compressions.jl", "identities.jl"] +my_tests = ["canonise.jl", "variational.jl", "projectors.jl"] for my_test in my_tests include(my_test) diff --git a/test/variational.jl b/test/variational.jl new file mode 100644 index 0000000..a4e34fc --- /dev/null +++ b/test/variational.jl @@ -0,0 +1,52 @@ +l = 2 +D1 = ([1, 1], [1, 2], [1, 1], [1, 1]) +D2 = ([1, 1], [1, 2], [1, 1], [1, 2]) +S = Float64 +rand_central = rand(CentralTensor{S}, [1, 1, 1, 1, 1, 1, 1, 1]) +map1 = MpoTensor( + TensorMap{S}( + Dict( + -1 // 2 => rand_central, + 0 => rand(SiteTensor{S}, PoolOfProjectors{Integer}(), l, D1), + ), + ), +) +map2 = MpoTensor( + TensorMap{S}( + Dict( + -1 // 2 => rand_central, + 0 => rand(SiteTensor{S}, PoolOfProjectors{Integer}(), l, D2), + ), + ), +) +map3 = MpoTensor( + TensorMap{S}( + Dict( + -1 // 2 => rand_central, + 0 => rand(SiteTensor{S}, PoolOfProjectors{Integer}(), l, D1), + ), + ), +) +mpomap = Dict(1 => map1, 2 => map2, 3 => map3) + +D = 2 +sites = [1, 2, 3] +d = [1, 1, 1] +id = Dict(j => d[i] for (i, j) in enumerate(sites)) + +@testset "Random QMpo with varying physical dimension" begin + W = rand(QMpo{S}, mpomap) + + @test length(W) == 3 + @test bond_dimension(W) == 1 +end + +@testset "Compressions for sparse mps and mpo works" begin + W = rand(QMpo{S}, mpomap) + ψ = rand(QMps{S}, id, D) + canonise!(ψ, :left) + ϕ = rand(QMps{S}, id, D) + canonise!(ϕ, :left) + + +end
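+
+# Illustrative sketch (not part of the original tests): with `W`, `ψ` and `ϕ` prepared as above,
+# a variational compression run would look roughly like the call below; the tolerance and the
+# number of sweeps are assumed values, not ones fixed by this test.
+# overlap, env = variational_compress!(ψ, W, ϕ, 1E-10, 4)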