diff --git a/Project.toml b/Project.toml
index bd8f0b7d29..f43f7da517 100644
--- a/Project.toml
+++ b/Project.toml
@@ -12,6 +12,7 @@ Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
+TaskLocalValues = "ed4db957-447d-4319-bfb6-7fa9ae7ecf34"
 Tensors = "48a634ad-e948-5137-8d70-aa71f2a747f4"
 WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192"
 
@@ -31,6 +32,7 @@ NearestNeighbors = "0.4"
 OrderedCollections = "1"
 Preferences = "1"
 Reexport = "1"
+TaskLocalValues = "0.1"
 Tensors = "1.14"
 WriteVTK = "1.13"
 julia = "1.9"
diff --git a/docs/Manifest.toml b/docs/Manifest.toml
index 4b67ef2b1f..4f814b904c 100644
--- a/docs/Manifest.toml
+++ b/docs/Manifest.toml
@@ -542,7 +542,7 @@ uuid = "29a986be-02c6-4525-aec4-84b980013641"
 version = "2.0.4"
 
 [[deps.Ferrite]]
-deps = ["EnumX", "ForwardDiff", "LinearAlgebra", "NearestNeighbors", "OrderedCollections", "Preferences", "Reexport", "SparseArrays", "StaticArrays", "Tensors", "WriteVTK"]
+deps = ["EnumX", "ForwardDiff", "LinearAlgebra", "NearestNeighbors", "OrderedCollections", "Preferences", "Reexport", "SparseArrays", "StaticArrays", "TaskLocalValues", "Tensors", "WriteVTK"]
 path = ".."
 uuid = "c061ca5d-56c9-439f-9c0e-210fe06d3992"
 version = "1.0.0"
@@ -2104,6 +2104,11 @@ deps = ["ArgTools", "SHA"]
 uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
 version = "1.10.0"
 
+[[deps.TaskLocalValues]]
+git-tree-sha1 = "d155450e6dff2a8bc2fcb81dcb194bd98b0aeb46"
+uuid = "ed4db957-447d-4319-bfb6-7fa9ae7ecf34"
+version = "0.1.2"
+
 [[deps.TensorCore]]
 deps = ["LinearAlgebra"]
 git-tree-sha1 = "1feb45f88d133a655e001435632f019a9a1bcdb6"
diff --git a/docs/src/literate-howto/threaded_assembly.jl b/docs/src/literate-howto/threaded_assembly.jl
index fa0526fa9f..332989efcd 100644
--- a/docs/src/literate-howto/threaded_assembly.jl
+++ b/docs/src/literate-howto/threaded_assembly.jl
@@ -73,7 +73,7 @@ end;
 #
 # ScratchValues is a thread-local collection of data that each thread needs to own,
 # since we need to be able to mutate the data in the threads independently
-struct ScratchValues{T, CV <: CellValues, FV <: FacetValues, TT <: AbstractTensor, dim, AT}
+struct ScratchValues{T, CV <: CellValues, FV <: FacetValues, TT <: AbstractTensor, dim}
     Ke::Matrix{T}
     fe::Vector{T}
     cellvalues::CV
@@ -81,7 +81,6 @@ struct ScratchValues{T, CV <: CellValues, FV <: FacetValues, TT <: AbstractTenso
     global_dofs::Vector{Int}
     ɛ::Vector{TT}
     coordinates::Vector{Vec{dim, T}}
-    assembler::AT
 end;
 
 # Each thread need its own CellValues and FacetValues (although, for this example we don't use
@@ -96,9 +95,8 @@ function create_values(interpolation_space::Interpolation{refshape}, qr_order::I
 end;
 
 # Create a `ScratchValues` for each thread with the thread local data
-function create_scratchvalues(K, f, dh::DofHandler{dim}, ip) where {dim}
+function create_scratchvalues(dh::DofHandler{dim}, ip) where {dim}
     nthreads = Threads.nthreads()
-    assemblers = [start_assemble(K, f) for i in 1:nthreads]
     cellvalues, facetvalues = create_values(ip, 2)
 
     n_basefuncs = getnbasefunctions(cellvalues[1])
@@ -112,7 +110,7 @@ function create_scratchvalues(K, f, dh::DofHandler{dim}, ip) where {dim}
     coordinates = [[zero(Vec{dim}) for i in 1:length(dh.grid.cells[1].nodes)] for i in 1:nthreads]
 
     return [ScratchValues(Kes[i], fes[i], cellvalues[i], facetvalues[i], global_dofs[i],
-                          ɛs[i], coordinates[i], assemblers[i]) for i in 1:nthreads]
+                          ɛs[i], coordinates[i]) for i in 1:nthreads]
 end;
 
 # ## Threaded assemble
@@ -121,13 +119,15 @@ function doassemble(K::SparseMatrixCSC, colors, grid::Grid, dh::DofHandler,
                     C::SymmetricTensor{4, dim}, ip) where {dim}
 
     f = zeros(ndofs(dh))
-    scratches = create_scratchvalues(K, f, dh, ip)
+    assembler = start_assemble(K, f)
+
+    scratches = create_scratchvalues(dh, ip)
     b = Vec{3}((0.0, 0.0, 0.0)) # Body force
 
     for color in colors
         ## Each color is safe to assemble threaded
         Threads.@threads :static for i in 1:length(color)
-            assemble_cell!(scratches[Threads.threadid()], color[i], K, grid, dh, C, b)
+            assemble_cell!(scratches[Threads.threadid()], color[i], assembler, grid, dh, C, b)
         end
     end
 
@@ -136,13 +136,13 @@ end
 
 # The cell assembly function is written the same way as if it was a single threaded example.
 # The only difference is that we unpack the variables from our `scratch`.
-function assemble_cell!(scratch::ScratchValues, cell::Int, K::SparseMatrixCSC,
+function assemble_cell!(scratch::ScratchValues, cell::Int, assembler,
                         grid::Grid, dh::DofHandler, C::SymmetricTensor{4, dim}, b::Vec{dim}) where {dim}
 
     ## Unpack our stuff from the scratch
-    Ke, fe, cellvalues, facetvalues, global_dofs, ɛ, coordinates, assembler =
+    Ke, fe, cellvalues, facetvalues, global_dofs, ɛ, coordinates =
         scratch.Ke, scratch.fe, scratch.cellvalues, scratch.facetvalues,
-        scratch.global_dofs, scratch.ɛ, scratch.coordinates, scratch.assembler
+        scratch.global_dofs, scratch.ɛ, scratch.coordinates
 
     fill!(Ke, 0)
     fill!(fe, 0)
diff --git a/src/Ferrite.jl b/src/Ferrite.jl
index 2738904d83..ceb13fa33c 100644
--- a/src/Ferrite.jl
+++ b/src/Ferrite.jl
@@ -25,6 +25,8 @@ using Tensors:
     rotation_tensor, symmetric, tovoigt!, hessian, otimesu
 using ForwardDiff:
     ForwardDiff
+using TaskLocalValues:
+    TaskLocalValue
 
 include("CollectionsOfViews.jl")
 using .CollectionsOfViews:
diff --git a/src/assembler.jl b/src/assembler.jl
index e79521dc4b..de034df7af 100644
--- a/src/assembler.jl
+++ b/src/assembler.jl
@@ -115,8 +115,8 @@ Assembler for sparse matrix with CSC storage type.
 struct CSCAssembler{Tv,Ti,MT<:AbstractSparseMatrixCSC{Tv,Ti}} <: AbstractCSCAssembler
     K::MT
     f::Vector{Tv}
-    permutation::Vector{Int}
-    sorteddofs::Vector{Int}
+    permutation::TaskLocalValue{Vector{Int}}
+    sorteddofs::TaskLocalValue{Vector{Int}}
 end
 
 """
@@ -125,8 +125,8 @@ Assembler for symmetric sparse matrix with CSC storage type.
 struct SymmetricCSCAssembler{Tv,Ti, MT <: Symmetric{Tv,<:AbstractSparseMatrixCSC{Tv,Ti}}} <: AbstractCSCAssembler
     K::MT
     f::Vector{Tv}
-    permutation::Vector{Int}
-    sorteddofs::Vector{Int}
+    permutation::TaskLocalValue{Vector{Int}}
+    sorteddofs::TaskLocalValue{Vector{Int}}
 end
 
 function Base.show(io::IO, ::MIME"text/plain", a::Union{CSCAssembler, SymmetricCSCAssembler})
@@ -165,11 +165,15 @@ start_assemble(K::Union{AbstractSparseMatrixCSC, Symmetric{<:Any,<:AbstractSpars
 function start_assemble(K::AbstractSparseMatrixCSC{T}, f::Vector=T[]; fillzero::Bool=true, maxcelldofs_hint::Int=0) where {T}
     fillzero && (fillzero!(K); fillzero!(f))
-    return CSCAssembler(K, f, zeros(Int,maxcelldofs_hint), zeros(Int,maxcelldofs_hint))
+    permutation = TaskLocalValue{Vector{Int}}(() -> Vector{Int}(undef, maxcelldofs_hint))
+    sorteddofs = TaskLocalValue{Vector{Int}}(() -> Vector{Int}(undef, maxcelldofs_hint))
+    return CSCAssembler(K, f, permutation, sorteddofs)
 end
 
 function start_assemble(K::Symmetric{T,<:SparseMatrixCSC}, f::Vector=T[]; fillzero::Bool=true, maxcelldofs_hint::Int=0) where T
     fillzero && (fillzero!(K); fillzero!(f))
-    return SymmetricCSCAssembler(K, f, zeros(Int,maxcelldofs_hint), zeros(Int,maxcelldofs_hint))
+    permutation = TaskLocalValue{Vector{Int}}(() -> Vector{Int}(undef, maxcelldofs_hint))
+    sorteddofs = TaskLocalValue{Vector{Int}}(() -> Vector{Int}(undef, maxcelldofs_hint))
+    return SymmetricCSCAssembler(K, f, permutation, sorteddofs)
 end
 
 function finish_assemble(a::Union{CSCAssembler, SymmetricCSCAssembler})
@@ -226,7 +230,7 @@ end
     # We assume that the input dofs are not sorted, because the cells need the dofs in
     # a specific order, which might not be the sorted order. Hence we sort them.
     # Note that we are not allowed to mutate `dofs` in the process.
-    sorteddofs, permutation = _sortdofs_for_assembly!(A.permutation, A.sorteddofs, dofs)
+    sorteddofs, permutation = _sortdofs_for_assembly!(A.permutation[], A.sorteddofs[], dofs)
     current_col = 1
     @inbounds for Kcol in sorteddofs
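Note (outside the patch): the core of this change is that `CSCAssembler` and `SymmetricCSCAssembler` now keep their `permutation` and `sorteddofs` scratch buffers behind a `TaskLocalValue`, so one assembler object can be shared by several tasks while each task lazily materializes its own buffers through `tlv[]`. Below is a minimal sketch of that pattern using only TaskLocalValues.jl and Base.Threads; the `sort_into_scratch!` helper and the toy data are illustrative and not part of Ferrite.

using TaskLocalValues: TaskLocalValue

# One shared handle; each task that calls `scratch[]` lazily gets its own
# Vector{Int}, so concurrently running tasks never mutate the same buffer.
scratch = TaskLocalValue{Vector{Int}}(() -> Vector{Int}(undef, 0))

# Illustrative helper: sort `dofs` into the current task's buffer without
# mutating `dofs` itself (loosely mirroring what `_sortdofs_for_assembly!` needs).
function sort_into_scratch!(scratch, dofs)
    buf = scratch[]                 # buffer owned by the *current* task
    resize!(buf, length(dofs))
    copyto!(buf, dofs)
    return sort!(buf)
end

tasks = [Threads.@spawn sort_into_scratch!(scratch, rand(1:100, 8)) for _ in 1:4]
foreach(wait, tasks)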
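As I read the diff, the motivation is this: previously `permutation` and `sorteddofs` were single plain vectors, so one assembler could not be used from more than one task at a time, and the howto example worked around that by creating one assembler per thread and indexing with `Threads.threadid()`. Since Julia 1.7 tasks may migrate between threads at yield points, so `threadid()`-based indexing is only safe under `:static` scheduling, whereas a `TaskLocalValue` ties each buffer to the task that uses it. That is why a single `start_assemble(K, f)` result can now be shared across the color loop in the example and the per-thread assemblers are dropped.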