Commit
add test_adapt, some comments
Abdelrahman912 committed Oct 15, 2024
1 parent fbc1b4b commit ea83925
Showing 3 changed files with 111 additions and 33 deletions.
12 changes: 0 additions & 12 deletions docs/src/literate-tutorials/gpu_qp_heat_equation.jl
@@ -161,15 +161,3 @@ stassy(cv,dh) = assemble_global!(cv,dh,Val(false))
Kstd , Fstd = stassy(cellvalues,dh);
norm(Kstd)




# Benchmarking
## function benchmark_gpu()
## Kgpu = CUSPARSE.CuSparseMatrixCSC(K)
## fgpu = CUDA.zeros(ndofs(dh))
## launch_kernel(assemble_gpu!, (Kgpu,fgpu, cellvalues, dh) , n_cells, n_basefuncs)
## #return (;Kgpu,fgpu)
## end

## CUDA.@profile benchmark_gpu()
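
For reference, a version of this benchmark written against the launcher interface added in this commit might look roughly as follows — a sketch only, reusing the names from the commented-out block above (`K`, `dh`, `cellvalues`, `n_cells`, `n_basefuncs`, `assemble_gpu!`); the launch configuration itself is left to the backend:

function benchmark_gpu()
    Kgpu = CUSPARSE.CuSparseMatrixCSC(K)          # move the sparsity pattern to the GPU
    fgpu = CUDA.zeros(ndofs(dh))                  # GPU-side load vector
    init_gpu_kernel(BackendCUDA, n_cells, n_basefuncs, assemble_gpu!, (Kgpu, fgpu, cellvalues, dh)) |> launch!
    return (; Kgpu, fgpu)
end

CUDA.@profile benchmark_gpu()
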
98 changes: 77 additions & 21 deletions src/GPU/GPUKernelLauncher.jl
@@ -1,46 +1,102 @@
#= This file represents the interface between the GPU backend (extension) and the Ferrite package. =#
#=
This file defines the interface between the GPU backend (extension) and the Ferrite package.
It provides abstract types, function signatures, and concrete types for managing GPU kernels
and backends, serving as a foundation for GPU-accelerated computations.
=#

### Abstract types ###
### Abstract Types ###
abstract type AbstractGPUKernel end
abstract type AbstractGPUBackend end


function init_gpu_kernel(backend::AbstractGPUBackend, n_cells::Ti, n_basefuncs::Ti, kernel::Function, args::Tuple) where {Ti<: Integer}
### Functions ###

"""
init_gpu_kernel(backend::AbstractGPUBackend, n_cells::Ti, n_basefuncs::Ti, kernel::Function, args::Tuple) where {Ti <: Integer}
Initializes a GPU kernel with the specified backend, number of cells, base functions,
kernel function, and additional arguments.
# Arguments
- `backend::AbstractGPUBackend`: The GPU backend to use for kernel execution.
- `n_cells::Ti`: Number of cells to be processed by the kernel.
- `n_basefuncs::Ti`: Number of base functions for each cell.
- `kernel::Function`: The kernel function to execute on the GPU.
- `args::Tuple`: Additional arguments required by the kernel.
# Notes
This function needs to be implemented for each specific backend. Calling this function
without a concrete implementation will raise an error.
"""
function init_gpu_kernel(backend::AbstractGPUBackend, n_cells::Ti, n_basefuncs::Ti, kernel::Function, args::Tuple) where {Ti <: Integer}
throw(ErrorException("A concrete implementation of init_gpu_kernel is required"))
end
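
For orientation, a concrete backend is expected to add its own method of this function. Below is a minimal sketch of what a CUDA method might look like, assuming it simply wraps the arguments in the `GPUKernel` type defined further down and dispatches on the backend type (which is how the tests in this commit call it); this is not necessarily the package's actual implementation:

function init_gpu_kernel(::Type{BackendCUDA}, n_cells::Ti, n_basefuncs::Ti,
        kernel::Function, args::Tuple) where {Ti <: Integer}
    # Package the launch parameters; the actual launch happens in `launch!`.
    return GPUKernel(n_cells, n_basefuncs, kernel, args, BackendCUDA)
end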


"""
launch!(::AbstractGPUKernelLauncher)
Interface for launching a kernel on the GPU backend.
launch!(kernel::AbstractGPUKernel)
Launches a GPU kernel using the specified backend. This interface provides a generic
mechanism for running GPU-accelerated computations across different GPU backends.
# Arguments
- `kernel::AbstractGPUKernel`: The GPU kernel to be launched.
# Notes
This function must be implemented for specific GPU kernels. If not implemented,
an error will be thrown.
"""
function launch!(::AbstractGPUKernel)
function launch!(kernel::AbstractGPUKernel)
throw(ErrorException("A concrete implementation of launch! is required"))
end
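
In the tests added by this commit, the object returned by `init_gpu_kernel` is piped directly into `launch!` (this requires a loaded GPU backend such as the CUDA extension; `dofs_gpu_kernel` and its arguments come from the test file below):

init_gpu_kernel(BackendCUDA, ncells, nbasefunctions, dofs_gpu_kernel, (gpudofs, dh, cv)) |> launch!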


### Concrete types ###
### Concrete Types ###

### Kernels ###
"""
CUDAKernel{Ti}(n_cells::Int, n_basefuncs::Int)
`CUDAKernel` represents a high-level interface to the CUDA backend for launching and configuring kernels.
GPUKernel{Ti}(n_cells::Ti, n_basefuncs::Ti, kernel::Function, args::Tuple, backend::Type{<:AbstractGPUBackend})
Represents a high-level interface to a GPU backend for configuring and launching GPU kernels.
It stores the necessary parameters for kernel execution, such as the number of cells,
number of base functions, the kernel function, and any additional arguments.
# Fields
- `n_cells::Ti`: number of cells
- `n_basefuncs::Ti`: number of base functions
- `kernel::Function`: kernel function
- `args::Tuple`: arguments to the kernel
- `n_cells::Ti`: Number of cells to be processed.
- `n_basefuncs::Ti`: Number of base functions for each cell.
- `kernel::Function`: The GPU kernel function.
- `args::Tuple`: Additional arguments to be passed to the kernel function.
- `backend::Type{<:AbstractGPUBackend}`: The GPU backend used for execution.
# Type Parameters
- `Ti`: The integer type used for `n_cells` and `n_basefuncs`.
"""
struct GPUKernel{Ti} <: AbstractGPUKernel
n_cells::Ti # number of cells
n_basefuncs::Ti # number of base functions
kernel::Function # kernel function
args::Tuple # arguments to the kernel
backend::Type{<:AbstractGPUBackend} # backend
n_cells::Ti # Number of cells
n_basefuncs::Ti # Number of base functions
kernel::Function # Kernel function to execute
args::Tuple # Arguments for the kernel function
backend::Type{<:AbstractGPUBackend} # GPU backend
end

"""
getbackend(kernel::GPUKernel) -> Type{<:AbstractGPUBackend}
Returns the backend associated with the given `GPUKernel`.
# Arguments
- `kernel::GPUKernel`: The GPU kernel from which to retrieve the backend.
# Returns
The backend type associated with the kernel.
"""
getbackend(kernel::GPUKernel) = kernel.backend
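
A small, self-contained illustration of the struct and `getbackend` (the no-op kernel is a placeholder; user code would normally obtain a `GPUKernel` via `init_gpu_kernel` rather than constructing it directly):

noop_kernel!(args...) = nothing   # placeholder kernel body

gk = GPUKernel(Int32(1000), Int32(4), noop_kernel!, (), BackendCUDA)
getbackend(gk) === BackendCUDA    # true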

### Backend ###

### GPU Backend ###

"""
BackendCUDA <: AbstractGPUBackend
Represents the CUDA backend for GPU acceleration. This type serves as a concrete
implementation of `AbstractGPUBackend` for executing GPU computations using CUDA.
"""
struct BackendCUDA <: AbstractGPUBackend end
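
To give a feel for what the CUDA extension might do with this type, here is a rough sketch of a `launch!` method for kernels created with `BackendCUDA` — the block size of 256 is an assumption, and the real extension may choose its launch configuration differently:

function launch!(kernel::GPUKernel{Ti}) where {Ti <: Integer}
    @assert getbackend(kernel) === BackendCUDA
    n = Int(kernel.n_cells)
    threads = min(n, 256)        # assumed threads per block
    blocks  = cld(n, threads)    # roughly one thread per cell
    CUDA.@cuda threads = threads blocks = blocks kernel.kernel(kernel.args...)
    return nothing
end
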
34 changes: 34 additions & 0 deletions test/GPU/test_adapt.jl
@@ -19,6 +19,28 @@ function dofs_gpu_kernel(dofs, dh, cv)
return nothing
end

weights_cpu(cv) = cv.qr |> getweights

function weights_gpu_kernel(weights, cv)
nweights = length(weights)
for i in 1:nweights
weights[i] = cv.weights[i]
end
return nothing
end

function nodes_cpu(grid)
nodes = grid.cells .|> (x -> x.nodes |> collect)
return hcat(nodes...)
end

function nodes_gpu_kernel(nodes, dh, cv)
nbasefuncs = cv |> getnbasefunctions
for cell in CellIterator(dh, convert(Int32,nbasefuncs))
cnodes = getnodes(cell)
nodes[:,cellid(cell)] .= cnodes
end
return nothing
end

@testset "Adapt" begin
dh, cv = generate_problem()
@@ -27,5 +49,17 @@
nbasefunctions = cv |> getnbasefunctions
gpudofs = zeros(Int32, nbasefunctions, ncells) |> cu
init_gpu_kernel(BackendCUDA, ncells, nbasefunctions, dofs_gpu_kernel, (gpudofs, dh, cv)) |> launch!
## Test that dofs are correctly transferred to the GPU
@test all(cpudofs .== gpudofs)
## Test that weights are correctly transferred to the GPU
cpuweights = weights_cpu(cv) |> cu
gpuweights = zeros(Float32, length(cpuweights)) |> cu
@cuda blocks = 1 threads = 1 weights_gpu_kernel(gpuweights, cv)
@test all(cpuweights .== gpuweights)
## Test that nodes are correctly transferred to the GPU
cpunodes = nodes_cpu(dh |> get_grid) |> cu
n_nodes = length(cpunodes)
gpu_cellnodes = CUDA.zeros(Int32, nbasefunctions, ncells)
init_gpu_kernel(BackendCUDA, ncells, nbasefunctions, nodes_gpu_kernel, (gpu_cellnodes, dh, cv)) |> launch!
@test all(cpunodes .== gpu_cellnodes)
end
