Commit
add test_adapt, some comments
Abdelrahman912 committed Oct 15, 2024
1 parent fbc1b4b commit ea83925
Showing 3 changed files with 111 additions and 33 deletions.
12 changes: 0 additions & 12 deletions docs/src/literate-tutorials/gpu_qp_heat_equation.jl
@@ -161,15 +161,3 @@ stassy(cv,dh) = assemble_global!(cv,dh,Val(false))
Kstd , Fstd = stassy(cellvalues,dh);
norm(Kstd)




# Benchmarking
## function benchmark_gpu()
## Kgpu = CUSPARSE.CuSparseMatrixCSC(K)
## fgpu = CUDA.zeros(ndofs(dh))
## launch_kernel(assemble_gpu!, (Kgpu,fgpu, cellvalues, dh) , n_cells, n_basefuncs)
## #return (;Kgpu,fgpu)
## end

## CUDA.@profile benchmark_gpu()
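
For reference, a version of this benchmark written against the launcher interface added in this commit might look roughly as follows — a sketch only, reusing the names from the commented-out block above (`K`, `dh`, `cellvalues`, `n_cells`, `n_basefuncs`, `assemble_gpu!`); the launch configuration itself is left to the backend:

function benchmark_gpu()
    Kgpu = CUSPARSE.CuSparseMatrixCSC(K)          # move the sparsity pattern to the GPU
    fgpu = CUDA.zeros(ndofs(dh))                  # GPU-side load vector
    init_gpu_kernel(BackendCUDA, n_cells, n_basefuncs, assemble_gpu!, (Kgpu, fgpu, cellvalues, dh)) |> launch!
    return (; Kgpu, fgpu)
end

CUDA.@profile benchmark_gpu()
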
98 changes: 77 additions & 21 deletions src/GPU/GPUKernelLauncher.jl
@@ -1,46 +1,102 @@
#= This file represents the interface between the GPU backend (extension) and the Ferrite package. =#
#=
This file defines the interface between the GPU backend (extension) and the Ferrite package.
It provides abstract types, function signatures, and concrete types for managing GPU kernels
and backends, serving as a foundation for GPU-accelerated computations.
=#

### Abstract types ###
### Abstract Types ###
abstract type AbstractGPUKernel end
abstract type AbstractGPUBackend end


function init_gpu_kernel(backend::AbstractGPUBackend, n_cells::Ti, n_basefuncs::Ti, kernel::Function, args::Tuple) where {Ti<: Integer}
### Functions ###

"""
init_gpu_kernel(backend::AbstractGPUBackend, n_cells::Ti, n_basefuncs::Ti, kernel::Function, args::Tuple) where {Ti <: Integer}
Initializes a GPU kernel with the specified backend, number of cells, base functions,
kernel function, and additional arguments.
# Arguments
- `backend::AbstractGPUBackend`: The GPU backend to use for kernel execution.
- `n_cells::Ti`: Number of cells to be processed by the kernel.
- `n_basefuncs::Ti`: Number of base functions for each cell.
- `kernel::Function`: The kernel function to execute on the GPU.
- `args::Tuple`: Additional arguments required by the kernel.
# Notes
This function needs to be implemented for each specific backend. Calling this function
without a concrete implementation will raise an error.
"""
function init_gpu_kernel(backend::AbstractGPUBackend, n_cells::Ti, n_basefuncs::Ti, kernel::Function, args::Tuple) where {Ti <: Integer}
throw(ErrorException("A concrete implementation of init_gpu_kernel is required"))
end
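
For orientation, a concrete backend is expected to add its own method of this function. Below is a minimal sketch of what a CUDA method might look like, assuming it simply wraps the arguments in the `GPUKernel` type defined further down and dispatches on the backend type (which is how the tests in this commit call it); this is not necessarily the package's actual implementation:

function init_gpu_kernel(::Type{BackendCUDA}, n_cells::Ti, n_basefuncs::Ti,
        kernel::Function, args::Tuple) where {Ti <: Integer}
    # Package the launch parameters; the actual launch happens in `launch!`.
    return GPUKernel(n_cells, n_basefuncs, kernel, args, BackendCUDA)
end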


"""
launch!(::AbstractGPUKernelLauncher)
Interface for launching a kernel on the GPU backend.
launch!(kernel::AbstractGPUKernel)
Launches a GPU kernel using the specified backend. This interface provides a generic
mechanism for running GPU-accelerated computations across different GPU backends.
# Arguments
- `kernel::AbstractGPUKernel`: The GPU kernel to be launched.
# Notes
This function must be implemented for specific GPU kernels. If not implemented,
an error will be thrown.
"""
function launch!(::AbstractGPUKernel)
function launch!(kernel::AbstractGPUKernel)
throw(ErrorException("A concrete implementation of launch! is required"))
end
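
In the tests added by this commit, the object returned by `init_gpu_kernel` is piped directly into `launch!` (this requires a loaded GPU backend such as the CUDA extension; `dofs_gpu_kernel` and its arguments come from the test file below):

init_gpu_kernel(BackendCUDA, ncells, nbasefunctions, dofs_gpu_kernel, (gpudofs, dh, cv)) |> launch!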


### Concrete types ###
### Concrete Types ###

### Kernels ###
"""
CUDAKernel{Ti}(n_cells::Int, n_basefuncs::Int)
`CUDAKernel` represents a high-level interface to the CUDA backend for launching and configuring kernels.
GPUKernel{Ti}(n_cells::Ti, n_basefuncs::Ti, kernel::Function, args::Tuple, backend::Type{<:AbstractGPUBackend})
Represents a high-level interface to a GPU backend for configuring and launching GPU kernels.
It stores the necessary parameters for kernel execution, such as the number of cells,
number of base functions, the kernel function, and any additional arguments.
# Fields
- `n_cells::Ti`: number of cells
- `n_basefuncs::Ti`: number of base functions
- `kernel::Function`: kernel function
- `args::Tuple`: arguments to the kernel
- `n_cells::Ti`: Number of cells to be processed.
- `n_basefuncs::Ti`: Number of base functions for each cell.
- `kernel::Function`: The GPU kernel function.
- `args::Tuple`: Additional arguments to be passed to the kernel function.
- `backend::Type{<:AbstractGPUBackend}`: The GPU backend used for execution.
# Type Parameters
- `Ti`: The integer type used for `n_cells` and `n_basefuncs`.
"""
struct GPUKernel{Ti} <: AbstractGPUKernel
n_cells::Ti # number of cells
n_basefuncs::Ti # number of base functions
kernel::Function # kernel function
args::Tuple # arguments to the kernel
backend::Type{<:AbstractGPUBackend} # backend
n_cells::Ti # Number of cells
n_basefuncs::Ti # Number of base functions
kernel::Function # Kernel function to execute
args::Tuple # Arguments for the kernel function
backend::Type{<:AbstractGPUBackend} # GPU backend
end

"""
getbackend(kernel::GPUKernel) -> Type{<:AbstractGPUBackend}
Returns the backend associated with the given `GPUKernel`.
# Arguments
- `kernel::GPUKernel`: The GPU kernel from which to retrieve the backend.
# Returns
The backend type associated with the kernel.
"""
getbackend(kernel::GPUKernel) = kernel.backend
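
A small, self-contained illustration of the struct and `getbackend` (the no-op kernel is a placeholder; user code would normally obtain a `GPUKernel` via `init_gpu_kernel` rather than constructing it directly):

noop_kernel!(args...) = nothing   # placeholder kernel body

gk = GPUKernel(Int32(1000), Int32(4), noop_kernel!, (), BackendCUDA)
getbackend(gk) === BackendCUDA    # true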

### Backend ###

### GPU Backend ###

"""
BackendCUDA <: AbstractGPUBackend
Represents the CUDA backend for GPU acceleration. This type serves as a concrete
implementation of `AbstractGPUBackend` for executing GPU computations using CUDA.
"""
struct BackendCUDA <: AbstractGPUBackend end
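
To give a feel for what the CUDA extension might do with this type, here is a rough sketch of a `launch!` method for kernels created with `BackendCUDA` — the block size of 256 is an assumption, and the real extension may choose its launch configuration differently:

function launch!(kernel::GPUKernel{Ti}) where {Ti <: Integer}
    @assert getbackend(kernel) === BackendCUDA
    n = Int(kernel.n_cells)
    threads = min(n, 256)        # assumed threads per block
    blocks  = cld(n, threads)    # roughly one thread per cell
    CUDA.@cuda threads = threads blocks = blocks kernel.kernel(kernel.args...)
    return nothing
end
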
34 changes: 34 additions & 0 deletions test/GPU/test_adapt.jl
@@ -19,6 +19,28 @@ function dofs_gpu_kernel(dofs, dh, cv)
return nothing
end

weights_cpu(cv) = cv.qr |> getweights

function weights_gpu_kernel(weights, cv)
nweights = length(weights)
for i in 1:nweights
weights[i] = cv.weights[i]
end
return nothing
end

function nodes_cpu(grid)
nodes = grid.cells .|> (x -> x.nodes |> collect)
return hcat(nodes...)
end

function nodes_gpu_kernel(nodes, dh, cv)
nbasefuncs = cv |> getnbasefunctions
for cell in CellIterator(dh, convert(Int32,nbasefuncs))
cnodes = getnodes(cell)
nodes[:,cellid(cell)] .= cnodes
end
return nothing
end

@testset "Adapt" begin
dh, cv = generate_problem()
@@ -27,5 +49,17 @@
nbasefunctions = cv |> getnbasefunctions
gpudofs = zeros(Int32, nbasefunctions, ncells) |> cu
init_gpu_kernel(BackendCUDA, ncells, nbasefunctions, dofs_gpu_kernel, (gpudofs, dh, cv)) |> launch!
## Test that dofs are correctly transferred to the GPU
@test all(cpudofs .== gpudofs)
## Test that weights are correctly transferred to the GPU
cpuweights = weights_cpu(cv) |> cu
gpuweights = zeros(Float32, length(cpuweights)) |> cu
@cuda blocks = 1 threads = 1 weights_gpu_kernel(gpuweights, cv)
@test all(cpuweights .== gpuweights)
## Test that nodes are correctly transferred to the GPU
cpunodes = nodes_cpu(dh |> get_grid) |> cu
n_nodes = length(cpunodes)
gpu_cellnodes = CUDA.zeros(Int32, nbasefunctions, ncells)
init_gpu_kernel(BackendCUDA, ncells, nbasefunctions, nodes_gpu_kernel, (gpu_cellnodes, dh, cv)) |> launch!
@test all(cpunodes .== gpu_cellnodes)
end
