Skip to content

Commit

Permalink
Refactor Cuda::GetAffineDeformationField() #92
Browse files Browse the repository at this point in the history
  • Loading branch information
onurulgen committed Jan 25, 2024
1 parent 69c1fe6 commit 45698ba
Show file tree
Hide file tree
Showing 11 changed files with 106 additions and 120 deletions.
2 changes: 1 addition & 1 deletion niftyreg_build_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
393
394
6 changes: 3 additions & 3 deletions reg-apps/reg_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,9 @@ int main(int argc, char **argv)
time(&start);
for(int i=0; i<maxIt; ++i)
{
reg_affine_getDeformationField_gpu(affineTransformation,
targetImage,
&deformationFieldImageArray_d);
Cuda::GetAffineDeformationField(affineTransformation,
targetImage,
&deformationFieldImageArray_d);
}
time(&end);
gpuTime=(end-start);
Expand Down
3 changes: 0 additions & 3 deletions reg-lib/cuda/BlockSize.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
namespace NiftyReg {
/* *************************************************************** */
struct BlockSize {
unsigned reg_affine_getDeformationField;
unsigned GetApproxJacobianValues2d;
unsigned GetApproxJacobianValues3d;
unsigned GetJacobianValues2d;
Expand All @@ -35,7 +34,6 @@ struct BlockSize {
/* *************************************************************** */
struct BlockSize100: public BlockSize {
BlockSize100() {
reg_affine_getDeformationField = 512; // 16 reg - 24 smem
GetApproxJacobianValues2d = 384; // 17 reg - 104 smem - 36 cmem
GetApproxJacobianValues3d = 256; // 27 reg - 356 smem - 108 cmem
GetJacobianValues2d = 256; // 29 reg - 32 smem - 16 cmem - 32 lmem
Expand All @@ -58,7 +56,6 @@ struct BlockSize100: public BlockSize {
/* *************************************************************** */
struct BlockSize300: public BlockSize {
BlockSize300() {
reg_affine_getDeformationField = 1024; // 23 reg
GetApproxJacobianValues2d = 768; // 34 reg
GetApproxJacobianValues3d = 640; // 46 reg
GetJacobianValues2d = 768; // 34 reg
Expand Down
2 changes: 1 addition & 1 deletion reg-lib/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
CudaConvolutionKernel.cpp
CudaDefContent.cpp
CudaF3dContent.cpp
CudaGlobalTransformation.cu
CudaKernelConvolution.cu
CudaKernelFactory.cpp
CudaLocalTransformation.cu
Expand All @@ -74,7 +75,6 @@ cuda_add_library(${NAME} ${NIFTYREG_LIBRARY_TYPE}
CudaResampling.cu
CudaTools.cu
resampleKernel.cu
_reg_globalTransformation_gpu.cu
_reg_nmi_gpu.cu
_reg_ssd_gpu.cu
)
Expand Down
4 changes: 2 additions & 2 deletions reg-lib/cuda/CudaCompute.cu
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#include "CudaCompute.h"
#include "CudaF3dContent.h"
#include "CudaGlobalTransformation.hpp"
#include "CudaKernelConvolution.hpp"
#include "CudaLocalTransformation.hpp"
#include "CudaNormaliseGradient.hpp"
#include "CudaResampling.hpp"
#include "CudaOptimiser.hpp"
#include "_reg_globalTransformation_gpu.h"

/* *************************************************************** */
void CudaCompute::ResampleImage(int interpolation, float paddingValue) {
Expand Down Expand Up @@ -317,7 +317,7 @@ void CudaCompute::ExponentiateGradient(Content& conBwIn) {
if (affineTransformationBw) {
affineDisp = NiftiImage(deformationField, NiftiImage::Copy::ImageInfo);
affineDispCudaVec.resize(defFieldNumber);
reg_affine_getDeformationField_gpu(affineTransformationBw, affineDisp, affineDispCudaVec.data().get());
Cuda::GetAffineDeformationField(affineTransformationBw, affineDisp, affineDispCudaVec.data().get());
Cuda::GetDisplacementFromDeformation(affineDisp, affineDispCudaVec.data().get());
}

Expand Down
64 changes: 64 additions & 0 deletions reg-lib/cuda/CudaGlobalTransformation.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* CudaGlobalTransformation.cu
*
*
* Created by Marc Modat on 25/03/2009.
* Copyright (c) 2009-2018, University College London
* Copyright (c) 2018, NiftyReg Developers.
* All rights reserved.
* See the LICENSE.txt file in the nifty_reg root folder
*
*/

#include "CudaGlobalTransformation.hpp"
#include "_reg_common_cuda_kernels.cu"

/* *************************************************************** */
template<bool is3d, bool compose>
void GetAffineDeformationField(const mat44 *affineMatrix,
const nifti_image *deformationField,
float4 *deformationFieldCuda) {
const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, is3d ? 3 : 2);
const int3 imageDims = make_int3(deformationField->nx, deformationField->ny, deformationField->nz);
const mat44 *targetMatrix = deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz;
const mat44 transMatrix = compose ? *affineMatrix : reg_mat44_mul(affineMatrix, targetMatrix);
Cuda::UniqueTextureObjectPtr deformationFieldTexturePtr; cudaTextureObject_t deformationFieldTexture = 0;
if constexpr (compose) {
deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4);
deformationFieldTexture = *deformationFieldTexturePtr;
}

thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [
deformationFieldCuda, deformationFieldTexture, transMatrix, imageDims
]__device__(const int index) {
float voxel[3];
if constexpr (compose) {
float4 defVal = tex1Dfetch<float4>(deformationFieldTexture, index);
voxel[0] = defVal.x; voxel[1] = defVal.y; voxel[2] = defVal.z;
} else {
auto dims = reg_indexToDims_cuda<is3d>(index, imageDims);
voxel[0] = static_cast<float>(dims.x);
voxel[1] = static_cast<float>(dims.y);
voxel[2] = static_cast<float>(dims.z);
}

// The transformation is applied
float position[3];
reg_mat44_mul_cuda<is3d>(transMatrix, voxel, position);

// The deformation field (real coordinates) is stored
deformationFieldCuda[index] = make_float4(position[0], position[1], position[2], 0);
});
}
/* *************************************************************** */
template<bool compose>
void Cuda::GetAffineDeformationField(const mat44 *affineMatrix,
const nifti_image *deformationField,
float4 *deformationFieldCuda) {
auto getAffineDeformationField = deformationField->nz > 1 ? ::GetAffineDeformationField<true, compose> :
::GetAffineDeformationField<false, compose>;
getAffineDeformationField(affineMatrix, deformationField, deformationFieldCuda);
}
template void Cuda::GetAffineDeformationField<false>(const mat44*, const nifti_image*, float4*);
template void Cuda::GetAffineDeformationField<true>(const mat44*, const nifti_image*, float4*);
/* *************************************************************** */
26 changes: 26 additions & 0 deletions reg-lib/cuda/CudaGlobalTransformation.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* CudaGlobalTransformation.hpp
*
*
* Created by Marc Modat on 25/03/2009.
* Copyright (c) 2009-2018, University College London
* Copyright (c) 2018, NiftyReg Developers.
* All rights reserved.
* See the LICENSE.txt file in the nifty_reg root folder
*
*/

#pragma once

#include "CudaCommon.hpp"

/* *************************************************************** */
namespace NiftyReg::Cuda {
/* *************************************************************** */
template<bool compose=false>
void GetAffineDeformationField(const mat44 *affineMatrix,
const nifti_image *targetImage,
float4 *deformationFieldCuda);
/* *************************************************************** */
} // namespace NiftyReg::Cuda
/* *************************************************************** */
18 changes: 9 additions & 9 deletions reg-lib/cuda/CudaLocalTransformation.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

#include "CudaLocalTransformation.hpp"
#include "CudaLocalTransformationKernels.cu"
#include "_reg_globalTransformation_gpu.h"
#include "CudaGlobalTransformation.hpp"
#include "_reg_splineBasis.h"

/* *************************************************************** */
Expand Down Expand Up @@ -669,8 +669,8 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField,
// Create a field that contains the affine component only
affineOnly = NiftiImage(deformationField, NiftiImage::Copy::ImageInfo);
affineOnlyCudaVec.resize(voxelNumber);
reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(flowField->ext_list[0].edata),
affineOnly, affineOnlyCudaVec.data().get());
Cuda::GetAffineDeformationField(reinterpret_cast<mat44*>(flowField->ext_list[0].edata),
affineOnly, affineOnlyCudaVec.data().get());
SubtractImages(flowField, flowFieldCuda, affineOnlyCudaVec.data().get());
}
} else GetDisplacementFromDeformation(flowField, flowFieldCuda);
Expand Down Expand Up @@ -728,8 +728,8 @@ void GetDeformationFieldFromFlowField(nifti_image *flowField,
deformationField->intent_p2 = 0;
// If required an affine component is composed
if (flowField->num_ext > 1)
reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(flowField->ext_list[1].edata),
deformationField, deformationFieldCuda, true);
Cuda::GetAffineDeformationField<true>(reinterpret_cast<mat44*>(flowField->ext_list[1].edata),
deformationField, deformationFieldCuda);
}
/* *************************************************************** */
void GetDefFieldFromVelocityGrid(nifti_image *velocityFieldGrid,
Expand Down Expand Up @@ -816,8 +816,8 @@ void GetIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
// Create a field that contains the affine component only
affineOnly = NiftiImage(deformationFields[0], NiftiImage::Copy::ImageInfo);
affineOnlyCudaVec.resize(voxelNumber);
reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(flowField->ext_list[0].edata),
affineOnly, affineOnlyCudaVec.data().get());
Cuda::GetAffineDeformationField(reinterpret_cast<mat44*>(flowField->ext_list[0].edata),
affineOnly, affineOnlyCudaVec.data().get());
SubtractImages(flowField, flowFieldCuda, affineOnlyCudaVec.data().get());
}
} else GetDisplacementFromDeformation(flowField, flowFieldCuda);
Expand Down Expand Up @@ -856,8 +856,8 @@ void GetIntermediateDefFieldFromVelGrid(nifti_image *velocityFieldGrid,
// If required an affine component is composed
if (velocityFieldGrid->num_ext > 1) {
for (int i = 0; i <= squaringNumber; i++)
reg_affine_getDeformationField_gpu(reinterpret_cast<mat44*>(velocityFieldGrid->ext_list[1].edata),
deformationFields[i], deformationFieldCudaVecs[i].data().get(), true);
Cuda::GetAffineDeformationField<true>(reinterpret_cast<mat44*>(velocityFieldGrid->ext_list[1].edata),
deformationFields[i], deformationFieldCudaVecs[i].data().get());
}
}
/* *************************************************************** */
Expand Down
42 changes: 0 additions & 42 deletions reg-lib/cuda/_reg_globalTransformation_gpu.cu

This file was deleted.

20 changes: 0 additions & 20 deletions reg-lib/cuda/_reg_globalTransformation_gpu.h

This file was deleted.

39 changes: 0 additions & 39 deletions reg-lib/cuda/_reg_globalTransformation_kernels.cu

This file was deleted.

0 comments on commit 45698ba

Please sign in to comment.