From e568f9574fc5da690ec5616b2167c4f81b05a4c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Tue, 5 Mar 2024 13:52:49 +0000 Subject: [PATCH] Refactor _reg_maths* and _reg_common_cuda_kernels --- niftyreg_build_version.txt | 2 +- reg-apps/reg_average.cpp | 21 +- reg-apps/reg_benchmark.cpp | 2 +- reg-apps/reg_gpuinfo.cpp | 2 +- reg-apps/reg_jacobian.cpp | 2 +- reg-apps/reg_resample.cpp | 2 +- reg-apps/reg_tools.cpp | 4 +- reg-apps/reg_transform.cpp | 35 +- reg-io/_reg_ReadWriteMatrix.cpp | 10 +- reg-io/nrrd/reg_nrrd.cpp | 4 +- reg-lib/CMakeLists.txt | 3 +- reg-lib/Debug.hpp | 4 +- reg-lib/_reg_aladin.cpp | 10 +- reg-lib/_reg_aladin_sym.cpp | 11 +- reg-lib/_reg_base.cpp | 4 +- reg-lib/cl/ClAffineDeformationFieldKernel.cpp | 2 +- reg-lib/cpu/{_reg_maths.cpp => Maths.cpp} | 559 +++++++----------- reg-lib/cpu/Maths.hpp | 363 ++++++++++++ reg-lib/cpu/_reg_blockMatching.cpp | 14 +- reg-lib/cpu/_reg_blockMatching.h | 2 +- reg-lib/cpu/_reg_globalTrans.cpp | 206 ++++--- reg-lib/cpu/_reg_localTrans.cpp | 77 ++- reg-lib/cpu/_reg_localTrans_jac.cpp | 72 ++- reg-lib/cpu/_reg_localTrans_regul.cpp | 23 +- reg-lib/cpu/_reg_maths.h | 200 ------- reg-lib/cpu/_reg_maths_eigen.cpp | 205 ------- reg-lib/cpu/_reg_maths_eigen.h | 36 -- reg-lib/cpu/_reg_resampling.cpp | 45 +- reg-lib/cpu/_reg_ssd.cpp | 4 +- reg-lib/cpu/_reg_tools.cpp | 8 +- reg-lib/cpu/_reg_tools.h | 2 +- reg-lib/cuda/CudaCommon.hpp | 16 + reg-lib/cuda/CudaGlobalTransformation.cu | 7 +- reg-lib/cuda/CudaLocalTransformation.cu | 20 +- .../cuda/CudaLocalTransformationKernels.cu | 147 ++++- reg-lib/cuda/CudaOptimiser.cu | 1 - reg-lib/cuda/CudaResampling.cu | 15 +- reg-lib/cuda/CudaTools.cu | 10 +- reg-lib/cuda/CudaToolsKernels.cu | 8 +- reg-lib/cuda/_reg_common_cuda_kernels.cu | 157 ----- reg-lib/cuda/_reg_nmi_gpu.cu | 1 - reg-lib/cuda/affineDeformationKernel.cu | 2 +- reg-lib/cuda/blockMatchingKernel.cu | 2 +- reg-lib/cuda/resampleKernel.cu | 14 +- reg-test/reg_test_affineDeformationField.cpp | 6 +- reg-test/reg_test_be.cpp | 4 +- reg-test/reg_test_blockMatching.cpp | 2 +- reg-test/reg_test_regr_lts.cpp | 4 +- .../reg_test_voxelCentricToNodeCentric.cpp | 12 +- 49 files changed, 1026 insertions(+), 1336 deletions(-) rename reg-lib/cpu/{_reg_maths.cpp => Maths.cpp} (50%) create mode 100644 reg-lib/cpu/Maths.hpp delete mode 100644 reg-lib/cpu/_reg_maths.h delete mode 100644 reg-lib/cpu/_reg_maths_eigen.cpp delete mode 100644 reg-lib/cpu/_reg_maths_eigen.h delete mode 100644 reg-lib/cuda/_reg_common_cuda_kernels.cu diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 29aae8ee..7b53aa00 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -418 +419 diff --git a/reg-apps/reg_average.cpp b/reg-apps/reg_average.cpp index 0b57a922..07446e4d 100644 --- a/reg-apps/reg_average.cpp +++ b/reg-apps/reg_average.cpp @@ -18,7 +18,6 @@ #include "_reg_resampling.h" #include "_reg_globalTrans.h" #include "_reg_localTrans.h" -#include "_reg_maths_eigen.h" using PrecisionType = float; @@ -112,7 +111,7 @@ mat44 compute_average_matrices(size_t matrixNumber, // Input matrices are logged in place for(size_t m=0; mlts_inlier * matrixNumber; --m) @@ -205,7 +204,7 @@ mat44 compute_average_matrices(size_t matrixNumber, } } // The average matrix is exponentiated - average_matrix = reg_mat44_expm(&average_matrix); + average_matrix = Mat44Expm(&average_matrix); } // iteration number // Free the allocated array free(matrixWeight); @@ -230,15 +229,15 @@ mat44 compute_affine_demean(size_t matrixNumber, tempMatrix=nifti_quatern_to_mat44(qb,qc,qd,qx,qy,qz,1.f,1.f,1.f,qfac); // remove the rigid componenent from the affine matrix tempMatrix=nifti_mat44_inverse(tempMatrix); - tempMatrix=reg_mat44_mul(&tempMatrix,¤t_affine); + tempMatrix=tempMatrix * current_affine; // sum up all the affine matrices - tempMatrix = reg_mat44_logm(&tempMatrix); + tempMatrix = Mat44Logm(&tempMatrix); demeanMatrix = demeanMatrix + tempMatrix; } // The average matrix is normalised - demeanMatrix = reg_mat44_mul(&demeanMatrix,1.f/(float)matrixNumber); + demeanMatrix = demeanMatrix * (1.f / (float)matrixNumber); // The average matrix is exponentiated - demeanMatrix = reg_mat44_expm(&demeanMatrix); + demeanMatrix = Mat44Expm(&demeanMatrix); // The average matrix is inverted demeanMatrix = nifti_mat44_inverse(demeanMatrix); return demeanMatrix; @@ -293,11 +292,7 @@ int compute_nrr_demean(nifti_image *demean_field, affineTransformation=*reinterpret_cast(transformation->ext_list[0].edata); // Note that if the transformation is a flow field, only half-of the affine has be used if(transformation->num_ext>1 && deformationField->intent_p1!=DEF_VEL_FIELD) - { - affineTransformation=reg_mat44_mul( - reinterpret_cast(transformation->ext_list[1].edata), - &affineTransformation); - } + affineTransformation=reinterpret_cast(*transformation->ext_list[1].edata) * affineTransformation; } else reg_tool_ReadAffineFile(&affineTransformation,inputAffName[t]); // The affine component is substracted diff --git a/reg-apps/reg_benchmark.cpp b/reg-apps/reg_benchmark.cpp index 828b050e..dd439f62 100644 --- a/reg-apps/reg_benchmark.cpp +++ b/reg-apps/reg_benchmark.cpp @@ -153,7 +153,7 @@ int main(int argc, char **argv) originIndex[0] = -1.0f; originIndex[1] = -1.0f; originIndex[2] = -1.0f; - reg_mat44_mul(&(controlPointImage->qto_xyz), originIndex, originReal); + Mat44Mul(controlPointImage->qto_xyz, originIndex, originReal); controlPointImage->qto_xyz.m[0][3] = controlPointImage->qoffset_x = originReal[0]; controlPointImage->qto_xyz.m[1][3] = controlPointImage->qoffset_y = originReal[1]; controlPointImage->qto_xyz.m[2][3] = controlPointImage->qoffset_z = originReal[2]; diff --git a/reg-apps/reg_gpuinfo.cpp b/reg-apps/reg_gpuinfo.cpp index d4858ead..3f051047 100644 --- a/reg-apps/reg_gpuinfo.cpp +++ b/reg-apps/reg_gpuinfo.cpp @@ -1,4 +1,4 @@ -#include "_reg_maths.h" +#include "Maths.hpp" #include "Platform.h" #ifdef USE_CUDA diff --git a/reg-apps/reg_jacobian.cpp b/reg-apps/reg_jacobian.cpp index 27b517bf..06507407 100644 --- a/reg-apps/reg_jacobian.cpp +++ b/reg-apps/reg_jacobian.cpp @@ -233,7 +233,7 @@ int main(int argc, char **argv) if(!reg_isAnImageFileName(param->inputTransName)){ mat44 *affineTransformation=(mat44 *)malloc(sizeof(mat44)); reg_tool_ReadAffineFile(affineTransformation,param->inputTransName); - NR_COUT << reg_mat44_det(affineTransformation) << std::endl; + NR_COUT << Mat44Det(affineTransformation) << std::endl; return EXIT_SUCCESS; } diff --git a/reg-apps/reg_resample.cpp b/reg-apps/reg_resample.cpp index e2fe543d..9ab79df2 100755 --- a/reg-apps/reg_resample.cpp +++ b/reg-apps/reg_resample.cpp @@ -307,7 +307,7 @@ int main(int argc, char **argv) else { // No transformation is specified, an identity transformation is used - reg_mat44_eye(&inputAffineTransformation); + Mat44Eye(&inputAffineTransformation); } // Create a deformation field diff --git a/reg-apps/reg_tools.cpp b/reg-apps/reg_tools.cpp index 70ff5741..76b55ba5 100755 --- a/reg-apps/reg_tools.cpp +++ b/reg-apps/reg_tools.cpp @@ -498,7 +498,7 @@ int main(int argc, char **argv) { reg_tools_changeDatatype(image); nifti_image *normImage = nifti_dup(*image); - reg_heapSort(static_cast(normImage->data), normImage->nvox); + HeapSort(static_cast(normImage->data), normImage->nvox); float minValue = static_cast(normImage->data)[Floor(03*(int)normImage->nvox/100)]; float maxValue = static_cast(normImage->data)[Floor(97*(int)normImage->nvox/100)]; reg_tools_subtractValueFromImage(image,normImage,minValue); @@ -892,7 +892,7 @@ int main(int argc, char **argv) const size_t jacobianVoxelNumber = NiftiImage::calcVoxelNumber(def, 3); mat33 *jacobian = (mat33 *)malloc(jacobianVoxelNumber * sizeof(mat33)); for (size_t i = 0; i < jacobianVoxelNumber; ++i) - reg_mat33_eye(&jacobian[i]); + Mat33Eye(&jacobian[i]); // resample the original image into the space of the new image if(flag->interpFlag == 0){ param->interpOrder = 3; diff --git a/reg-apps/reg_transform.cpp b/reg-apps/reg_transform.cpp index 4cf0bfe5..ba427d31 100755 --- a/reg-apps/reg_transform.cpp +++ b/reg-apps/reg_transform.cpp @@ -16,7 +16,6 @@ #include "_reg_globalTrans.h" #include "_reg_localTrans.h" #include "_reg_tools.h" -#include "_reg_maths_eigen.h" #include "reg_transform.h" @@ -532,7 +531,7 @@ int main(int argc, char **argv) { if (affine1Trans != nullptr && affine2Trans != nullptr) { NR_INFO("Transformation 2 is an affine parametrisation:"); NR_INFO(param->input2TransName); - *affine1Trans = reg_mat44_mul(affine2Trans, affine1Trans); + *affine1Trans = *affine2Trans * *affine1Trans; reg_tool_WriteAffineFile(affine1Trans, param->outputTransName); } else { // Check if the reference image is required @@ -955,10 +954,10 @@ int main(int argc, char **argv) { // Update the sform if (image->sform_code > 0) { - image->sto_xyz = reg_mat44_mul(affineTransformation, &(image->sto_xyz)); + image->sto_xyz = *affineTransformation * image->sto_xyz; } else { image->sform_code = 1; - image->sto_xyz = reg_mat44_mul(affineTransformation, &(image->qto_xyz)); + image->sto_xyz = *affineTransformation * image->qto_xyz; } image->sto_ijk = nifti_mat44_inverse(image->sto_xyz); @@ -980,9 +979,9 @@ int main(int argc, char **argv) { affineTrans = (mat44 *)malloc(sizeof(mat44)); reg_tool_ReadAffineFile(affineTrans, param->inputTransName); // The affine transformation is halfed - *affineTrans = reg_mat44_logm(affineTrans); - *affineTrans = reg_mat44_mul(affineTrans, 0.5); - *affineTrans = reg_mat44_expm(affineTrans); + *affineTrans = Mat44Logm(affineTrans); + *affineTrans = *affineTrans * 0.5; + *affineTrans = Mat44Expm(affineTrans); // The affine transformation is saved reg_tool_WriteAffineFile(affineTrans, param->outputTransName); } else { @@ -1183,17 +1182,17 @@ int main(int argc, char **argv) { if (flag->makeAffFlag) { // Create all the required matrices mat44 rotationX; - reg_mat44_eye(&rotationX); + Mat44Eye(&rotationX); mat44 translation; - reg_mat44_eye(&translation); + Mat44Eye(&translation); mat44 rotationY; - reg_mat44_eye(&rotationY); + Mat44Eye(&rotationY); mat44 rotationZ; - reg_mat44_eye(&rotationZ); + Mat44Eye(&rotationZ); mat44 scaling; - reg_mat44_eye(&scaling); + Mat44Eye(&scaling); mat44 shearing; - reg_mat44_eye(&shearing); + Mat44Eye(&shearing); // Set up the rotation matrix along the YZ plane rotationX.m[1][1] = cosf(param->affTransParam[0]); rotationX.m[1][2] = -sinf(param->affTransParam[0]); @@ -1222,11 +1221,11 @@ int main(int argc, char **argv) { shearing.m[2][0] = param->affTransParam[10]; shearing.m[2][1] = param->affTransParam[11]; // Combine all the transformations - mat44 affine = reg_mat44_mul(&rotationY, &rotationZ); - affine = reg_mat44_mul(&rotationX, &affine); - affine = reg_mat44_mul(&scaling, &affine); - affine = reg_mat44_mul(&shearing, &affine); - affine = reg_mat44_mul(&translation, &affine); + mat44 affine = rotationY * rotationZ; + affine = rotationX * affine; + affine = scaling * affine; + affine = shearing * affine; + affine = translation * affine; // Save the new matrix reg_tool_WriteAffineFile(&affine, param->outputTransName); } diff --git a/reg-io/_reg_ReadWriteMatrix.cpp b/reg-io/_reg_ReadWriteMatrix.cpp index 8b399680..e37fc34c 100644 --- a/reg-io/_reg_ReadWriteMatrix.cpp +++ b/reg-io/_reg_ReadWriteMatrix.cpp @@ -70,11 +70,11 @@ void reg_tool_ReadAffineFile(mat44 *mat, absoluteFloating = nifti_mat44_inverse(absoluteFloating); *mat = nifti_mat44_inverse(*mat); - *mat = reg_mat44_mul(&absoluteFloating, mat); - *mat = reg_mat44_mul(mat, &absoluteReference); - *mat = reg_mat44_mul(floatingMatrix, mat); + *mat = absoluteFloating * *mat; + *mat = *mat * absoluteReference; + *mat = *floatingMatrix * *mat; mat44 tmp = nifti_mat44_inverse(*referenceMatrix); - *mat = reg_mat44_mul(mat, &tmp); + *mat = *mat * tmp; } NR_MAT44_DEBUG(*mat, "Affine matrix"); @@ -168,7 +168,7 @@ template T** reg_tool_ReadMatrixFile(char *filename, size_t nbLine, size_t nbColumn) { //THEN CONSTRUCT THE MATRIX // Allocate the matrices - T** mat = reg_matrix2DAllocate(nbLine, nbColumn); + T** mat = Matrix2dAlloc(nbLine, nbColumn); //STORE THE VALUES std::string line; std::ifstream matrixFile(filename); diff --git a/reg-io/nrrd/reg_nrrd.cpp b/reg-io/nrrd/reg_nrrd.cpp index 225d6f11..9462a634 100644 --- a/reg-io/nrrd/reg_nrrd.cpp +++ b/reg-io/nrrd/reg_nrrd.cpp @@ -167,7 +167,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage) // The space orientation is extracted and converted into a matrix mat44 qform_orientation_matrix; - reg_mat44_eye(&qform_orientation_matrix); + Mat44Eye(&qform_orientation_matrix); if(nrrdImage->space==nrrdSpaceRightAnteriorSuperior || nrrdImage->space==nrrdSpaceRightAnteriorSuperiorTime || nrrdImage->space==nrrdSpace3DRightHanded || @@ -251,7 +251,7 @@ nifti_image *reg_io_nrdd2nifti(Nrrd *nrrdImage) if(nrrdImage->axis[1].spaceDirection[0]!=std::numeric_limits::quiet_NaN()) { niiImage->sform_code=1; - reg_mat44_eye(&niiImage->sto_xyz); + Mat44Eye(&niiImage->sto_xyz); for(int i=0; i<(niiImage->ndim<3?niiImage->ndim:3); ++i) { for(int j=0; j<(niiImage->ndim<3?niiImage->ndim:3); ++j) diff --git a/reg-lib/CMakeLists.txt b/reg-lib/CMakeLists.txt index ac7a34b1..039e26ee 100755 --- a/reg-lib/CMakeLists.txt +++ b/reg-lib/CMakeLists.txt @@ -12,8 +12,7 @@ endif(USE_OPENCL) ##BUILD THE CPU LIBRARIES #----------------------------------------------------------------------------- add_library(_reg_maths ${NIFTYREG_LIBRARY_TYPE} - cpu/_reg_maths.cpp - cpu/_reg_maths_eigen.cpp + cpu/Maths.cpp ) install(TARGETS _reg_maths RUNTIME DESTINATION bin diff --git a/reg-lib/Debug.hpp b/reg-lib/Debug.hpp index 93e452eb..826b13ed 100644 --- a/reg-lib/Debug.hpp +++ b/reg-lib/Debug.hpp @@ -68,11 +68,11 @@ inline std::string StripFunctionName(const std::string& funcName) { #define NR_INFO(msg) NR_COUT << "[NiftyReg INFO] " << msg << std::endl /* *************************************************************** */ #ifndef NDEBUG -#define NR_MAT44(mat, title) reg_mat44_disp(mat, "[NiftyReg DEBUG] "s + (title)) +#define NR_MAT44(mat, title) Mat44Disp(mat, "[NiftyReg DEBUG] "s + (title)) #define NR_MAT44_DEBUG(mat, title) NR_MAT44(mat, title) #define NR_MAT44_VERBOSE(mat, title) NR_MAT44(mat, title) #else -#define NR_MAT44(mat, title) reg_mat44_disp(mat, title) +#define NR_MAT44(mat, title) Mat44Disp(mat, title) #define NR_MAT44_DEBUG(mat, title) #define NR_MAT44_VERBOSE(mat, title) if (this->verbose) NR_MAT44(mat, "[NiftyReg INFO] "s + (title)) #endif diff --git a/reg-lib/_reg_aladin.cpp b/reg-lib/_reg_aladin.cpp index 35b5a2dd..032aeb97 100644 --- a/reg-lib/_reg_aladin.cpp +++ b/reg-lib/_reg_aladin.cpp @@ -177,9 +177,9 @@ void reg_aladin::InitialiseRegistration() { referenceCenter[2] = (float)(this->inputReference->nz) / 2.0f; //From pixel coordinates to real coordinates float floatingRealPosition[3]; - reg_mat44_mul(floatingMatrix, floatingCenter, floatingRealPosition); + Mat44Mul(*floatingMatrix, floatingCenter, floatingRealPosition); float referenceRealPosition[3]; - reg_mat44_mul(referenceMatrix, referenceCenter, referenceRealPosition); + Mat44Mul(*referenceMatrix, referenceCenter, referenceRealPosition); //Set translation to the transformation matrix this->affineTransformation->m[0][3] = floatingRealPosition[0] - referenceRealPosition[0]; this->affineTransformation->m[1][3] = floatingRealPosition[1] - referenceRealPosition[1]; @@ -207,7 +207,7 @@ void reg_aladin::InitialiseRegistration() { referenceCentre[2] /= referenceCount; float refCOM[3]{}; if (this->inputReference->sform_code > 0) - reg_mat44_mul(&this->inputReference->sto_xyz, referenceCentre, refCOM); + Mat44Mul(this->inputReference->sto_xyz, referenceCentre, refCOM); float floatingCentre[3] = { 0, 0, 0 }; float floatingCount = 0; @@ -231,8 +231,8 @@ void reg_aladin::InitialiseRegistration() { floatingCentre[2] /= floatingCount; float floCOM[3]{}; if (this->inputFloating->sform_code > 0) - reg_mat44_mul(&this->inputFloating->sto_xyz, floatingCentre, floCOM); - reg_mat44_eye(this->affineTransformation.get()); + Mat44Mul(this->inputFloating->sto_xyz, floatingCentre, floCOM); + Mat44Eye(this->affineTransformation.get()); this->affineTransformation->m[0][3] = floCOM[0] - refCOM[0]; this->affineTransformation->m[1][3] = floCOM[1] - refCOM[1]; this->affineTransformation->m[2][3] = floCOM[2] - refCOM[2]; diff --git a/reg-lib/_reg_aladin_sym.cpp b/reg-lib/_reg_aladin_sym.cpp index 610405bd..62cdd753 100644 --- a/reg-lib/_reg_aladin_sym.cpp +++ b/reg-lib/_reg_aladin_sym.cpp @@ -1,5 +1,4 @@ #include "_reg_aladin_sym.h" -#include "_reg_maths_eigen.h" /* *************************************************************** */ template @@ -81,7 +80,7 @@ void reg_aladin_sym::InitialiseRegistration() { referenceCentre[2] /= referenceCount; float refCOG[3]{}; if (this->inputReference->sform_code > 0) - reg_mat44_mul(&(this->inputReference->sto_xyz), referenceCentre, refCOG); + Mat44Mul(this->inputReference->sto_xyz, referenceCentre, refCOG); float floatingCentre[3] = { 0, 0, 0 }; float floatingCount = 0; @@ -106,8 +105,8 @@ void reg_aladin_sym::InitialiseRegistration() { floatingCentre[2] /= floatingCount; float floCOG[3]{}; if (this->inputFloating->sform_code > 0) - reg_mat44_mul(&(this->inputFloating->sto_xyz), floatingCentre, floCOG); - reg_mat44_eye(this->affineTransformation.get()); + Mat44Mul(this->inputFloating->sto_xyz, floatingCentre, floCOG); + Mat44Eye(this->affineTransformation.get()); this->affineTransformation->m[0][3] = floCOG[0] - refCOG[0]; this->affineTransformation->m[1][3] = floCOG[1] - refCOG[1]; this->affineTransformation->m[2][3] = floCOG[2] - refCOG[2]; @@ -143,9 +142,9 @@ void reg_aladin_sym::UpdateTransformationMatrix(int type) { mat44 bInverted = nifti_mat44_inverse(*this->affineTransformationBw); // We average the forward and inverted backward matrix - *this->affineTransformation = reg_mat44_avg2(this->affineTransformation.get(), &bInverted); + *this->affineTransformation = Mat44Avg2(this->affineTransformation.get(), &bInverted); // We average the inverted forward and backward matrix - *this->affineTransformationBw = reg_mat44_avg2(&fInverted, this->affineTransformationBw.get()); + *this->affineTransformationBw = Mat44Avg2(&fInverted, this->affineTransformationBw.get()); for (int i = 0; i < 3; ++i) { this->affineTransformation->m[3][i] = 0.f; this->affineTransformationBw->m[3][i] = 0.f; diff --git a/reg-lib/_reg_base.cpp b/reg-lib/_reg_base.cpp index cc5e8be5..4eb441ef 100644 --- a/reg-lib/_reg_base.cpp +++ b/reg-lib/_reg_base.cpp @@ -402,7 +402,7 @@ void reg_base::Initialise() { reg_tools_changeDatatype(tmpReference); // Extract the robust range of the reference image T *refDataPtr = static_cast(tmpReference->data); - reg_heapSort(refDataPtr, tmpReference->nvox); + HeapSort(refDataPtr, tmpReference->nvox); // Update the reference threshold values if no value has been setup by the user if (referenceThresholdLow[0] == std::numeric_limits::lowest()) referenceThresholdLow[0] = refDataPtr[Round((float)tmpReference->nvox * 0.02f)]; @@ -414,7 +414,7 @@ void reg_base::Initialise() { reg_tools_changeDatatype(tmpFloating); // Extract the robust range of the floating image T *floDataPtr = static_cast(tmpFloating->data); - reg_heapSort(floDataPtr, tmpFloating->nvox); + HeapSort(floDataPtr, tmpFloating->nvox); // Update the floating threshold values if no value has been setup by the user if (floatingThresholdLow[0] == std::numeric_limits::lowest()) floatingThresholdLow[0] = floDataPtr[Round((float)tmpFloating->nvox * 0.02f)]; diff --git a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp index 073fcaa6..8314a51f 100644 --- a/reg-lib/cl/ClAffineDeformationFieldKernel.cpp +++ b/reg-lib/cl/ClAffineDeformationFieldKernel.cpp @@ -89,7 +89,7 @@ void ClAffineDeformationFieldKernel::Calculate(bool compose) { const size_t globalWorkSize[dims] = {xBlocks * xThreads, yBlocks * yThreads, zBlocks * zThreads}; const size_t localWorkSize[dims] = {xThreads, yThreads, zThreads}; - mat44 transformationMatrix = compose ? *affineTransformation : reg_mat44_mul(affineTransformation, referenceMatrix); + mat44 transformationMatrix = compose ? *affineTransformation : *affineTransformation * *referenceMatrix; float* trans = (float *)malloc(16 * sizeof(float)); mat44ToCptr(transformationMatrix, trans); diff --git a/reg-lib/cpu/_reg_maths.cpp b/reg-lib/cpu/Maths.cpp similarity index 50% rename from reg-lib/cpu/_reg_maths.cpp rename to reg-lib/cpu/Maths.cpp index 19ed9210..6abb2f72 100644 --- a/reg-lib/cpu/_reg_maths.cpp +++ b/reg-lib/cpu/Maths.cpp @@ -1,86 +1,29 @@ -#include "_reg_tools.h" +#define USE_EIGEN -#define mat(i,j,dim) mat[i*dim+j] +#include "_reg_tools.h" +#include "Eigen/Core" +#include "unsupported/Eigen/MatrixFunctions" /* *************************************************************** */ -template -T* reg_matrix1DAllocate(size_t arraySize) { - T* res = (T*)malloc(arraySize * sizeof(T)); - return res; -} -template bool* reg_matrix1DAllocate(size_t arraySize); -template float* reg_matrix1DAllocate(size_t arraySize); -template double* reg_matrix1DAllocate(size_t arraySize); -/* *************************************************************** */ -template -void reg_matrix1DDeallocate(T* mat) { - free(mat); -} -template void reg_matrix1DDeallocate(bool* mat); -template void reg_matrix1DDeallocate(float* mat); -template void reg_matrix1DDeallocate(double* mat); -/* *************************************************************** */ -template -T** reg_matrix2DAllocate(size_t arraySizeX, size_t arraySizeY) { - T** res; - res = (T**)malloc(arraySizeX * sizeof(T*)); - for (size_t i = 0; i < arraySizeX; i++) { - res[i] = (T*)malloc(arraySizeY * sizeof(T)); - } - return res; -} -template float** reg_matrix2DAllocate(size_t arraySizeX, size_t arraySizeY); -template double** reg_matrix2DAllocate(size_t arraySizeX, size_t arraySizeY); +namespace NiftyReg { /* *************************************************************** */ template -void reg_matrix2DDeallocate(size_t arraySizeX, T** mat) { - for (size_t i = 0; i < arraySizeX; i++) { - free(mat[i]); - } - free(mat); -} -template void reg_matrix2DDeallocate(size_t arraySizeX, float** mat); -template void reg_matrix2DDeallocate(size_t arraySizeX, double** mat); -/* *************************************************************** */ -template -T** reg_matrix2DTranspose(T** mat, size_t arraySizeX, size_t arraySizeY) { - T** res; - res = (T**)malloc(arraySizeY * sizeof(T*)); - for (size_t i = 0; i < arraySizeY; i++) { - res[i] = (T*)malloc(arraySizeX * sizeof(T)); - } - for (size_t i = 0; i < arraySizeX; i++) { - for (size_t j = 0; j < arraySizeY; j++) { - res[j][i] = mat[i][j]; - } - } - return res; -} -template float** reg_matrix2DTranspose(float** mat, size_t arraySizeX, size_t arraySizeY); -template double** reg_matrix2DTranspose(double** mat, size_t arraySizeX, size_t arraySizeY); -/* *************************************************************** */ -template -T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2) { +T** Matrix2dMultiply(T **mat1, const size_t mat1X, const size_t mat1Y, T **mat2, const size_t mat2X, const size_t mat2Y, const bool transposeMat2) { if (transposeMat2 == false) { // First check that the dimension are appropriate if (mat1Y != mat2X) NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " + std::to_string(mat1Y) + "] [" + std::to_string(mat2X) + " " + std::to_string(mat2Y) + "]"); - size_t nbElement = mat1Y; - double resTemp = 0; - T** res = reg_matrix2DAllocate(mat1X, mat2Y); - + T **res = Matrix2dAlloc(mat1X, mat2Y); for (size_t i = 0; i < mat1X; i++) { for (size_t j = 0; j < mat2Y; j++) { - resTemp = 0; - for (size_t k = 0; k < nbElement; k++) { + double resTemp = 0; + for (size_t k = 0; k < mat1Y; k++) resTemp += static_cast(mat1[i][k]) * static_cast(mat2[k][j]); - } res[i][j] = static_cast(resTemp); } } - //Output return res; } else { // First check that the dimension are appropriate @@ -88,43 +31,34 @@ T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " + std::to_string(mat1Y) + "] [" + std::to_string(mat2Y) + " " + std::to_string(mat2X) + "]"); - size_t nbElement = mat1Y; - double resTemp = 0; - T** res = reg_matrix2DAllocate(mat1X, mat2X); - + T **res = Matrix2dAlloc(mat1X, mat2X); for (size_t i = 0; i < mat1X; i++) { for (size_t j = 0; j < mat2X; j++) { - resTemp = 0; - for (size_t k = 0; k < nbElement; k++) { + double resTemp = 0; + for (size_t k = 0; k < mat1Y; k++) resTemp += static_cast(mat1[i][k]) * static_cast(mat2[j][k]); - } res[i][j] = static_cast(resTemp); } } - //Output return res; } } -template float** reg_matrix2DMultiply(float** mat1, size_t mat1X, size_t mat1Y, float** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2); -template double** reg_matrix2DMultiply(double** mat1, size_t mat1X, size_t mat1Y, double** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2); +template float** Matrix2dMultiply(float** mat1, const size_t mat1X, const size_t mat1Y, float** mat2, const size_t mat2X, const size_t mat2Y, const bool transposeMat2); +template double** Matrix2dMultiply(double** mat1, const size_t mat1X, const size_t mat1Y, double** mat2, const size_t mat2X, const size_t mat2Y, const bool transposeMat2); /* *************************************************************** */ template -void reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, T** resT, bool transposeMat2) { +void Matrix2dMultiply(T **mat1, const size_t mat1X, const size_t mat1Y, T **mat2, const size_t mat2X, const size_t mat2Y, T **resT, const bool transposeMat2) { if (transposeMat2 == false) { // First check that the dimension are appropriate if (mat1Y != mat2X) NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " + std::to_string(mat1Y) + "] [" + std::to_string(mat2X) + " " + std::to_string(mat2Y) + "]"); - size_t nbElement = mat1Y; - double resTemp; - for (size_t i = 0; i < mat1X; i++) { for (size_t j = 0; j < mat2Y; j++) { - resTemp = 0; - for (size_t k = 0; k < nbElement; k++) { + double resTemp = 0; + for (size_t k = 0; k < mat1Y; k++) resTemp += static_cast(mat1[i][k]) * static_cast(mat2[k][j]); - } resT[i][j] = static_cast(resTemp); } } @@ -134,27 +68,23 @@ void reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t NR_FATAL_ERROR("Matrices can not be multiplied due to their size: [" + std::to_string(mat1X) + " " + std::to_string(mat1Y) + "] [" + std::to_string(mat2Y) + " " + std::to_string(mat2X) + "]"); - size_t nbElement = mat1Y; - double resTemp; - for (size_t i = 0; i < mat1X; i++) { for (size_t j = 0; j < mat2X; j++) { - resTemp = 0; - for (size_t k = 0; k < nbElement; k++) { + double resTemp = 0; + for (size_t k = 0; k < mat1Y; k++) resTemp += static_cast(mat1[i][k]) * static_cast(mat2[j][k]); - } resT[i][j] = static_cast(resTemp); } } } } -template void reg_matrix2DMultiply(float** mat1, size_t mat1X, size_t mat1Y, float** mat2, size_t mat2X, size_t mat2Y, float** resT, bool transposeMat2); -template void reg_matrix2DMultiply(double** mat1, size_t mat1X, size_t mat1Y, double** mat2, size_t mat2X, size_t mat2Y, double** resT, bool transposeMat2); +template void Matrix2dMultiply(float** mat1, const size_t mat1X, const size_t mat1Y, float** mat2, const size_t mat2X, const size_t mat2Y, float** resT, const bool transposeMat2); +template void Matrix2dMultiply(double** mat1, const size_t mat1X, const size_t mat1Y, double** mat2, const size_t mat2X, const size_t mat2Y, double** resT, const bool transposeMat2); /* *************************************************************** */ // Multiply a matrix with a vector - we assume correct dimension template -T* reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect) { - T* res = reg_matrix1DAllocate(m); +T* Matrix2dVectorMultiply(T **mat, const size_t m, const size_t n, T* vect) { + T* res = Matrix1dAlloc(m); for (size_t i = 0; i < m; i++) { double resTemp = 0; for (size_t k = 0; k < n; k++) { @@ -164,11 +94,11 @@ T* reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect) { } return res; } -template float* reg_matrix2DVectorMultiply(float** mat, size_t m, size_t n, float* vect); -template double* reg_matrix2DVectorMultiply(double** mat, size_t m, size_t n, double* vect); +template float* Matrix2dVectorMultiply(float** mat, const size_t m, const size_t n, float* vect); +template double* Matrix2dVectorMultiply(double** mat, const size_t m, const size_t n, double* vect); /* *************************************************************** */ template -void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res) { +void Matrix2dVectorMultiply(T **mat, const size_t m, const size_t n, T* vect, T* res) { for (size_t i = 0; i < m; i++) { double resTemp = 0; for (size_t k = 0; k < n; k++) { @@ -177,11 +107,11 @@ void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res) { res[i] = static_cast(resTemp); } } -template void reg_matrix2DVectorMultiply(float** mat, size_t m, size_t n, float* vect, float* res); -template void reg_matrix2DVectorMultiply(double** mat, size_t m, size_t n, double* vect, double* res); +template void Matrix2dVectorMultiply(float** mat, const size_t m, const size_t n, float* vect, float* res); +template void Matrix2dVectorMultiply(double** mat, const size_t m, const size_t n, double* vect, double* res); /* *************************************************************** */ // Heap sort -void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum) { +void HeapSort(float *array_tmp, int *index_tmp, int blockNum) { float *array = &array_tmp[-1]; int *index = &index_tmp[-1]; int l = (blockNum >> 1) + 1; @@ -223,7 +153,7 @@ void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum) { /* *************************************************************** */ // Heap sort template -void reg_heapSort(DataType *array_tmp, int blockNum) { +void HeapSort(DataType *array_tmp, int blockNum) { DataType *array = &array_tmp[-1]; int l = (blockNum >> 1) + 1; int ir = blockNum; @@ -254,31 +184,11 @@ void reg_heapSort(DataType *array_tmp, int blockNum) { array[i] = val; } } -template void reg_heapSort(float *array_tmp, int blockNum); -template void reg_heapSort(double *array_tmp, int blockNum); -/* *************************************************************** */ -bool operator==(mat44 A, mat44 B) { - for (unsigned i = 0; i < 4; ++i) { - for (unsigned j = 0; j < 4; ++j) { - if (A.m[i][j] != B.m[i][j]) - return false; - } - } - return true; -} -/* *************************************************************** */ -bool operator!=(mat44 A, mat44 B) { - for (unsigned i = 0; i < 4; ++i) { - for (unsigned j = 0; j < 4; ++j) { - if (A.m[i][j] != B.m[i][j]) - return true; - } - } - return false; -} +template void HeapSort(float *array_tmp, int blockNum); +template void HeapSort(double *array_tmp, int blockNum); /* *************************************************************** */ template -T reg_mat44_det(mat44 const* A) { +T Mat44Det(const mat44 *A) { double D = static_cast(A->m[0][0]) * static_cast(A->m[1][1]) * static_cast(A->m[2][2]) * static_cast(A->m[3][3]) - static_cast(A->m[0][0]) * static_cast(A->m[1][1]) * static_cast(A->m[3][2]) * static_cast(A->m[2][3]) @@ -306,129 +216,10 @@ T reg_mat44_det(mat44 const* A) { + static_cast(A->m[3][0]) * static_cast(A->m[2][1]) * static_cast(A->m[1][2]) * static_cast(A->m[0][3]); return static_cast(D); } -template float reg_mat44_det(mat44 const* A); -template double reg_mat44_det(mat44 const* A); -/* *************************************************************** */ -void reg_mat33_to_nan(mat33 *A) { - for (int i = 0; i < 3; ++i) - for (int j = 0; j < 3; ++j) - A->m[i][j] = std::numeric_limits::quiet_NaN(); -} -/* *************************************************************** */ -mat33 reg_mat44_to_mat33(mat44 const* A) { - mat33 out; - out.m[0][0] = A->m[0][0]; - out.m[0][1] = A->m[0][1]; - out.m[0][2] = A->m[0][2]; - out.m[1][0] = A->m[1][0]; - out.m[1][1] = A->m[1][1]; - out.m[1][2] = A->m[1][2]; - out.m[2][0] = A->m[2][0]; - out.m[2][1] = A->m[2][1]; - out.m[2][2] = A->m[2][2]; - return out; -} -/* *************************************************************** */ -mat44 reg_mat44_mul(mat44 const* A, mat44 const* B) { - mat44 R; - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - R.m[i][j] = static_cast(static_cast(A->m[i][0]) * static_cast(B->m[0][j]) + - static_cast(A->m[i][1]) * static_cast(B->m[1][j]) + - static_cast(A->m[i][2]) * static_cast(B->m[2][j]) + - static_cast(A->m[i][3]) * static_cast(B->m[3][j])); - } - } - return R; -} -/* *************************************************************** */ -mat44 operator*(mat44 A, mat44 B) { - return reg_mat44_mul(&A, &B); -} -/* *************************************************************** */ -void reg_mat33_mul(mat44 const* mat, float const* in, float *out) { - out[0] = static_cast(static_cast(in[0]) * static_cast(mat->m[0][0]) + - static_cast(in[1]) * static_cast(mat->m[0][1]) + - static_cast(mat->m[0][3])); - out[1] = static_cast(static_cast(in[0]) * static_cast(mat->m[1][0]) + - static_cast(in[1]) * static_cast(mat->m[1][1]) + - static_cast(mat->m[1][3])); -} -/* *************************************************************** */ -void reg_mat33_mul(mat33 const* mat, float const* in, float *out) { - out[0] = static_cast(static_cast(in[0]) * static_cast(mat->m[0][0]) + - static_cast(in[1]) * static_cast(mat->m[0][1]) + - static_cast(mat->m[0][2])); - out[1] = static_cast(static_cast(in[0]) * static_cast(mat->m[1][0]) + - static_cast(in[1]) * static_cast(mat->m[1][1]) + - static_cast(mat->m[1][2])); -} -/* *************************************************************** */ -mat33 reg_mat33_mul(mat33 const* A, mat33 const* B) { - mat33 R; - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - R.m[i][j] = static_cast(static_cast(A->m[i][0]) * static_cast(B->m[0][j]) + - static_cast(A->m[i][1]) * static_cast(B->m[1][j]) + - static_cast(A->m[i][2]) * static_cast(B->m[2][j])); - } - } - return R; -} -/* *************************************************************** */ -mat33 operator*(mat33 A, mat33 B) { - return reg_mat33_mul(&A, &B); -} -/* *************************************************************** */ -mat33 reg_mat33_add(mat33 const* A, mat33 const* B) { - mat33 R; - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - R.m[i][j] = static_cast(static_cast(A->m[i][j]) + static_cast(B->m[i][j])); - } - } - return R; -} +template float Mat44Det(const mat44 *A); +template double Mat44Det(const mat44 *A); /* *************************************************************** */ -mat33 reg_mat33_trans(mat33 A) { - mat33 R; - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - R.m[j][i] = A.m[i][j]; - } - } - return R; -} -/* *************************************************************** */ -mat33 operator+(mat33 A, mat33 B) { - return reg_mat33_add(&A, &B); -} -/* *************************************************************** */ -mat44 reg_mat44_add(mat44 const* A, mat44 const* B) { - mat44 R; - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - R.m[i][j] = static_cast(static_cast(A->m[i][j]) + static_cast(B->m[i][j])); - } - } - return R; -} -/* *************************************************************** */ -mat44 operator+(mat44 A, mat44 B) { - return reg_mat44_add(&A, &B); -} -/* *************************************************************** */ -mat33 reg_mat33_minus(mat33 const* A, mat33 const* B) { - mat33 R; - for (int i = 0; i < 3; i++) { - for (int j = 0; j < 3; j++) { - R.m[i][j] = static_cast(static_cast(A->m[i][j]) - static_cast(B->m[i][j])); - } - } - return R; -} -/* *************************************************************** */ -void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D) { +void Mat33Diagonalize(const mat33 *A, mat33 *Q, mat33 *D) { // A must be a symmetric matrix. // returns Q and D such that // Diagonal matrix D = QT * A * Q; and A = Q*D*QT @@ -522,126 +313,188 @@ void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D) { q[3] /= mq; } } - -/* *************************************************************** */ -mat33 operator-(mat33 A, mat33 B) { - return reg_mat33_minus(&A, &B); -} /* *************************************************************** */ -void reg_mat33_eye(mat33 *mat) { - mat->m[0][0] = 1.f; - mat->m[0][1] = mat->m[0][2] = 0.f; - mat->m[1][1] = 1.f; - mat->m[1][0] = mat->m[1][2] = 0.f; - mat->m[2][2] = 1.f; - mat->m[2][0] = mat->m[2][1] = 0.f; +void Mat44Disp(const mat44 mat, const std::string& title) { + NR_COUT << title << ":\n" + << mat.m[0][0] << "\t" << mat.m[0][1] << "\t" << mat.m[0][2] << "\t" << mat.m[0][3] << "\n" + << mat.m[1][0] << "\t" << mat.m[1][1] << "\t" << mat.m[1][2] << "\t" << mat.m[1][3] << "\n" + << mat.m[2][0] << "\t" << mat.m[2][1] << "\t" << mat.m[2][2] << "\t" << mat.m[2][3] << "\n" + << mat.m[3][0] << "\t" << mat.m[3][1] << "\t" << mat.m[3][2] << "\t" << mat.m[3][3] << std::endl; } /* *************************************************************** */ -mat44 reg_mat44_minus(mat44 const* A, mat44 const* B) { - mat44 R; - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - R.m[i][j] = static_cast(static_cast(A->m[i][j]) - static_cast(B->m[i][j])); - } +/** @brief SVD +* @param in input matrix to decompose - in place +* @param size_m row +* @param size_n colomn +* @param w diagonal term +* @param v rotation part +*/ +template +void Svd(T **in, const size_t size_m, const size_t size_n, T * w, T **v) { + if (size_m == 0 || size_n == 0) + NR_FATAL_ERROR("The specified matrix is empty"); + +#ifdef _WIN32 + long sm, sn, sn2; + long size__m = (long)size_m, size__n = (long)size_n; +#else + size_t sm, sn, sn2; + size_t size__m = size_m, size__n = size_n; +#endif + Eigen::MatrixXd m(size_m, size_n); + + //Convert to Eigen matrix +#ifdef _OPENMP +#pragma omp parallel for default(none) \ + shared(in,m, size__m, size__n) \ + private(sn) +#endif + for (sm = 0; sm < size__m; sm++) + for (sn = 0; sn < size__n; sn++) + m(sm, sn) = static_cast(in[sm][sn]); + + Eigen::JacobiSVD Svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV); + +#ifdef _OPENMP +#pragma omp parallel for default(none) \ + shared(in,Svd,v,w, size__n,size__m) \ + private(sn2, sm) +#endif + for (sn = 0; sn < size__n; sn++) { + w[sn] = static_cast(Svd.singularValues()(sn)); + for (sn2 = 0; sn2 < size__n; sn2++) + v[sn2][sn] = static_cast(Svd.matrixV()(sn2, sn)); + for (sm = 0; sm < size__m; sm++) + in[sm][sn] = static_cast(Svd.matrixU()(sm, sn)); } - return R; -} -/* *************************************************************** */ -mat44 operator-(mat44 A, mat44 B) { - return reg_mat44_minus(&A, &B); -} -/* *************************************************************** */ -void reg_mat44_eye(mat44 *mat) { - mat->m[0][0] = 1.f; - mat->m[0][1] = mat->m[0][2] = mat->m[0][3] = 0.f; - mat->m[1][1] = 1.f; - mat->m[1][0] = mat->m[1][2] = mat->m[1][3] = 0.f; - mat->m[2][2] = 1.f; - mat->m[2][0] = mat->m[2][1] = mat->m[2][3] = 0.f; - mat->m[3][3] = 1.f; - mat->m[3][0] = mat->m[3][1] = mat->m[3][2] = 0.f; } +template void Svd(float **in, const size_t m, const size_t n, float * w, float **v); +template void Svd(double **in, const size_t m, const size_t n, double * w, double **v); /* *************************************************************** */ -void reg_mat44_mul(mat44 const* mat, - float const* in, - float *out) { - out[0] = static_cast(static_cast(mat->m[0][0]) * static_cast(in[0]) + - static_cast(mat->m[0][1]) * static_cast(in[1]) + - static_cast(mat->m[0][2]) * static_cast(in[2]) + - static_cast(mat->m[0][3])); - out[1] = static_cast(static_cast(mat->m[1][0]) * static_cast(in[0]) + - static_cast(mat->m[1][1]) * static_cast(in[1]) + - static_cast(mat->m[1][2]) * static_cast(in[2]) + - static_cast(mat->m[1][3])); - out[2] = static_cast(static_cast(mat->m[2][0]) * static_cast(in[0]) + - static_cast(mat->m[2][1]) * static_cast(in[1]) + - static_cast(mat->m[2][2]) * static_cast(in[2]) + - static_cast(mat->m[2][3])); +template +T Matrix2dDet(T **mat, const size_t m, const size_t n) { + if (m != n) + NR_FATAL_ERROR("The matrix have to be square: [" + std::to_string(m) + " " + std::to_string(n) + "]"); + + double res; + if (m == 2) { + res = static_cast(mat[0][0]) * static_cast(mat[1][1]) - static_cast(mat[1][0]) * static_cast(mat[0][1]); + } else if (m == 3) { + res = (static_cast(mat[0][0]) * (static_cast(mat[1][1]) * static_cast(mat[2][2]) - static_cast(mat[1][2]) * static_cast(mat[2][1]))) - + (static_cast(mat[0][1]) * (static_cast(mat[1][0]) * static_cast(mat[2][2]) - static_cast(mat[1][2]) * static_cast(mat[2][0]))) + + (static_cast(mat[0][2]) * (static_cast(mat[1][0]) * static_cast(mat[2][1]) - static_cast(mat[1][1]) * static_cast(mat[2][0]))); + } else { + // Convert to Eigen format + Eigen::MatrixXd eigenRes(m, n); + for (size_t i = 0; i < m; i++) + for (size_t j = 0; j < n; j++) + eigenRes(i, j) = static_cast(mat[i][j]); + res = eigenRes.determinant(); + } + return static_cast(res); } +template float Matrix2dDet(float **mat, const size_t m, const size_t n); +template double Matrix2dDet(double **mat, const size_t m, const size_t n); /* *************************************************************** */ -void reg_mat44_mul(mat44 const* mat, - double const* in, - double *out) { - double matD[4][4]; - for (int i = 0; i < 4; ++i) - for (int j = 0; j < 4; ++j) - matD[i][j] = static_cast(mat->m[i][j]); +void Mat33Expm(mat33 *tensorIn) { + int sm, sn; + Eigen::Matrix3d tensor; + + // Convert to Eigen format + for (sm = 0; sm < 3; sm++) { + for (sn = 0; sn < 3; sn++) { + float val = tensorIn->m[sm][sn]; + if (val != val) return; + tensor(sm, sn) = static_cast(val); + } + } + + // Compute exp(E) + tensor = tensor.exp(); + + // Convert the result to mat33 format + for (sm = 0; sm < 3; sm++) + for (sn = 0; sn < 3; sn++) + tensorIn->m[sm][sn] = static_cast(tensor(sm, sn)); +} +/* *************************************************************** */ +mat44 Mat44Expm(const mat44 *mat) { + mat44 X; + Eigen::Matrix4d m; + for (size_t i = 0; i < 4; ++i) + for (size_t j = 0; j < 4; ++j) + m(i, j) = static_cast(mat->m[i][j]); - out[0] = matD[0][0] * in[0] + - matD[0][1] * in[1] + - matD[0][2] * in[2] + - matD[0][3]; - out[1] = matD[1][0] * in[0] + - matD[1][1] * in[1] + - matD[1][2] * in[2] + - matD[1][3]; - out[2] = matD[2][0] * in[0] + - matD[2][1] * in[1] + - matD[2][2] * in[2] + - matD[2][3]; - return; + m = m.exp(); + + for (size_t i = 0; i < 4; ++i) + for (size_t j = 0; j < 4; ++j) + X.m[i][j] = static_cast(m(i, j)); + + return X; } /* *************************************************************** */ -mat44 reg_mat44_mul(mat44 const* A, double scalar) { +void Mat33Logm(mat33 *tensorIn) { + int sm, sn; + Eigen::Matrix3d tensor; + + // Convert to Eigen format + bool all_zeros = true; + double det = 0; + for (sm = 0; sm < 3; sm++) { + for (sn = 0; sn < 3; sn++) { + float val = tensorIn->m[sm][sn]; + if (val != 0.f) all_zeros = false; + if (val != val) return; + tensor(sm, sn) = static_cast(val); + } + } + // Actually R case requires invertible and no negative real ev, + // but the only observed case so far was non-invertible. + // determinant is not a perfect check for invertibility and + // identity with zero not great either, but the alternative + // is a general eigensolver and the logarithm function should + // suceed unless convergence just isn't happening. + det = tensor.determinant(); + if (all_zeros || det == 0) { + Mat33ToNan(tensorIn); + return; + } + + // Compute the actual matrix log + tensor = tensor.log(); + + // Convert the result to mat33 format + for (sm = 0; sm < 3; sm++) + for (sn = 0; sn < 3; sn++) + tensorIn->m[sm][sn] = static_cast(tensor(sm, sn)); +} +/* *************************************************************** */ +mat44 Mat44Logm(const mat44 *mat) { + mat44 X; + Eigen::Matrix4d m; + for (char i = 0; i < 4; ++i) + for (char j = 0; j < 4; ++j) + m(i, j) = static_cast(mat->m[i][j]); + m = m.log(); + for (char i = 0; i < 4; ++i) + for (char j = 0; j < 4; ++j) + X.m[i][j] = static_cast(m(i, j)); + return X; +} +/* *************************************************************** */ +mat44 Mat44Avg2(const mat44 *A, const mat44 *B) { mat44 out; - out.m[0][0] = A->m[0][0] * scalar; - out.m[0][1] = A->m[0][1] * scalar; - out.m[0][2] = A->m[0][2] * scalar; - out.m[0][3] = A->m[0][3] * scalar; - out.m[1][0] = A->m[1][0] * scalar; - out.m[1][1] = A->m[1][1] * scalar; - out.m[1][2] = A->m[1][2] * scalar; - out.m[1][3] = A->m[1][3] * scalar; - out.m[2][0] = A->m[2][0] * scalar; - out.m[2][1] = A->m[2][1] * scalar; - out.m[2][2] = A->m[2][2] * scalar; - out.m[2][3] = A->m[2][3] * scalar; - out.m[3][0] = A->m[3][0] * scalar; - out.m[3][1] = A->m[3][1] * scalar; - out.m[3][2] = A->m[3][2] * scalar; - out.m[3][3] = A->m[3][3] * scalar; - return out; -} -/* *************************************************************** */ -void reg_mat44_disp(const mat44& mat, const std::string& title) { - NR_COUT << title << ":\n" - << mat.m[0][0] << "\t" << mat.m[0][1] << "\t" << mat.m[0][2] << "\t" << mat.m[0][3] << "\n" - << mat.m[1][0] << "\t" << mat.m[1][1] << "\t" << mat.m[1][2] << "\t" << mat.m[1][3] << "\n" - << mat.m[2][0] << "\t" << mat.m[2][1] << "\t" << mat.m[2][2] << "\t" << mat.m[2][3] << "\n" - << mat.m[3][0] << "\t" << mat.m[3][1] << "\t" << mat.m[3][2] << "\t" << mat.m[3][3] << std::endl; -} -/* *************************************************************** */ -//is it square distance or just distance? -// Helper function: Get the square of the Euclidean distance -double get_square_distance3D(float * first_point3D, float * second_point3D) { - return sqrt(Square(first_point3D[0] - second_point3D[0]) + - Square(first_point3D[1] - second_point3D[1]) + - Square(first_point3D[2] - second_point3D[2])); + mat44 logA = Mat44Logm(A); + mat44 logB = Mat44Logm(B); + for (int i = 0; i < 4; ++i) { + logA.m[3][i] = 0.f; + logB.m[3][i] = 0.f; + } + logA = logA + logB; + out = logA * 0.5; + return Mat44Expm(&out); } /* *************************************************************** */ -//is it square distance or just distance? -double get_square_distance2D(float * first_point2D, float * second_point2D) { - return sqrt(Square(first_point2D[0] - second_point2D[0]) + - Square(first_point2D[1] - second_point2D[1])); -} +} // namespace NiftyReg /* *************************************************************** */ diff --git a/reg-lib/cpu/Maths.hpp b/reg-lib/cpu/Maths.hpp new file mode 100644 index 00000000..56782eda --- /dev/null +++ b/reg-lib/cpu/Maths.hpp @@ -0,0 +1,363 @@ +/** + * @file Maths.hpp + * @brief Library that contains small math routines + * @author Marc Modat + * @date 25/03/2009 + * + * Created by Marc Modat on 25/03/2009. + * Copyright (c) 2009-2018, University College London + * Copyright (c) 2018, NiftyReg Developers. + * All rights reserved. + * See the LICENSE.txt file in the nifty_reg root folder + * + */ + +#pragma once + +#include "RNifti.h" + +#ifdef _OPENMP +#include +#endif + +#if USE_SSE +#include +#include +#ifdef __SSE3__ +#include +#endif +#endif + +#define _USE_MATH_DEFINES +#include + +#ifdef __CUDACC__ +#define DEVICE __host__ __device__ +#else +#define DEVICE +#endif + +typedef enum { + DEF_FIELD, + DISP_FIELD, + CUB_SPLINE_GRID, + DEF_VEL_FIELD, + DISP_VEL_FIELD, + SPLINE_VEL_GRID, + LIN_SPLINE_GRID +} NREG_TRANS_TYPE; + +/* *************************************************************** */ +namespace NiftyReg { +/* *************************************************************** */ +// The functions in the standard library are slower; so, these are implemented +template +DEVICE inline T Square(const T& x) { + return x * x; +} +template +DEVICE inline T Cube(const T& x) { + return x * x * x; +} +template +DEVICE inline int Floor(const T& x) { + const int i = static_cast(x); + return i - (x < i); +} +template +DEVICE inline int Ceil(const T& x) { + const int i = static_cast(x); + return i + (x > i); +} +template +DEVICE inline int Round(const T& x) { + return static_cast(x + (x >= 0 ? 0.5 : -0.5)); +} +/* *************************************************************** */ +DEVICE inline void Divide(const int num, const int denom, int& quot, int& rem) { + // This will be optimised by the compiler into a single div instruction + quot = num / denom; + rem = num % denom; +} +/* *************************************************************** */ +template +DEVICE inline T* Matrix1dAlloc(const size_t arraySize) { + return static_cast(malloc(arraySize * sizeof(T))); +} +/* *************************************************************** */ +template +DEVICE inline void Matrix1dDealloc(T *mat) { + free(mat); +} +/* *************************************************************** */ +template +DEVICE inline T** Matrix2dAlloc(const size_t arraySizeX, const size_t arraySizeY) { + T **res; + res = static_cast(malloc(arraySizeX * sizeof(T*))); + for (size_t i = 0; i < arraySizeX; i++) + res[i] = static_cast(malloc(arraySizeY * sizeof(T))); + return res; +} +/* *************************************************************** */ +template +DEVICE inline void Matrix2dDealloc(const size_t arraySizeX, T **mat) { + for (size_t i = 0; i < arraySizeX; i++) + free(mat[i]); + free(mat); +} +/* *************************************************************** */ +template +DEVICE inline T** Matrix2dTranspose(T **mat, const size_t arraySizeX, const size_t arraySizeY) { + T **res; + res = static_cast(malloc(arraySizeY * sizeof(T*))); + for (size_t i = 0; i < arraySizeY; i++) + res[i] = static_cast(malloc(arraySizeX * sizeof(T))); + for (size_t i = 0; i < arraySizeX; i++) + for (size_t j = 0; j < arraySizeY; j++) + res[j][i] = mat[i][j]; + return res; +} +/* *************************************************************** */ +template +T** Matrix2dMultiply(T **mat1, const size_t mat1X, const size_t mat1Y, T **mat2, const size_t mat2X, const size_t mat2Y, const bool transposeMat2); +template +void Matrix2dMultiply(T **mat1, const size_t mat1X, const size_t mat1Y, T **mat2, const size_t mat2X, const size_t mat2Y, T **res, const bool transposeMat2); +/* *************************************************************** */ +template +T* Matrix2dVectorMultiply(T **mat, const size_t m, const size_t n, T *vect); +template +void Matrix2dVectorMultiply(T **mat, const size_t m, const size_t n, T *vect, T *res); +/* *************************************************************** */ +/// @brief Subtract two 3-by-3 matrices +DEVICE inline mat33 operator-(const mat33 A, const mat33 B) { + mat33 R; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + R.m[i][j] = static_cast(static_cast(A.m[i][j]) - static_cast(B.m[i][j])); + return R; +} +/* *************************************************************** */ +/// @brief Multiply two 3-by-3 matrices +DEVICE inline mat33 operator*(const mat33 A, const mat33 B) { + mat33 R; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + R.m[i][j] = static_cast(static_cast(A.m[i][0]) * static_cast(B.m[0][j]) + + static_cast(A.m[i][1]) * static_cast(B.m[1][j]) + + static_cast(A.m[i][2]) * static_cast(B.m[2][j])); + return R; +} +/* *************************************************************** */ +/// @brief Multiply a vector with a 3-by-3 matrix +DEVICE inline void Mat33Mul(const mat33 mat, const float(&in)[2], float(&out)[2]) { + out[0] = static_cast(static_cast(in[0]) * static_cast(mat.m[0][0]) + + static_cast(in[1]) * static_cast(mat.m[0][1]) + + static_cast(mat.m[0][2])); + out[1] = static_cast(static_cast(in[0]) * static_cast(mat.m[1][0]) + + static_cast(in[1]) * static_cast(mat.m[1][1]) + + static_cast(mat.m[1][2])); +} +/* *************************************************************** */ +/// @brief Multiply a vector with a 3-by-3 matrix +DEVICE inline void Mat33Mul(const mat44 mat, const float (&in)[2], float (&out)[2]) { + out[0] = static_cast(static_cast(in[0]) * static_cast(mat.m[0][0]) + + static_cast(in[1]) * static_cast(mat.m[0][1]) + + static_cast(mat.m[0][3])); + out[1] = static_cast(static_cast(in[0]) * static_cast(mat.m[1][0]) + + static_cast(in[1]) * static_cast(mat.m[1][1]) + + static_cast(mat.m[1][3])); +} +/* *************************************************************** */ +/// @brief Multiply a scalar with a 3-by-3 matrix multiplied by a vector +template +DEVICE inline void Mat33Mul(const mat33 mat, const float (&in)[3], const float weight, float (&out)[3]) { + out[0] = weight * (mat.m[0][0] * in[0] + mat.m[1][0] * in[1] + mat.m[2][0] * in[2]); + out[1] = weight * (mat.m[0][1] * in[0] + mat.m[1][1] * in[1] + mat.m[2][1] * in[2]); + if constexpr (is3d) + out[2] = weight * (mat.m[0][2] * in[0] + mat.m[1][2] * in[1] + mat.m[2][2] * in[2]); +} +/* *************************************************************** */ +/// @brief Transpose a 3-by-3 matrix +DEVICE inline mat33 Mat33Trans(const mat33 A) { + mat33 R; + for (int i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) + R.m[j][i] = A.m[i][j]; + return R; +} +/* *************************************************************** */ +/// @brief Diagonalize a 3-by-3 matrix +void Mat33Diagonalize(const mat33 *A, mat33 *Q, mat33 *D); +/* *************************************************************** */ +/// @brief Set up a 3-by-3 matrix with an identity +DEVICE inline void Mat33Eye(mat33 *mat) { + mat->m[0][0] = 1.f; + mat->m[0][1] = mat->m[0][2] = 0.f; + mat->m[1][1] = 1.f; + mat->m[1][0] = mat->m[1][2] = 0.f; + mat->m[2][2] = 1.f; + mat->m[2][0] = mat->m[2][1] = 0.f; +} +/* *************************************************************** */ +DEVICE inline void Mat33ToNan(mat33 *A) { + for (int i = 0; i < 3; ++i) + for (int j = 0; j < 3; ++j) + A->m[i][j] = std::numeric_limits::quiet_NaN(); +} +/* *************************************************************** */ +/// @brief Transform a mat44 to a mat33 matrix +DEVICE inline mat33 Mat44ToMat33(const mat44 *A) { + mat33 out; + out.m[0][0] = A->m[0][0]; + out.m[0][1] = A->m[0][1]; + out.m[0][2] = A->m[0][2]; + out.m[1][0] = A->m[1][0]; + out.m[1][1] = A->m[1][1]; + out.m[1][2] = A->m[1][2]; + out.m[2][0] = A->m[2][0]; + out.m[2][1] = A->m[2][1]; + out.m[2][2] = A->m[2][2]; + return out; +} +/* *************************************************************** */ +template +void HeapSort(T *array_tmp, int blockNum); +void HeapSort(float *array_tmp, int *index_tmp, int blockNum); +/* *************************************************************** */ +DEVICE inline bool operator==(const mat44 A, const mat44 B) { + for (char i = 0; i < 4; ++i) + for (char j = 0; j < 4; ++j) + if (A.m[i][j] != B.m[i][j]) + return false; + return true; +} +/* *************************************************************** */ +DEVICE inline bool operator!=(const mat44 A, const mat44 B) { + return !(A == B); +} +/* *************************************************************** */ +/// @brief Multiply two 4-by-4 matrices +DEVICE inline mat44 operator*(const mat44 A, const mat44 B) { + mat44 R; + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + R.m[i][j] = static_cast(static_cast(A.m[i][0]) * static_cast(B.m[0][j]) + + static_cast(A.m[i][1]) * static_cast(B.m[1][j]) + + static_cast(A.m[i][2]) * static_cast(B.m[2][j]) + + static_cast(A.m[i][3]) * static_cast(B.m[3][j])); + return R; +} +/* *************************************************************** */ +/// @brief Multiply a 4-by-4 matrix with a scalar +DEVICE inline mat44 operator*(const mat44 mat, const double scalar) { + mat44 out; + out.m[0][0] = mat.m[0][0] * scalar; + out.m[0][1] = mat.m[0][1] * scalar; + out.m[0][2] = mat.m[0][2] * scalar; + out.m[0][3] = mat.m[0][3] * scalar; + out.m[1][0] = mat.m[1][0] * scalar; + out.m[1][1] = mat.m[1][1] * scalar; + out.m[1][2] = mat.m[1][2] * scalar; + out.m[1][3] = mat.m[1][3] * scalar; + out.m[2][0] = mat.m[2][0] * scalar; + out.m[2][1] = mat.m[2][1] * scalar; + out.m[2][2] = mat.m[2][2] * scalar; + out.m[2][3] = mat.m[2][3] * scalar; + out.m[3][0] = mat.m[3][0] * scalar; + out.m[3][1] = mat.m[3][1] * scalar; + out.m[3][2] = mat.m[3][2] * scalar; + out.m[3][3] = mat.m[3][3] * scalar; + return out; +} +/* *************************************************************** */ +/// @brief Multiply a vector with a 4-by-4 matrix +template +DEVICE inline void Mat44Mul(const mat44 mat, const T(&in)[3], T(&out)[3]) { + out[0] = static_cast(static_cast(mat.m[0][0]) * static_cast(in[0]) + + static_cast(mat.m[0][1]) * static_cast(in[1]) + + static_cast(mat.m[0][2]) * static_cast(in[2]) + + static_cast(mat.m[0][3])); + out[1] = static_cast(static_cast(mat.m[1][0]) * static_cast(in[0]) + + static_cast(mat.m[1][1]) * static_cast(in[1]) + + static_cast(mat.m[1][2]) * static_cast(in[2]) + + static_cast(mat.m[1][3])); + if constexpr (is3d) + out[2] = static_cast(static_cast(mat.m[2][0]) * static_cast(in[0]) + + static_cast(mat.m[2][1]) * static_cast(in[1]) + + static_cast(mat.m[2][2]) * static_cast(in[2]) + + static_cast(mat.m[2][3])); +} +/* *************************************************************** */ +/// @brief Add two 4-by-4 matrices +DEVICE inline mat44 operator+(const mat44 A, const mat44 B) { + mat44 R; + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + R.m[i][j] = static_cast(static_cast(A.m[i][j]) + static_cast(B.m[i][j])); + return R; +} +/* *************************************************************** */ +/// @brief Subtract two 4-by-4 matrices +DEVICE inline mat44 operator-(const mat44 A, const mat44 B) { + mat44 R; + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + R.m[i][j] = static_cast(static_cast(A.m[i][j]) - static_cast(B.m[i][j])); + return R; +} +/* *************************************************************** */ +/// @brief Set up a 4-by-4 matrix with an identity +DEVICE inline void Mat44Eye(mat44 *mat) { + mat->m[0][0] = 1.f; + mat->m[0][1] = mat->m[0][2] = mat->m[0][3] = 0.f; + mat->m[1][1] = 1.f; + mat->m[1][0] = mat->m[1][2] = mat->m[1][3] = 0.f; + mat->m[2][2] = 1.f; + mat->m[2][0] = mat->m[2][1] = mat->m[2][3] = 0.f; + mat->m[3][3] = 1.f; + mat->m[3][0] = mat->m[3][1] = mat->m[3][2] = 0.f; +} +/* *************************************************************** */ +/// @brief Compute the determinant of a 4-by-4 matrix +template +T Mat44Det(const mat44 *A); +/* *************************************************************** */ +/// @brief Display a mat44 matrix +void Mat44Disp(const mat44 mat, const std::string& title); +/* *************************************************************** */ +//is it square distance or just distance? +DEVICE inline double SquareDistance2d(const float *first_point2D, const float *second_point2D) { + return sqrt(Square(first_point2D[0] - second_point2D[0]) + + Square(first_point2D[1] - second_point2D[1])); +} +/* *************************************************************** */ +//is it square distance or just distance? +DEVICE inline double SquareDistance3d(const float *first_point3D, const float *second_point3D) { + return sqrt(Square(first_point3D[0] - second_point3D[0]) + + Square(first_point3D[1] - second_point3D[1]) + + Square(first_point3D[2] - second_point3D[2])); +} +/* *************************************************************** */ +template +void Svd(T **in, const size_t m, const size_t n, T *w, T **v); +/* *************************************************************** */ +template +T Matrix2dDet(T **mat, const size_t m, const size_t n); +/* *************************************************************** */ +/// @brief Compute the log of a 3-by-3 matrix +void Mat33Expm(mat33 *tensorIn); +/* *************************************************************** */ +/// @brief Compute the exp of a 4-by-4 matrix +mat44 Mat44Expm(const mat44 *mat); +/* *************************************************************** */ +/// @brief Compute the log of a 3-by-3 matrix +void Mat33Logm(mat33 *tensorIn); +/* *************************************************************** */ +/// @brief Compute the log of a 4-by-4 matrix +mat44 Mat44Logm(const mat44 *mat); +/* *************************************************************** */ +/// @brief Compute the average of two matrices using a log-euclidean framework +mat44 Mat44Avg2(const mat44 *A, const mat44 *b); +/* *************************************************************** */ +} // namespace NiftyReg +/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_blockMatching.cpp b/reg-lib/cpu/_reg_blockMatching.cpp index e91ef03a..fce081f5 100755 --- a/reg-lib/cpu/_reg_blockMatching.cpp +++ b/reg-lib/cpu/_reg_blockMatching.cpp @@ -185,7 +185,7 @@ void _reg_set_active_blocks(nifti_image *referenceImage, _reg_blockMatchingParam params->activeBlockNumber = params->activeBlockNumber < ((int)params->totalBlockNumber - unusableBlock) ? params->activeBlockNumber : (params->totalBlockNumber - unusableBlock); //params->activeBlockNumber = params->totalBlockNumber - unusableBlock; - reg_heapSort(varianceArray, indexArray, params->totalBlockNumber); + HeapSort(varianceArray, indexArray, params->totalBlockNumber); int *indexArrayPtr = &indexArray[params->totalBlockNumber - 1]; int count = 0; for (int i = 0; i < params->activeBlockNumber; i++) { @@ -432,13 +432,13 @@ void block_matching_method2D(nifti_image * reference, nifti_image * warped, _reg bestDisplacement[1] += referencePosition_temp[1]; bestDisplacement[2] = 0.0f; - reg_mat44_mul(referenceMatrix_xyz, referencePosition_temp, tempPosition); + Mat44Mul(*referenceMatrix_xyz, referencePosition_temp, tempPosition); z = 2 * params->totalBlock[blockIndex]; params->referencePosition[z] = tempPosition[0]; params->referencePosition[z + 1] = tempPosition[1]; - reg_mat44_mul(referenceMatrix_xyz, bestDisplacement, tempPosition); + Mat44Mul(*referenceMatrix_xyz, bestDisplacement, tempPosition); params->warpedPosition[z] = tempPosition[0]; params->warpedPosition[z + 1] = tempPosition[1]; @@ -664,13 +664,13 @@ void block_matching_method3D(nifti_image * reference, bestDisplacement[1] += referencePosition_temp[1]; bestDisplacement[2] += referencePosition_temp[2]; - reg_mat44_mul(referenceMatrix_xyz, referencePosition_temp, tempPosition); + Mat44Mul(*referenceMatrix_xyz, referencePosition_temp, tempPosition); z = 3 * params->totalBlock[blockIndex]; params->referencePosition[z] = tempPosition[0]; params->referencePosition[z+1] = tempPosition[1]; params->referencePosition[z+2] = tempPosition[2]; - reg_mat44_mul(referenceMatrix_xyz, bestDisplacement, tempPosition); + Mat44Mul(*referenceMatrix_xyz, bestDisplacement, tempPosition); params->warpedPosition[z] = tempPosition[0]; params->warpedPosition[z + 1] = tempPosition[1]; params->warpedPosition[z + 2] = tempPosition[2]; @@ -757,7 +757,7 @@ void optimize(_reg_blockMatchingParam *params, //Can have undefined = NaN in the warped image now - //to not loose the correspondence - so check that: if(in[0] == in[0]){ - reg_mat33_mul(transformation_matrix, in, out); + Mat33Mul(*transformation_matrix, in, out); referencePositionVect.push_back(params->referencePosition[index]); referencePositionVect.push_back(params->referencePosition[index+1]); @@ -802,7 +802,7 @@ void optimize(_reg_blockMatchingParam *params, //Can have undefined = NaN in the warped image now - //to not loose the correspondence - so check that: if(in[0] == in[0]){ - reg_mat44_mul(transformation_matrix, in, out); + Mat44Mul(*transformation_matrix, in, out); referencePositionVect.push_back(params->referencePosition[index]); referencePositionVect.push_back(params->referencePosition[index+1]); diff --git a/reg-lib/cpu/_reg_blockMatching.h b/reg-lib/cpu/_reg_blockMatching.h index f370df90..bf8095a0 100755 --- a/reg-lib/cpu/_reg_blockMatching.h +++ b/reg-lib/cpu/_reg_blockMatching.h @@ -14,7 +14,7 @@ #pragma once -#include "_reg_maths.h" +#include "Maths.hpp" #define TOLERANCE 0.001 #define MAX_ITERATIONS 30 diff --git a/reg-lib/cpu/_reg_globalTrans.cpp b/reg-lib/cpu/_reg_globalTrans.cpp index a2e8ef60..dcf59de7 100755 --- a/reg-lib/cpu/_reg_globalTrans.cpp +++ b/reg-lib/cpu/_reg_globalTrans.cpp @@ -11,8 +11,7 @@ */ #include "_reg_globalTrans.h" -#include "_reg_maths.h" -#include "_reg_maths_eigen.h" +#include "Maths.hpp" /* *************************************************************** */ /* *************************************************************** */ @@ -28,15 +27,13 @@ void reg_affine_deformationField2D(mat44 *affineTransformation, mat44 *referenceMatrix; if(deformationFieldImage->sform_code>0) - { - referenceMatrix=&(deformationFieldImage->sto_xyz); - } - else referenceMatrix=&(deformationFieldImage->qto_xyz); + referenceMatrix=&deformationFieldImage->sto_xyz; + else referenceMatrix=&deformationFieldImage->qto_xyz; mat44 transformationMatrix; if(composition) transformationMatrix = *affineTransformation; - else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix); + else transformationMatrix = *affineTransformation * *referenceMatrix; double voxel[3]={0,0,0}, position[3]={0,0,0}; int x=0, y=0; @@ -61,9 +58,9 @@ void reg_affine_deformationField2D(mat44 *affineTransformation, { voxel[0] = (double) deformationFieldPtrX[index]; voxel[1] = (double) deformationFieldPtrY[index]; - reg_mat44_mul(&transformationMatrix, voxel, position); + Mat44Mul(transformationMatrix, voxel, position); } - else reg_mat44_mul(&transformationMatrix, voxel, position); + else Mat44Mul(transformationMatrix, voxel, position); /* the deformation field (real coordinates) is stored */ deformationFieldPtrX[index] = (FieldTYPE) position[0]; @@ -87,15 +84,13 @@ void reg_affine_deformationField3D(mat44 *affineTransformation, mat44 *referenceMatrix; if(deformationFieldImage->sform_code>0) - { - referenceMatrix=&(deformationFieldImage->sto_xyz); - } - else referenceMatrix=&(deformationFieldImage->qto_xyz); + referenceMatrix=&deformationFieldImage->sto_xyz; + else referenceMatrix=&deformationFieldImage->qto_xyz; mat44 transformationMatrix; if(composition) transformationMatrix = *affineTransformation; - else transformationMatrix = reg_mat44_mul(affineTransformation, referenceMatrix); + else transformationMatrix = *affineTransformation * *referenceMatrix; double voxel[3]={0,0,0}, position[3]={0,0,0}; int x=0, y=0, z=0; @@ -124,7 +119,7 @@ void reg_affine_deformationField3D(mat44 *affineTransformation, voxel[1]= (double) deformationFieldPtrY[index]; voxel[2]= (double) deformationFieldPtrZ[index]; } - reg_mat44_mul(&transformationMatrix, voxel, position); + Mat44Mul(transformationMatrix, voxel, position); /* the deformation field (real coordinates) is stored */ deformationFieldPtrX[index] = (FieldTYPE) position[0]; @@ -207,9 +202,9 @@ void estimate_rigid_transformation2D(float** points1, float** points2, int num_p centroid_warpedFloat[0] = static_cast(centroid_warped[0]); centroid_warpedFloat[1] = static_cast(centroid_warped[1]); - float * w = reg_matrix1DAllocate(2); - float **v = reg_matrix2DAllocate(2, 2); - float **r = reg_matrix2DAllocate(2, 2); + float * w = Matrix1dAlloc(2); + float **v = Matrix2dAlloc(2, 2); + float **r = Matrix2dAlloc(2, 2); // Demean the input points for (int j = 0; j < num_points; ++j) { @@ -220,24 +215,24 @@ void estimate_rigid_transformation2D(float** points1, float** points2, int num_p points2[j][1] = static_cast(static_cast(points2[j][1]) - static_cast(centroid_warpedFloat[1])); } - float **p1t = reg_matrix2DTranspose(points1, num_points, 2); - float **u = reg_matrix2DMultiply(p1t,2, num_points, points2, num_points, 2, false); + float **p1t = Matrix2dTranspose(points1, num_points, 2); + float **u = Matrix2dMultiply(p1t,2, num_points, points2, num_points, 2, false); - svd(u, 2, 2, w, v); + Svd(u, 2, 2, w, v); // Calculate transpose - float **ut = reg_matrix2DTranspose(u, 2, 2); + float **ut = Matrix2dTranspose(u, 2, 2); // Calculate the rotation matrix - reg_matrix2DMultiply(v, 2, 2, ut, 2, 2, r, false); + Matrix2dMultiply(v, 2, 2, ut, 2, 2, r, false); - float det = reg_matrix2DDet(r, 2, 2); + float det = Matrix2dDet(r, 2, 2); // Take care of possible reflection if (det < 0) { v[0][1] = -v[0][1]; v[1][1] = -v[1][1]; - reg_matrix2DMultiply(v, 2, 2, ut, 2, 2, r, false); + Matrix2dMultiply(v, 2, 2, ut, 2, 2, r, false); } // Calculate the translation @@ -271,12 +266,12 @@ void estimate_rigid_transformation2D(float** points1, float** points2, int num_p transformation->m[3][3] = 1.0f; // Do the deletion here - reg_matrix2DDeallocate(2, u); - reg_matrix1DDeallocate(w); - reg_matrix2DDeallocate(2, v); - reg_matrix2DDeallocate(2, ut); - reg_matrix2DDeallocate(2, r); - // reg_matrix2DDeallocate(2, p1t); + Matrix2dDealloc(2, u); + Matrix1dDealloc(w); + Matrix2dDealloc(2, v); + Matrix2dDealloc(2, ut); + Matrix2dDealloc(2, r); + // Matrix2dDealloc(2, p1t); for(size_t dance=0;dance<2;++dance) free(p1t[dance]); free(p1t); } /* *************************************************************** */ @@ -284,8 +279,8 @@ void estimate_rigid_transformation2D(std::vector<_reg_sorted_point2D> &points, m { unsigned num_points = points.size(); - float** points1 = reg_matrix2DAllocate(num_points, 2); - float** points2 = reg_matrix2DAllocate(num_points, 2); + float** points1 = Matrix2dAlloc(num_points, 2); + float** points2 = Matrix2dAlloc(num_points, 2); for (unsigned i = 0; i < num_points; i++) { points1[i][0] = points[i].reference[0]; points1[i][1] = points[i].reference[1]; @@ -294,8 +289,8 @@ void estimate_rigid_transformation2D(std::vector<_reg_sorted_point2D> &points, m } estimate_rigid_transformation2D(points1, points2, num_points, transformation); //FREE MEMORY - reg_matrix2DDeallocate(num_points, points1); - reg_matrix2DDeallocate(num_points, points2); + Matrix2dDealloc(num_points, points1); + Matrix2dDealloc(num_points, points2); } /* *************************************************************** */ void estimate_rigid_transformation3D(float** points1, float** points2, int num_points, mat44 * transformation) @@ -335,9 +330,9 @@ void estimate_rigid_transformation3D(float** points1, float** points2, int num_p centroid_warpedFloat[1] = static_cast(centroid_warped[1]); centroid_warpedFloat[2] = static_cast(centroid_warped[2]); - float * w = reg_matrix1DAllocate(3); - float **v = reg_matrix2DAllocate(3, 3); - float **r = reg_matrix2DAllocate(3, 3); + float * w = Matrix1dAlloc(3); + float **v = Matrix2dAlloc(3, 3); + float **r = Matrix2dAlloc(3, 3); // Demean the input points for (int j = 0; j < num_points; ++j) { @@ -349,27 +344,27 @@ void estimate_rigid_transformation3D(float** points1, float** points2, int num_p points2[j][1] = static_cast(static_cast(points2[j][1]) - static_cast(centroid_warpedFloat[1])); points2[j][2] = static_cast(static_cast(points2[j][2]) - static_cast(centroid_warpedFloat[2])); } - //T** reg_matrix2DTranspose(T** mat, size_t arraySizeX, size_t arraySizeY); - //T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2); - float **p1t = reg_matrix2DTranspose(points1, num_points, 3); - float **u = reg_matrix2DMultiply(p1t,3, num_points, points2, num_points, 3, false); + //T** Matrix2dTranspose(T** mat, size_t arraySizeX, size_t arraySizeY); + //T** Matrix2dMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2); + float **p1t = Matrix2dTranspose(points1, num_points, 3); + float **u = Matrix2dMultiply(p1t,3, num_points, points2, num_points, 3, false); - svd(u, 3, 3, w, v); + Svd(u, 3, 3, w, v); // Calculate transpose - float **ut = reg_matrix2DTranspose(u, 3, 3); + float **ut = Matrix2dTranspose(u, 3, 3); // Calculate the rotation matrix - reg_matrix2DMultiply(v, 3, 3, ut, 3, 3, r, false); + Matrix2dMultiply(v, 3, 3, ut, 3, 3, r, false); - float det = reg_matrix2DDet(r, 3, 3); + float det = Matrix2dDet(r, 3, 3); // Take care of possible reflection if (det < 0) { v[0][2] = -v[0][2]; v[1][2] = -v[1][2]; v[2][2] = -v[2][2]; - reg_matrix2DMultiply(v, 3, 3, ut, 3, 3, r, false); + Matrix2dMultiply(v, 3, 3, ut, 3, 3, r, false); } // Calculate the translation @@ -407,19 +402,19 @@ void estimate_rigid_transformation3D(float** points1, float** points2, int num_p transformation->m[3][3] = 1.0f; // Do the deletion here - reg_matrix2DDeallocate(3, u); - reg_matrix1DDeallocate(w); - reg_matrix2DDeallocate(3, v); - reg_matrix2DDeallocate(3, ut); - reg_matrix2DDeallocate(3, r); - reg_matrix2DDeallocate(3, p1t); + Matrix2dDealloc(3, u); + Matrix1dDealloc(w); + Matrix2dDealloc(3, v); + Matrix2dDealloc(3, ut); + Matrix2dDealloc(3, r); + Matrix2dDealloc(3, p1t); } /* *************************************************************** */ void estimate_rigid_transformation3D(std::vector<_reg_sorted_point3D> &points, mat44 * transformation) { unsigned num_points = points.size(); - float** points1 = reg_matrix2DAllocate(num_points, 3); - float** points2 = reg_matrix2DAllocate(num_points, 3); + float** points1 = Matrix2dAlloc(num_points, 3); + float** points2 = Matrix2dAlloc(num_points, 3); for (unsigned i = 0; i < num_points; i++) { points1[i][0] = points[i].reference[0]; points1[i][1] = points[i].reference[1]; @@ -430,8 +425,8 @@ void estimate_rigid_transformation3D(std::vector<_reg_sorted_point3D> &points, m } estimate_rigid_transformation3D(points1, points2, num_points, transformation); //FREE MEMORY - reg_matrix2DDeallocate(num_points, points1); - reg_matrix2DDeallocate(num_points, points2); + Matrix2dDealloc(num_points, points1); + Matrix2dDealloc(num_points, points2); } /* *************************************************************** */ void estimate_affine_transformation2D(float** points1, float** points2, int num_points, mat44 * transformation) @@ -439,7 +434,7 @@ void estimate_affine_transformation2D(float** points1, float** points2, int num_ //We assume same number of points in both arrays int num_equations = num_points * 2; unsigned c = 0; - float** A = reg_matrix2DAllocate(num_equations, 6); + float** A = Matrix2dAlloc(num_equations, 6); for (int k = 0; k < num_points; ++k) { c = k * 2; @@ -455,10 +450,10 @@ void estimate_affine_transformation2D(float** points1, float** points2, int num_ A[c + 1][5] = 1.0f; } - float* w = reg_matrix1DAllocate(6); - float** v = reg_matrix2DAllocate(6, 6); + float* w = Matrix1dAlloc(6); + float** v = Matrix2dAlloc(6, 6); - svd(A, num_equations, 6, w, v); + Svd(A, num_equations, 6, w, v); for (unsigned k = 0; k < 6; ++k) { if (w[k] < 0.0001) { @@ -479,19 +474,19 @@ void estimate_affine_transformation2D(float** points1, float** points2, int num_ } } - float** r = reg_matrix2DAllocate(6, num_equations); - reg_matrix2DMultiply(v, 6, 6, A, num_equations, 6, r, true); + float** r = Matrix2dAlloc(6, num_equations); + Matrix2dMultiply(v, 6, 6, A, num_equations, 6, r, true); // Now r contains the pseudoinverse // Create vector b and then multiple r*b to get the affine paramsA - float* b = reg_matrix1DAllocate(num_equations); + float* b = Matrix1dAlloc(num_equations); for (int k = 0; k < num_points; ++k) { c = k * 2; b[c] = points2[k][0]; b[c + 1] = points2[k][1]; } - float* transform = reg_matrix1DAllocate(6); - reg_matrix2DVectorMultiply(r, 6, num_equations, b, transform); + float* transform = Matrix1dAlloc(6); + Matrix2dVectorMultiply(r, 6, num_equations, b, transform); transformation->m[0][0] = transform[0]; transformation->m[0][1] = transform[1]; @@ -514,19 +509,19 @@ void estimate_affine_transformation2D(float** points1, float** points2, int num_ transformation->m[3][3] = 1.0f; // Do the deletion here - reg_matrix1DDeallocate(transform); - reg_matrix1DDeallocate(b); - reg_matrix2DDeallocate(6, r); - reg_matrix2DDeallocate(6, v); - reg_matrix1DDeallocate(w); - reg_matrix2DDeallocate(num_equations, A); + Matrix1dDealloc(transform); + Matrix1dDealloc(b); + Matrix2dDealloc(6, r); + Matrix2dDealloc(6, v); + Matrix1dDealloc(w); + Matrix2dDealloc(num_equations, A); } /* *************************************************************** */ void estimate_affine_transformation2D(std::vector<_reg_sorted_point2D> &points, mat44 * transformation) { unsigned num_points = points.size(); - float** points1 = reg_matrix2DAllocate(num_points, 2); - float** points2 = reg_matrix2DAllocate(num_points, 2); + float** points1 = Matrix2dAlloc(num_points, 2); + float** points2 = Matrix2dAlloc(num_points, 2); for (unsigned i = 0; i < num_points; i++) { points1[i][0] = points[i].reference[0]; points1[i][1] = points[i].reference[1]; @@ -535,8 +530,8 @@ void estimate_affine_transformation2D(std::vector<_reg_sorted_point2D> &points, } estimate_affine_transformation2D(points1, points2, num_points, transformation); //FREE MEMORY - reg_matrix2DDeallocate(num_points, points1); - reg_matrix2DDeallocate(num_points, points2); + Matrix2dDealloc(num_points, points1); + Matrix2dDealloc(num_points, points2); } /* *************************************************************** */ // estimate an affine transformation using least square @@ -548,7 +543,7 @@ void estimate_affine_transformation3D(float** points1, float** points2, int num_ // we need at least 4 points. Assuming we have that here. int num_equations = num_points * 3; unsigned c = 0; - float** A = reg_matrix2DAllocate(num_equations, 12); + float** A = Matrix2dAlloc(num_equations, 12); for (int k = 0; k < num_points; ++k) { c = k * 3; @@ -571,10 +566,10 @@ void estimate_affine_transformation3D(float** points1, float** points2, int num_ A[c + 2][11] = 1.0f; } - float* w = reg_matrix1DAllocate(12); - float** v = reg_matrix2DAllocate(12, 12); - // Now we can compute our svd - svd(A, num_equations, 12, w, v); + float* w = Matrix1dAlloc(12); + float** v = Matrix2dAlloc(12, 12); + // Now we can compute our Svd + Svd(A, num_equations, 12, w, v); // First we make sure that the really small singular values // are set to 0. and compute the inverse by taking the reciprocal @@ -600,11 +595,11 @@ void estimate_affine_transformation3D(float** points1, float** points2, int num_ // Now multiply the matrices together // Pseudoinverse = v * w * A(transpose) - float** r = reg_matrix2DAllocate(12, num_equations); - reg_matrix2DMultiply(v, 12, 12, A, num_equations, 12, r, true); + float** r = Matrix2dAlloc(12, num_equations); + Matrix2dMultiply(v, 12, 12, A, num_equations, 12, r, true); // Now r contains the pseudoinverse // Create vector b and then multiple rb to get the affine paramsA - float* b = reg_matrix1DAllocate(num_equations); + float* b = Matrix1dAlloc(num_equations); for (int k = 0; k < num_points; ++k) { c = k * 3; b[c] = points2[k][0]; @@ -612,9 +607,9 @@ void estimate_affine_transformation3D(float** points1, float** points2, int num_ b[c + 2] = points2[k][2]; } - float * transform = reg_matrix1DAllocate(12); + float * transform = Matrix1dAlloc(12); //mul_matvec(r, 12, num_equations, b, transform); - reg_matrix2DVectorMultiply(r, 12, num_equations, b, transform); + Matrix2dVectorMultiply(r, 12, num_equations, b, transform); transformation->m[0][0] = transform[0]; transformation->m[0][1] = transform[1]; @@ -637,20 +632,20 @@ void estimate_affine_transformation3D(float** points1, float** points2, int num_ transformation->m[3][3] = 1.0f; // Do the deletion here - reg_matrix1DDeallocate(transform); - reg_matrix1DDeallocate(b); - reg_matrix2DDeallocate(12, r); - reg_matrix2DDeallocate(12, v); - reg_matrix1DDeallocate(w); - reg_matrix2DDeallocate(num_equations, A); + Matrix1dDealloc(transform); + Matrix1dDealloc(b); + Matrix2dDealloc(12, r); + Matrix2dDealloc(12, v); + Matrix1dDealloc(w); + Matrix2dDealloc(num_equations, A); } /* *************************************************************** */ // estimate an affine transformation using least square void estimate_affine_transformation3D(std::vector<_reg_sorted_point3D> &points, mat44 * transformation) { unsigned num_points = points.size(); - float** points1 = reg_matrix2DAllocate(num_points, 3); - float** points2 = reg_matrix2DAllocate(num_points, 3); + float** points1 = Matrix2dAlloc(num_points, 3); + float** points2 = Matrix2dAlloc(num_points, 3); for (unsigned i = 0; i < num_points; i++) { points1[i][0] = points[i].reference[0]; points1[i][1] = points[i].reference[1]; @@ -661,8 +656,8 @@ void estimate_affine_transformation3D(std::vector<_reg_sorted_point3D> &points, } estimate_affine_transformation3D(points1, points2, num_points, transformation); //FREE MEMORY - reg_matrix2DDeallocate(num_points, points1); - reg_matrix2DDeallocate(num_points, points2); + Matrix2dDealloc(num_points, points1); + Matrix2dDealloc(num_points, points2); } /* *************************************************************** */ ///LTS 2D @@ -671,7 +666,7 @@ void optimize_2D(float* referencePosition, float* warpedPosition, mat44 * final, bool affine) { // Set the current transformation to identity - reg_mat44_eye(final); + Mat44Eye(final); const unsigned num_points = activeBlockNumber; unsigned long num_equations = num_points * 2; @@ -705,13 +700,11 @@ void optimize_2D(float* referencePosition, float* warpedPosition, { // Transform the points in the reference for (unsigned j = 0; j < num_points * 2; j += 2) - { - reg_mat33_mul(final, &referencePosition[j], &newWarpedPosition[j]); - } + Mat33Mul(*final, reinterpret_cast(referencePosition[j]), reinterpret_cast(newWarpedPosition[j])); queue = std::multimap(); for (unsigned j = 0; j < num_points * 2; j += 2) { - distance = get_square_distance2D(&newWarpedPosition[j], &warpedPosition[j]); + distance = SquareDistance2d(&newWarpedPosition[j], &warpedPosition[j]); queue.insert(std::pair(distance, _reg_sorted_point2D(&referencePosition[j], &warpedPosition[j], distance))); } @@ -754,7 +747,7 @@ void optimize_3D(float *referencePosition, float *warpedPosition, mat44 *final, bool affine) { // Set the current transformation to identity - reg_mat44_eye(final); + Mat44Eye(final); const unsigned num_points = activeBlockNumber; unsigned long num_equations = num_points * 3; @@ -785,13 +778,12 @@ void optimize_3D(float *referencePosition, float *warpedPosition, for (int count = 0; count < max_iter; ++count) { // Transform the points in the reference - for (unsigned j = 0; j < num_points * 3; j+=3) { - reg_mat44_mul(final, &referencePosition[j], &newWarpedPosition[j]); - } + for (unsigned j = 0; j < num_points * 3; j+=3) + Mat44Mul(*final, reinterpret_cast(referencePosition[j]), reinterpret_cast(newWarpedPosition[j])); queue = std::multimap(); for (unsigned j = 0; j < num_points * 3; j+= 3) { - distance = get_square_distance3D(&newWarpedPosition[j], &warpedPosition[j]); + distance = SquareDistance3d(&newWarpedPosition[j], &warpedPosition[j]); queue.insert(std::pair(distance, _reg_sorted_point3D(&referencePosition[j], @@ -823,6 +815,6 @@ void optimize_3D(float *referencePosition, float *warpedPosition, estimate_rigid_transformation3D(top_points, final); } } - delete [] newWarpedPosition; + delete[] newWarpedPosition; } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_localTrans.cpp b/reg-lib/cpu/_reg_localTrans.cpp index c3e17149..d070bee1 100755 --- a/reg-lib/cpu/_reg_localTrans.cpp +++ b/reg-lib/cpu/_reg_localTrans.cpp @@ -11,7 +11,6 @@ */ #include "_reg_localTrans.h" -#include "_reg_maths_eigen.h" // Due to SSE usage creates incorrect test results #if defined(BUILD_TESTS) && !defined(NDEBUG) @@ -82,7 +81,7 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage, originIndex[1] = -1.0f; originIndex[2] = 0.0f; if (referenceImage->nz > 1) originIndex[2] = -1.0f; - reg_mat44_mul(&controlPointGridImage->qto_xyz, originIndex, originReal); + Mat44Mul(controlPointGridImage->qto_xyz, originIndex, originReal); controlPointGridImage->qto_xyz.m[0][3] = controlPointGridImage->qoffset_x = originReal[0]; controlPointGridImage->qto_xyz.m[1][3] = controlPointGridImage->qoffset_y = originReal[1]; controlPointGridImage->qto_xyz.m[2][3] = controlPointGridImage->qoffset_z = originReal[2]; @@ -114,7 +113,7 @@ void reg_createControlPointGrid(NiftiImage& controlPointGridImage, controlPointGridImage->sto_xyz.m[3][3] = referenceImage->sto_xyz.m[3][3]; // Origin is shifted from 1 control point in the sform - reg_mat44_mul(&controlPointGridImage->sto_xyz, originIndex, originReal); + Mat44Mul(controlPointGridImage->sto_xyz, originIndex, originReal); controlPointGridImage->sto_xyz.m[0][3] = originReal[0]; controlPointGridImage->sto_xyz.m[1][3] = originReal[1]; controlPointGridImage->sto_xyz.m[2][3] = originReal[2]; @@ -152,24 +151,24 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, mat44 halfForwardAffine, halfBackwardAffine; if (forwardAffineTrans != nullptr) { // Compute half of the affine transformation - ref to flo - halfForwardAffine = reg_mat44_logm(forwardAffineTrans); - halfForwardAffine = reg_mat44_mul(&halfForwardAffine, .5f); - halfForwardAffine = reg_mat44_expm(&halfForwardAffine); + halfForwardAffine = Mat44Logm(forwardAffineTrans); + halfForwardAffine = halfForwardAffine * 0.5f; + halfForwardAffine = Mat44Expm(&halfForwardAffine); // Compute half of the affine transformation - flo to ref // Note that this is done twice for symmetry consideration halfBackwardAffine = nifti_mat44_inverse(*forwardAffineTrans); - halfBackwardAffine = reg_mat44_logm(&halfBackwardAffine); - halfBackwardAffine = reg_mat44_mul(&halfBackwardAffine, .5f); - halfBackwardAffine = reg_mat44_expm(&halfBackwardAffine); + halfBackwardAffine = Mat44Logm(&halfBackwardAffine); + halfBackwardAffine = halfBackwardAffine * 0.5f; + halfBackwardAffine = Mat44Expm(&halfBackwardAffine); NR_WARN("Note that the symmetry of the registration is affected by the input affine transformation"); } else { - reg_mat44_eye(&halfForwardAffine); - reg_mat44_eye(&halfBackwardAffine); + Mat44Eye(&halfForwardAffine); + Mat44Eye(&halfBackwardAffine); } // Update the reference and floating transformation to propagate to a mid space - referenceImageSpace = reg_mat44_mul(&halfForwardAffine, &referenceImageSpace); - floatingImageSpace = reg_mat44_mul(&halfBackwardAffine, &floatingImageSpace); + referenceImageSpace = halfForwardAffine * referenceImageSpace; + floatingImageSpace = halfBackwardAffine * floatingImageSpace; // Define the largest field of view in the mid space float minPosition[3] = { 0, 0, 0 }, maxPosition[3] = { 0, 0, 0 }; @@ -197,11 +196,11 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, }; float out[3]; for (int c = 0; c < 8; ++c) { - reg_mat44_mul(&referenceImageSpace, referenceImageCorners[c], out); + Mat44Mul(referenceImageSpace, referenceImageCorners[c], out); referenceImageCorners[c][0] = out[0]; referenceImageCorners[c][1] = out[1]; referenceImageCorners[c][2] = out[2]; - reg_mat44_mul(&floatingImageSpace, floatingImageCorners[c], out); + Mat44Mul(floatingImageSpace, floatingImageCorners[c], out); floatingImageCorners[c][0] = out[0]; floatingImageCorners[c][1] = out[1]; floatingImageCorners[c][2] = out[2]; @@ -299,10 +298,10 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, // Set the control point grid image orientation forwardGridImage->qform_code = backwardGridImage->qform_code = 0; forwardGridImage->sform_code = backwardGridImage->sform_code = 1; - reg_mat44_eye(&forwardGridImage->sto_xyz); - reg_mat44_eye(&backwardGridImage->sto_xyz); - reg_mat44_eye(&forwardGridImage->sto_ijk); - reg_mat44_eye(&backwardGridImage->sto_ijk); + Mat44Eye(&forwardGridImage->sto_xyz); + Mat44Eye(&backwardGridImage->sto_xyz); + Mat44Eye(&forwardGridImage->sto_ijk); + Mat44Eye(&backwardGridImage->sto_ijk); for (unsigned i = 0; i < 3; ++i) { if (referenceImage->nz > 1 || i < 2) { forwardGridImage->sto_xyz.m[i][i] = backwardGridImage->sto_xyz.m[i][i] = spacing[i]; @@ -320,7 +319,7 @@ void reg_createSymmetricControlPointGrids(NiftiImage& forwardGridImage, forwardGridImage->intent_p1 = backwardGridImage->intent_p1 = CUB_SPLINE_GRID; // Set the affine matrices mat44 identity; - reg_mat44_eye(&identity); + Mat44Eye(&identity); if (forwardGridImage->ext_list != nullptr) free(forwardGridImage->ext_list); if (backwardGridImage->ext_list != nullptr) @@ -1570,28 +1569,28 @@ void reg_voxelCentricToNodeCentric(nifti_image *nodeImage, if (nodeImage->ext_list[0].edata != nullptr) { mat44 temp = *(reinterpret_cast(nodeImage->ext_list[0].edata)); temp = nifti_mat44_inverse(temp); - transformation = reg_mat44_mul(&temp, &transformation); + transformation = temp * transformation; } } // millimetre to voxel in the reference image if (voxelImage->sform_code > 0) - transformation = reg_mat44_mul(&voxelImage->sto_ijk, &transformation); - else transformation = reg_mat44_mul(&voxelImage->qto_ijk, &transformation); + transformation = voxelImage->sto_ijk * transformation; + else transformation = voxelImage->qto_ijk * transformation; // The information has to be reoriented mat33 reorientation; // Voxel to millimetre contains the orientation of the image that is used // to compute the spatial gradient (floating image) if (voxelToMillimetre != nullptr) { - reorientation = reg_mat44_to_mat33(voxelToMillimetre); + reorientation = Mat44ToMat33(voxelToMillimetre); if (nodeImage->num_ext > 0) { if (nodeImage->ext_list[0].edata != nullptr) { - mat33 temp = reg_mat44_to_mat33(reinterpret_cast(nodeImage->ext_list[0].edata)); + mat33 temp = Mat44ToMat33(reinterpret_cast(nodeImage->ext_list[0].edata)); temp = nifti_mat33_inverse(temp); reorientation = nifti_mat33_mul(temp, reorientation); } } - } else reg_mat33_eye(&reorientation); + } else Mat33Eye(&reorientation); // The information has to be weighted float ratio[3] = { nodeImage->dx, nodeImage->dy, nodeImage->dz }; for (int i = 0; i < (nodeImage->nz > 1 ? 3 : 2); ++i) { @@ -1611,7 +1610,7 @@ void reg_voxelCentricToNodeCentric(nifti_image *nodeImage, nodeCoord[1] = static_cast(y); for (int x = 0; x < nodeImage->nx; x++) { nodeCoord[0] = static_cast(x); - reg_mat44_mul(&transformation, nodeCoord, voxelCoord); + Mat44Mul(transformation, nodeCoord, voxelCoord); // linear interpolation is performed DataType basisX[2], basisY[2], basisZ[2] = { 0, 0 }; int pre[3] = { @@ -2173,7 +2172,7 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid, originIndex[1] = -1.0f; originIndex[2] = 0.0f; if (referenceImage->nz > 1) originIndex[2] = -1.0f; - reg_mat44_mul(&(controlPointGrid->qto_xyz), originIndex, originReal); + Mat44Mul(controlPointGrid->qto_xyz, originIndex, originReal); if (controlPointGrid->qform_code == 0 && controlPointGrid->sform_code == 0) controlPointGrid->qform_code = 1; controlPointGrid->qto_xyz.m[0][3] = controlPointGrid->qoffset_x = originReal[0]; @@ -2211,7 +2210,7 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid, float originIndex[3]; originIndex[0] = originIndex[1] = originIndex[2] = -1; if (referenceImage->nz <= 1) originIndex[2] = 0; - reg_mat44_mul(&(controlPointGrid->sto_xyz), originIndex, originReal); + Mat44Mul(controlPointGrid->sto_xyz, originIndex, originReal); controlPointGrid->sto_xyz.m[0][3] = originReal[0]; controlPointGrid->sto_xyz.m[1][3] = originReal[1]; controlPointGrid->sto_xyz.m[2][3] = originReal[2]; @@ -2228,7 +2227,7 @@ void reg_spline_refineControlPointGrid(nifti_image *controlPointGrid, // The origin is shifted by one node when compared to the previous origin float nodeCoord[3] = { 1, 1, 1 }; float newOrigin[3]; - reg_mat44_mul(&controlPointGrid->sto_xyz, nodeCoord, newOrigin); + Mat44Mul(controlPointGrid->sto_xyz, nodeCoord, newOrigin); controlPointGrid->sto_xyz.m[0][3] = newOrigin[0]; controlPointGrid->sto_xyz.m[1][3] = newOrigin[1]; if (controlPointGrid->nz > 1) @@ -2398,7 +2397,6 @@ void reg_defField_compose3D(const nifti_image *deformationField, df_real2Voxel.m[2][1] * realDef[1] + df_real2Voxel.m[2][2] * realDef[2] + df_real2Voxel.m[2][3]; - //reg_mat44_mul(df_real2Voxel, realDef, voxel); // Linear interpolation to compute the new deformation pre[0] = Floor(voxel[0]); @@ -2501,7 +2499,7 @@ inline static int FastWarp(double x, double y, double z, nifti_image *deformatio FieldTYPE *wpz; int xw, yw, zw, dxw, dyw, dxyw, dxyzw; double wxf, wyf, wzf, wyzf; - double world[4], position[4]; + double world[3], position[3]; FieldTYPE *warpdata = static_cast(deformationField->data); @@ -2527,8 +2525,7 @@ inline static int FastWarp(double x, double y, double z, nifti_image *deformatio world[0] = x; world[1] = y; world[2] = z; - world[3] = 1; - reg_mat44_mul(deformationFieldIJKMatrix, world, position); + Mat44Mul(*deformationFieldIJKMatrix, world, position); x = position[0]; y = position[1]; z = position[2]; @@ -2947,13 +2944,12 @@ void reg_defFieldInvert3D(nifti_image *inputDeformationField, if (inputDeformationField->sform_code > 0) InXYZMatrix = &inputDeformationField->sto_xyz; else InXYZMatrix = &inputDeformationField->qto_xyz; - float center[4], center2[4]; - double centerout[4], delta[4]; + float center[3], center2[3]; + double centerout[3], delta[3]; center[0] = static_cast(inputDeformationField->nx / 2); center[1] = static_cast(inputDeformationField->ny / 2); center[2] = static_cast(inputDeformationField->nz / 2); - center[3] = 1; - reg_mat44_mul(InXYZMatrix, center, center2); + Mat44Mul(*InXYZMatrix, center, center2); FastWarp(center2[0], center2[1], center2[2], inputDeformationField, ¢erout[0], ¢erout[1], ¢erout[2]); delta[0] = center2[0] - centerout[0]; delta[1] = center2[1] - centerout[1]; @@ -2962,7 +2958,7 @@ void reg_defFieldInvert3D(nifti_image *inputDeformationField, int i, x, y, z; - double position[4], pars[4], arrayy[4][3]; + double position[3], pars[3], arrayy[4][3]; struct ddata dat; DataType *outData; #ifdef _OPENMP @@ -2986,8 +2982,7 @@ void reg_defFieldInvert3D(nifti_image *inputDeformationField, position[0] = x; position[1] = y; position[2] = z; - position[3] = 1; - reg_mat44_mul(OutXYZMatrix, position, pars); + Mat44Mul(*OutXYZMatrix, position, pars); dat.gx = pars[0]; dat.gy = pars[1]; dat.gz = pars[2]; diff --git a/reg-lib/cpu/_reg_localTrans_jac.cpp b/reg-lib/cpu/_reg_localTrans_jac.cpp index 75c0b6ee..303057cb 100755 --- a/reg-lib/cpu/_reg_localTrans_jac.cpp +++ b/reg-lib/cpu/_reg_localTrans_jac.cpp @@ -74,8 +74,8 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, // Define a matrix to reorient the Jacobian matrices and normalise them by the grid spacing mat33 reorientation,jacobianMatrix; if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); + reorientation = Mat44ToMat33(&splineControlPoint->sto_ijk); + else reorientation = Mat44ToMat33(&splineControlPoint->qto_ijk); // Useful variables int x, y, z; @@ -143,13 +143,11 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, else transformation=referenceImage->qto_xyz; // affine: mm to mm if(splineControlPoint->num_ext>0) - transformation=reg_mat44_mul( - reinterpret_cast(splineControlPoint->ext_list[0].edata), - &transformation); + transformation=reinterpret_cast(*splineControlPoint->ext_list[0].edata) * transformation; // grid: mm to voxel if(splineControlPoint->sform_code>0) - transformation=reg_mat44_mul(&(splineControlPoint->sto_ijk), &transformation); - else transformation=reg_mat44_mul(&(splineControlPoint->qto_ijk), &transformation); + transformation=splineControlPoint->sto_ijk * transformation; + else transformation=splineControlPoint->qto_ijk * transformation; float imageCoord[3], gridCoord[3]; for(z=0; znz; z++) @@ -163,7 +161,7 @@ void reg_linear_spline_jacobian3D(nifti_image *splineControlPoint, { imageCoord[0]=x; // Compute the position in the grid - reg_mat44_mul(&transformation,imageCoord,gridCoord); + Mat44Mul(transformation,imageCoord,gridCoord); // Compute the anterior node coord pre[0]=Floor(gridCoord[0]); pre[1]=Floor(gridCoord[1]); @@ -265,8 +263,8 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, // Define a matrice to reorient the Jacobian matrices and normalise them by the grid spacing mat33 reorientation,jacobianMatrix; if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); + reorientation = Mat44ToMat33(&splineControlPoint->sto_ijk); + else reorientation = Mat44ToMat33(&splineControlPoint->qto_ijk); // Useful variables int x, y, incr0; @@ -361,13 +359,11 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, else transformation=referenceImage->qto_xyz; // affine: mm to mm if(splineControlPoint->num_ext>0) - transformation=reg_mat44_mul( - reinterpret_cast(splineControlPoint->ext_list[0].edata), - &transformation); + transformation=reinterpret_cast(*splineControlPoint->ext_list[0].edata) * transformation; // grid: mm to voxel if(splineControlPoint->sform_code>0) - transformation=reg_mat44_mul(&(splineControlPoint->sto_ijk), &transformation); - else transformation=reg_mat44_mul(&(splineControlPoint->qto_ijk), &transformation); + transformation=splineControlPoint->sto_ijk * transformation; + else transformation=splineControlPoint->qto_ijk * transformation; float imageCoord[3], gridCoord[3], basis; imageCoord[2]=0; @@ -380,7 +376,7 @@ void reg_cubic_spline_jacobian2D(nifti_image *splineControlPoint, { imageCoord[0]=x; // Compute the position in the grid - reg_mat44_mul(&transformation,imageCoord,gridCoord); + Mat44Mul(transformation,imageCoord,gridCoord); // Compute the anterior node coord pre[0]=Floor(gridCoord[0]); pre[1]=Floor(gridCoord[1]); @@ -539,8 +535,8 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, // Define a matrice to reorient the Jacobian matrices and normalise them by the grid spacing mat33 reorientation,jacobianMatrix; if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); + reorientation = Mat44ToMat33(&splineControlPoint->sto_ijk); + else reorientation = Mat44ToMat33(&splineControlPoint->qto_ijk); // Useful variables int x, y, z, incr0; @@ -771,13 +767,11 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, else transformation=referenceImage->qto_xyz; // affine: mm to mm if(splineControlPoint->num_ext>0) - transformation=reg_mat44_mul( - reinterpret_cast(splineControlPoint->ext_list[0].edata), - &transformation); + transformation=reinterpret_cast(*splineControlPoint->ext_list[0].edata) * transformation; // grid: mm to voxel if(splineControlPoint->sform_code>0) - transformation=reg_mat44_mul(&(splineControlPoint->sto_ijk), &transformation); - else transformation=reg_mat44_mul(&(splineControlPoint->qto_ijk), &transformation); + transformation=splineControlPoint->sto_ijk * transformation; + else transformation=splineControlPoint->qto_ijk * transformation; float imageCoord[3], gridCoord[3], basis; for(z=0; znz; z++) @@ -792,7 +786,7 @@ void reg_cubic_spline_jacobian3D(nifti_image *splineControlPoint, { imageCoord[0]=x; // Compute the position in the grid - reg_mat44_mul(&transformation,imageCoord,gridCoord); + Mat44Mul(transformation,imageCoord,gridCoord); // Compute the anterior node coord pre[0]=Floor(gridCoord[0]); pre[1]=Floor(gridCoord[1]); @@ -1342,8 +1336,8 @@ void reg_spline_jacobianDetGradient2D(nifti_image *splineControlPoint, // Matrices to be used to convert the gradient from voxel to mm mat33 jacobianMatrix, reorientation; if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz); + reorientation = Mat44ToMat33(&splineControlPoint->sto_xyz); + else reorientation = Mat44ToMat33(&splineControlPoint->qto_xyz); // Ratio to be used for normalisation size_t jacobianNumber; @@ -1580,8 +1574,8 @@ void reg_spline_jacobianDetGradient3D(nifti_image *splineControlPoint, // Matrices to be used to convert the gradient from voxel to mm mat33 jacobianMatrix, reorientation; if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz); + reorientation = Mat44ToMat33(&splineControlPoint->sto_xyz); + else reorientation = Mat44ToMat33(&splineControlPoint->qto_xyz); // Ratio to be used for normalisation size_t jacobianNumber; @@ -1946,8 +1940,8 @@ double reg_spline_correctFolding2D(nifti_image *splineControlPoint, mat33 jacobianMatrix, reorientation; if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz); + reorientation = Mat44ToMat33(&splineControlPoint->sto_xyz); + else reorientation = Mat44ToMat33(&splineControlPoint->qto_xyz); const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); DataType *controlPointPtrX = static_cast(splineControlPoint->data); @@ -2195,8 +2189,8 @@ double reg_spline_correctFolding3D(nifti_image *splineControlPoint, mat33 jacobianMatrix, reorientation; if(splineControlPoint->sform_code>0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_xyz); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_xyz); + reorientation = Mat44ToMat33(&splineControlPoint->sto_xyz); + else reorientation = Mat44ToMat33(&splineControlPoint->qto_xyz); const size_t nodeNumber = NiftiImage::calcVoxelNumber(splineControlPoint, 3); DataType *controlPointPtrX = static_cast(splineControlPoint->data); @@ -2629,13 +2623,13 @@ void reg_defField_getJacobianMap2D(nifti_image *deformationField, if(deformationField->sform_code>0) { reg_getRealImageSpacing(deformationField,spacing); - reorientation=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz))); + reorientation=nifti_mat33_inverse(nifti_mat33_polar(Mat44ToMat33(&deformationField->sto_xyz))); } else { spacing[0]=deformationField->dx; spacing[1]=deformationField->dy; - reorientation=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->qto_xyz))); + reorientation=nifti_mat33_inverse(nifti_mat33_polar(Mat44ToMat33(&deformationField->qto_xyz))); } DataType *deformationPtrX = static_cast(deformationField->data); @@ -2738,14 +2732,14 @@ void reg_defField_getJacobianMap3D(nifti_image *deformationField, if(deformationField->sform_code>0) { reg_getRealImageSpacing(deformationField,spacing); - reorientation=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz))); + reorientation=nifti_mat33_inverse(nifti_mat33_polar(Mat44ToMat33(&deformationField->sto_xyz))); } else { spacing[0]=deformationField->dx; spacing[1]=deformationField->dy; spacing[2]=deformationField->dz; - reorientation=nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->qto_xyz))); + reorientation=nifti_mat33_inverse(nifti_mat33_polar(Mat44ToMat33(&deformationField->qto_xyz))); } DataType *deformationPtrX = static_cast(deformationField->data); @@ -2943,11 +2937,11 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, // The Jacobian matrices are initialised with identity or the initial affine mat33 affineMatrix; - reg_mat33_eye(&affineMatrix); + Mat33Eye(&affineMatrix); if(flowFieldImage->num_ext>0) { if(flowFieldImage->ext_list[0].edata!=nullptr) - affineMatrix = reg_mat44_to_mat33(reinterpret_cast(flowFieldImage->ext_list[0].edata)); + affineMatrix = Mat44ToMat33(reinterpret_cast(flowFieldImage->ext_list[0].edata)); else NR_FATAL_ERROR("The affine matrix is expected to be stored in the flow field"); } const size_t voxelNumber = NiftiImage::calcVoxelNumber(flowFieldImage, 3); @@ -2982,7 +2976,7 @@ void reg_defField_GetJacobianMatFromFlowField_core(mat33* jacobianMatrices, if(flowFieldImage->num_ext>1) { if(flowFieldImage->ext_list[1].edata!=nullptr) - affineMatrix = reg_mat44_to_mat33(reinterpret_cast(flowFieldImage->ext_list[1].edata)); + affineMatrix = Mat44ToMat33(reinterpret_cast(flowFieldImage->ext_list[1].edata)); else NR_FATAL_ERROR("The affine matrix is expected to be stored in the flow field"); for(size_t i=0; isform_code > 0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); + reorientation = Mat44ToMat33(&splineControlPoint->sto_ijk); + else reorientation = Mat44ToMat33(&splineControlPoint->qto_ijk); #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -583,8 +583,8 @@ double reg_spline_approxLinearEnergyValue3D(const nifti_image *splineControlPoin // Matrix to use to convert the gradient from mm to voxel mat33 reorientation; if (splineControlPoint->sform_code > 0) - reorientation = reg_mat44_to_mat33(&splineControlPoint->sto_ijk); - else reorientation = reg_mat44_to_mat33(&splineControlPoint->qto_ijk); + reorientation = Mat44ToMat33(&splineControlPoint->sto_ijk); + else reorientation = Mat44ToMat33(&splineControlPoint->qto_ijk); #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -688,7 +688,7 @@ void reg_spline_approxLinearEnergyGradient2D(const nifti_image *splineControlPoi set_first_order_basis_values(basisX, basisY); // Matrix to use to convert the gradient from mm to voxel - const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk); + const mat33 reorientation = Mat44ToMat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk); const mat33 invReorientation = nifti_mat33_inverse(reorientation); const DataType approxRatio = weight / static_cast(nodeNumber); @@ -756,7 +756,7 @@ void reg_spline_approxLinearEnergyGradient3D(const nifti_image *splineControlPoi set_first_order_basis_values(basisX, basisY, basisZ); // Matrix to use to convert the gradient from mm to voxel - const mat33 reorientation = reg_mat44_to_mat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk); + const mat33 reorientation = Mat44ToMat33(splineControlPoint->sform_code > 0 ? &splineControlPoint->sto_ijk : &splineControlPoint->qto_ijk); const mat33 invReorientation = nifti_mat33_inverse(reorientation); const DataType approxRatio = weight / static_cast(nodeNumber); @@ -863,9 +863,9 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); double constraintValue = 0; size_t l, index; - float refPosition[4]; - float defPosition[4]; - float floPosition[4]; + float refPosition[3]; + float defPosition[3]; + float floPosition[3]; int previous[3], a, b, c; DataType basisX[4], basisY[4], basisZ[4], basis; const mat44 *gridRealToVox = &(controlPointImage->qto_ijk); @@ -888,9 +888,8 @@ double reg_spline_getLandmarkDistance_core(const nifti_image *controlPointImage, refPosition[2] = landmarkReference[l * imageDim + 2]; floPosition[2] = landmarkFloating[l * imageDim + 2]; } else refPosition[2] = floPosition[2] = 0; - refPosition[3] = floPosition[3] = 1; // Convert the reference position to voxel in the control point grid space - reg_mat44_mul(gridRealToVox, refPosition, defPosition); + Mat44Mul(*gridRealToVox, refPosition, defPosition); // Extract the corresponding nodes previous[0] = Floor(defPosition[0]) - 1; @@ -1003,7 +1002,7 @@ void reg_spline_getLandmarkDistanceGradient_core(const nifti_image *controlPoint floPosition[2] = landmarkFloating[l * imageDim + 2]; } else refPosition[2] = floPosition[2] = 0; // Convert the reference position to voxel in the control point grid space - reg_mat44_mul(gridRealToVox, refPosition, defPosition); + Mat44Mul(*gridRealToVox, refPosition, defPosition); if (imageDim == 2) defPosition[2] = 0; // Extract the corresponding nodes previous[0] = Floor(defPosition[0]) - 1; diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h deleted file mode 100644 index 42c0cddd..00000000 --- a/reg-lib/cpu/_reg_maths.h +++ /dev/null @@ -1,200 +0,0 @@ -/** - * @file _reg_maths.h - * @brief Library that contains small math routines - * @author Marc Modat - * @date 25/03/2009 - * - * Created by Marc Modat on 25/03/2009. - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - * - */ - -#pragma once - -#include "RNifti.h" - -#ifdef _OPENMP -#include -#endif - -#if USE_SSE -#include -#include -#ifdef __SSE3__ -#include -#endif -#endif - -#define _USE_MATH_DEFINES -#include - -#ifdef __CUDACC__ -#define DEVICE __host__ __device__ -#else -#define DEVICE -#endif - -typedef enum { - DEF_FIELD, - DISP_FIELD, - CUB_SPLINE_GRID, - DEF_VEL_FIELD, - DISP_VEL_FIELD, - SPLINE_VEL_GRID, - LIN_SPLINE_GRID -} NREG_TRANS_TYPE; - -/* *************************************************************** */ -namespace NiftyReg { -/* *************************************************************** */ -// The functions in the standard library are slower; so, these are implemented -template -DEVICE inline T Square(const T& x) { - return x * x; -} -template -DEVICE inline T Cube(const T& x) { - return x * x * x; -} -template -DEVICE inline int Floor(const T& x) { - const int i = static_cast(x); - return i - (x < i); -} -template -DEVICE inline int Ceil(const T& x) { - const int i = static_cast(x); - return i + (x > i); -} -template -DEVICE inline int Round(const T& x) { - return static_cast(x + (x >= 0 ? 0.5 : -0.5)); -} -/* *************************************************************** */ -} // namespace NiftyReg -/* *************************************************************** */ -template -T* reg_matrix1DAllocate(size_t arraySize); -/* *************************************************************** */ -template -void reg_matrix1DDeallocate(T* mat); -/* *************************************************************** */ -template -T** reg_matrix2DAllocate(size_t arraySizeX, size_t arraySizeY); -/* *************************************************************** */ -template -void reg_matrix2DDeallocate(size_t arraySizeX, T** mat); -/* *************************************************************** */ -template -T** reg_matrix2DTranspose(T** mat, size_t arraySizeX, size_t arraySizeY); -/* *************************************************************** */ -template -T** reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, bool transposeMat2); -template -void reg_matrix2DMultiply(T** mat1, size_t mat1X, size_t mat1Y, T** mat2, size_t mat2X, size_t mat2Y, T** res, bool transposeMat2); -/* *************************************************************** */ -template -T* reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect); -template -void reg_matrix2DVectorMultiply(T** mat, size_t m, size_t n, T* vect, T* res); -/* *************************************************************** */ -/** @brief Add two 3-by-3 matrices -*/ -mat33 reg_mat33_add(mat33 const* A, mat33 const* B); -mat33 operator+(mat33 A, mat33 B); -/* *************************************************************** */ -/** @brief Multiply two 3-by-3 matrices -*/ -mat33 reg_mat33_mul(mat33 const* A, - mat33 const* B); -mat33 operator*(mat33 A, - mat33 B); -/* *************************************************************** */ -//The mat33 represent a 3x3 matrix -void reg_mat33_mul(mat44 const* mat, float const* in, float *out); -void reg_mat33_mul(mat33 const* mat, float const* in, float *out); -/* *************************************************************** */ -/** @brief Subtract two 3-by-3 matrices -*/ -mat33 reg_mat33_minus(mat33 const* A, mat33 const* B); -mat33 operator-(mat33 A, mat33 B); -/* *************************************************************** */ -/** @brief Transpose a 3-by-3 matrix -*/ -mat33 reg_mat33_trans(mat33 A); -/* *************************************************************** */ -/** @brief Diagonalize a 3-by-3 matrix -*/ -void reg_mat33_diagonalize(mat33 const* A, mat33 * Q, mat33 * D); -/* *************************************************************** */ -/** @brief Set up a 3-by-3 matrix with an identity -*/ -void reg_mat33_eye(mat33 *mat); -/* *************************************************************** */ -/** @brief Compute the determinant of a 3-by-3 matrix -*/ -void reg_mat33_to_nan(mat33 *A); -/* *************************************************************** */ -/** @brief Transform a mat44 to a mat33 matrix -*/ -mat33 reg_mat44_to_mat33(mat44 const* A); -void reg_heapSort(float *array_tmp, int *index_tmp, int blockNum); -/* *************************************************************** */ -template -void reg_heapSort(T *array_tmp,int blockNum); -/* *************************************************************** */ -bool operator==(mat44 A,mat44 B); -/* *************************************************************** */ -bool operator!=(mat44 A,mat44 B); -/* *************************************************************** */ -/** @brief Multiply two 4-by-4 matrices - */ -mat44 reg_mat44_mul(mat44 const* A, - mat44 const* B); -mat44 operator*(mat44 A, - mat44 B); -/* *************************************************************** */ -/** @brief Multiply a vector with a 4-by-4 matrix - */ -void reg_mat44_mul(mat44 const* mat, - float const* in, - float *out); - -void reg_mat44_mul(mat44 const* mat, - double const* in, - double *out); -/* *************************************************************** */ -/** @brief Multiply a 4-by-4 matrix with a scalar - */ -mat44 reg_mat44_mul(mat44 const* mat, - double scalar); -/* *************************************************************** */ -/** @brief Add two 4-by-4 matrices - */ -mat44 reg_mat44_add(mat44 const* A, mat44 const* B); -mat44 operator+(mat44 A,mat44 B); -/* *************************************************************** */ -/** @brief Subtract two 4-by-4 matrices - */ -mat44 reg_mat44_minus(mat44 const* A, mat44 const* B); -mat44 operator-(mat44 A,mat44 B); -/* *************************************************************** */ -/** @brief Set up a 4-by-4 matrix with an identity - */ -void reg_mat44_eye(mat44 *mat); -/* *************************************************************** */ -/** @brief Compute the determinant of a 4-by-4 matrix - */ -template T reg_mat44_det(mat44 const* A); -/* *************************************************************** */ -/** @brief Display a mat44 matrix - */ -void reg_mat44_disp(const mat44& mat, const std::string& title); -/* *************************************************************** */ -double get_square_distance3D(float * first_point3D, float * second_point3D); -/* *************************************************************** */ -double get_square_distance2D(float * first_point2D, float * second_point2D); -/* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_maths_eigen.cpp b/reg-lib/cpu/_reg_maths_eigen.cpp deleted file mode 100644 index 444a1721..00000000 --- a/reg-lib/cpu/_reg_maths_eigen.cpp +++ /dev/null @@ -1,205 +0,0 @@ -#define USE_EIGEN - -#include "_reg_maths_eigen.h" -#include "_reg_maths.h" -#include "Debug.hpp" - -// Eigen headers are in there because of the nvcc preprocessing step -#include "Eigen/Core" -#include "Eigen/SVD" -#include "unsupported/Eigen/MatrixFunctions" - -//_reg_maths_eigen.cpp -/* *************************************************************** */ -/** @brief SVD -* @param in input matrix to decompose - in place -* @param size_m row -* @param size_n colomn -* @param w diagonal term -* @param v rotation part -*/ -template -void svd(T **in, size_t size_m, size_t size_n, T * w, T **v) { - if (size_m == 0 || size_n == 0) - NR_FATAL_ERROR("The specified matrix is empty"); - -#ifdef _WIN32 - long sm, sn, sn2; - long size__m = (long)size_m, size__n = (long)size_n; -#else - size_t sm, sn, sn2; - size_t size__m = size_m, size__n = size_n; -#endif - Eigen::MatrixXd m(size_m, size_n); - - //Convert to Eigen matrix -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - shared(in,m, size__m, size__n) \ - private(sn) -#endif - for (sm = 0; sm < size__m; sm++) - { - for (sn = 0; sn < size__n; sn++) - { - m(sm, sn) = static_cast(in[sm][sn]); - } - } - - Eigen::JacobiSVD svd(m, Eigen::ComputeThinU | Eigen::ComputeThinV); - -#ifdef _OPENMP -#pragma omp parallel for default(none) \ - shared(in,svd,v,w, size__n,size__m) \ - private(sn2, sm) -#endif - for (sn = 0; sn < size__n; sn++) { - w[sn] = static_cast(svd.singularValues()(sn)); - for (sn2 = 0; sn2 < size__n; sn2++) { - v[sn2][sn] = static_cast(svd.matrixV()(sn2, sn)); - } - for (sm = 0; sm < size__m; sm++) { - in[sm][sn] = static_cast(svd.matrixU()(sm, sn)); - } - } -} -template void svd(float **in, size_t m, size_t n, float * w, float **v); -template void svd(double **in, size_t m, size_t n, double * w, double **v); -/* *************************************************************** */ -template -T reg_matrix2DDet(T** mat, size_t m, size_t n) { - if (m != n) - NR_FATAL_ERROR("The matrix have to be square: [" + std::to_string(m) + " " + std::to_string(n) + "]"); - - double res; - if (m == 2) { - res = static_cast(mat[0][0]) * static_cast(mat[1][1]) - static_cast(mat[1][0]) * static_cast(mat[0][1]); - } - else if (m == 3) { - res = (static_cast(mat[0][0]) * (static_cast(mat[1][1]) * static_cast(mat[2][2]) - static_cast(mat[1][2]) * static_cast(mat[2][1]))) - - (static_cast(mat[0][1]) * (static_cast(mat[1][0]) * static_cast(mat[2][2]) - static_cast(mat[1][2]) * static_cast(mat[2][0]))) + - (static_cast(mat[0][2]) * (static_cast(mat[1][0]) * static_cast(mat[2][1]) - static_cast(mat[1][1]) * static_cast(mat[2][0]))); - } - else { - // Convert to Eigen format - Eigen::MatrixXd eigenRes(m, n); - for (size_t i = 0; i < m; i++) { - for (size_t j = 0; j < n; j++) { - eigenRes(i, j) = static_cast(mat[i][j]); - } - } - res = eigenRes.determinant(); - } - return static_cast(res); -} -template float reg_matrix2DDet(float** mat, size_t m, size_t n); -template double reg_matrix2DDet(double** mat, size_t m, size_t n); -/* *************************************************************** */ -void reg_mat33_expm(mat33 *in_tensor) -{ - int sm, sn; - Eigen::Matrix3d tensor; - - // Convert to Eigen format - for (sm = 0; sm < 3; sm++){ - for (sn = 0; sn < 3; sn++){ - float val=in_tensor->m[sm][sn]; - if(val!=val) return; - tensor(sm, sn) = static_cast(val); - } - } - - // Compute exp(E) - tensor = tensor.exp(); - - // Convert the result to mat33 format - for (sm = 0; sm < 3; sm++) - for (sn = 0; sn < 3; sn++) - in_tensor->m[sm][sn] = static_cast(tensor(sm, sn)); -} -/* *************************************************************** */ -mat44 reg_mat44_expm(mat44 const* mat) -{ - mat44 X; - Eigen::Matrix4d m; - for (size_t i = 0; i < 4; ++i) { - for (size_t j = 0; j < 4; ++j) { - m(i, j) = static_cast(mat->m[i][j]); - } - } - m = m.exp(); - // - for (size_t i = 0; i < 4; ++i) - for (size_t j = 0; j < 4; ++j) - X.m[i][j] = static_cast(m(i, j)); - - return X; -} -/* *************************************************************** */ -void reg_mat33_logm(mat33 *in_tensor) -{ - int sm, sn; - Eigen::Matrix3d tensor; - - // Convert to Eigen format - bool all_zeros = true; - double det = 0; - for (sm = 0; sm < 3; sm++){ - for (sn = 0; sn < 3; sn++){ - float val=in_tensor->m[sm][sn]; - if(val!=0.f) all_zeros=false; - if(val!=val) return; - tensor(sm, sn) = static_cast(val); - } - } - // Actually R case requires invertible and no negative real ev, - // but the only observed case so far was non-invertible. - // determinant is not a perfect check for invertibility and - // identity with zero not great either, but the alternative - // is a general eigensolver and the logarithm function should - // suceed unless convergence just isn't happening. - det = tensor.determinant(); - if(all_zeros || det == 0){ - reg_mat33_to_nan(in_tensor); - return; - } - - // Compute the actual matrix log - tensor = tensor.log(); - - // Convert the result to mat33 format - for (sm = 0; sm < 3; sm++) - for (sn = 0; sn < 3; sn++) - in_tensor->m[sm][sn] = static_cast(tensor(sm, sn)); -} -/* *************************************************************** */ -mat44 reg_mat44_logm(mat44 const* mat) -{ - mat44 X; - Eigen::Matrix4d m; - for (size_t i = 0; i < 4; ++i) { - for (size_t j = 0; j < 4; ++j) { - m(i, j) = static_cast(mat->m[i][j]); - } - } - m = m.log(); - for (size_t i = 0; i < 4; ++i) - for (size_t j = 0; j < 4; ++j) - X.m[i][j] = static_cast(m(i, j)); - return X; -} -/* *************************************************************** */ -mat44 reg_mat44_avg2(mat44 const* A, mat44 const* B) -{ - mat44 out; - mat44 logA = reg_mat44_logm(A); - mat44 logB = reg_mat44_logm(B); - for (int i = 0; i < 4; ++i) { - logA.m[3][i] = 0.f; - logB.m[3][i] = 0.f; - } - logA = reg_mat44_add(&logA, &logB); - out = reg_mat44_mul(&logA, 0.5); - return reg_mat44_expm(&out); - -} diff --git a/reg-lib/cpu/_reg_maths_eigen.h b/reg-lib/cpu/_reg_maths_eigen.h deleted file mode 100644 index 20867b69..00000000 --- a/reg-lib/cpu/_reg_maths_eigen.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once - -#include "RNifti.h" - -/* *************************************************************** */ -/* Functions calling the Eigen library */ -/* See http://eigen.tuxfamily.org/index.php?title=Main_Page */ -/* *************************************************************** */ - -/* *************************************************************** */ -template -void svd(T **in, size_t m, size_t n, T * w, T **v); -/* *************************************************************** */ -template -T reg_matrix2DDet(T** mat, size_t m, size_t n); -/* *************************************************************** */ -/** @brief Compute the log of a 3-by-3 matrix -*/ -void reg_mat33_expm(mat33 *in_tensor); -/* *************************************************************** */ -/** @brief Compute the exp of a 4-by-4 matrix -*/ -mat44 reg_mat44_expm(const mat44 *mat); -/* *************************************************************** */ -/** @brief Compute the log of a 3-by-3 matrix -*/ -void reg_mat33_logm(mat33 *in_tensor); -/* *************************************************************** */ -/** @brief Compute the log of a 4-by-4 matrix -*/ -mat44 reg_mat44_logm(const mat44 *mat); -/* *************************************************************** */ -/** @brief Compute the average of two matrices using a log-euclidean -* framework -*/ -mat44 reg_mat44_avg2(mat44 const* A, mat44 const* b); diff --git a/reg-lib/cpu/_reg_resampling.cpp b/reg-lib/cpu/_reg_resampling.cpp index 0d9d1785..6fe684c5 100755 --- a/reg-lib/cpu/_reg_resampling.cpp +++ b/reg-lib/cpu/_reg_resampling.cpp @@ -11,8 +11,7 @@ */ #include "_reg_resampling.h" -#include "_reg_maths.h" -#include "_reg_maths_eigen.h" +#include "Maths.hpp" #include "_reg_tools.h" #define SINC_KERNEL_RADIUS 3 @@ -165,7 +164,7 @@ void reg_dti_resampling_preprocessing(nifti_image *floatingImage, diffTensor[tid].m[2][2] = static_cast(floatingIntensityZZ[floatingIndex]); // Compute the log of the diffusion tensor. - reg_mat33_logm(&diffTensor[tid]); + Mat33Logm(&diffTensor[tid]); // Write this out as a new image floatingIntensityXX[floatingIndex] = static_cast(diffTensor[tid].m[0][0]); @@ -257,10 +256,10 @@ void reg_dti_resampling_postprocessing(nifti_image *inputImage, inputTensor[tid].m[2][2] = static_cast(inputIntensityZZ[warpedIndex]); // Exponentiate the warped tensor if (warpedImage == nullptr) { - reg_mat33_expm(&inputTensor[tid]); + Mat33Expm(&inputTensor[tid]); testSum = 0; } else { - reg_mat33_eye(&warpedTensor[tid]); + Mat33Eye(&warpedTensor[tid]); warpedTensor[tid].m[0][0] = static_cast(warpedXX[warpedIndex]); warpedTensor[tid].m[0][1] = static_cast(warpedXY[warpedIndex]); warpedTensor[tid].m[1][0] = warpedTensor[tid].m[0][1]; @@ -396,7 +395,7 @@ void ResampleImage3D(const nifti_image *floatingImage, world[2] = static_cast(deformationFieldPtrZ[index]); // real -> voxel; floating space - reg_mat44_mul(floatingIJKMatrix, world, position); + Mat44Mul(*floatingIJKMatrix, world, position); previous[0] = Floor(position[0]); previous[1] = Floor(position[1]); @@ -576,7 +575,7 @@ void ResampleImage2D(const nifti_image *floatingImage, world[2] = 0; // real -> voxel; floating space - reg_mat44_mul(floatingIJKMatrix, world, position); + Mat44Mul(*floatingIJKMatrix, world, position); previous[0] = Floor(position[0]); previous[1] = Floor(position[1]); @@ -921,7 +920,7 @@ void ResampleImage3D_PSF_Sinc(const nifti_image *floatingImage, psfWorld[2] /= resamplingWeightSum; // real -> voxel; floating space - reg_mat44_mul(floatingIJKMatrix, psfWorld, position); + Mat44Mul(*floatingIJKMatrix, psfWorld, position); previous[0] = Floor(position[0]); previous[1] = Floor(position[1]); @@ -1128,11 +1127,11 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage, // T=P+A*S*At A = nifti_mat33_inverse(jacMat[index]); - ASAt = A * S * reg_mat33_trans(A); + ASAt = A * S * Mat33Trans(A); TmS = T - ASAt; - reg_mat33_diagonalize(&TmS, &TmS_EigVec, &TmS_EigVal); + Mat33Diagonalize(&TmS, &TmS_EigVec, &TmS_EigVal); // If eigen values are less than 0, set them to 0. // Also, invert the eigenvalues to estimate the inverse. @@ -1148,7 +1147,7 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage, } } - TmS_EigVec_trans = reg_mat33_trans(TmS_EigVec); + TmS_EigVec_trans = Mat33Trans(TmS_EigVec); P = TmS_EigVec * TmS_EigVal * TmS_EigVec_trans; invP = TmS_EigVec * TmS_EigVal_inv * TmS_EigVec_trans; currentDeterminant = TmS_EigVal.m[0][0] * TmS_EigVal.m[1][1] * TmS_EigVal.m[2][2]; @@ -1157,16 +1156,16 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage, A = nifti_mat33_inverse(jacMat[index]); - ASAt = A * S * reg_mat33_trans(A); + ASAt = A * S * Mat33Trans(A); mat33 S_EigVec, S_EigVal; // % rotate S // [ZS, DS] = eig(S); - reg_mat33_diagonalize(&ASAt, &S_EigVec, &S_EigVal); + Mat33Diagonalize(&ASAt, &S_EigVec, &S_EigVal); // T1 = ZS'*T*ZS; - mat33 T1 = reg_mat33_trans(S_EigVec) * T * S_EigVec; + mat33 T1 = Mat33Trans(S_EigVec) * T * S_EigVec; // % Volume-preserving scale of S to make it isotropic // detS = prod(diag(DS)); @@ -1191,12 +1190,12 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage, } // T2 = LambdaN*T1*LambdaN'; - mat33 T2 = LambdaN * T1 * reg_mat33_trans(LambdaN); + mat33 T2 = LambdaN * T1 * Mat33Trans(LambdaN); // % Rotate to make thing axis-aligned // [ZT2, DT2] = eig(T2); mat33 T2_EigVec, T2_EigVal; - reg_mat33_diagonalize(&T2, &T2_EigVec, &T2_EigVal); + Mat33Diagonalize(&T2, &T2_EigVec, &T2_EigVal); // % Optimal solution in the transformed axis-aligned space // DP2 = diag(max(sqrt(detS),diag(DT2))); @@ -1213,11 +1212,11 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage, // % Roll back the transforms // Q = ZS*invLambdaN*ZT2*DQ2*ZT2'*invLambdaN*ZS' - mat33 Q = S_EigVec * invLambdaN * T2_EigVec * DP2 * reg_mat33_trans(T2_EigVec) * invLambdaN * reg_mat33_trans(S_EigVec); + mat33 Q = S_EigVec * invLambdaN * T2_EigVec * DP2 * Mat33Trans(T2_EigVec) * invLambdaN * Mat33Trans(S_EigVec); // P=Q-S TmS = Q - S; invP = nifti_mat33_inverse(TmS); - reg_mat33_diagonalize(&TmS, &TmS_EigVec, &TmS_EigVal); + Mat33Diagonalize(&TmS, &TmS_EigVec, &TmS_EigVal); currentDeterminant = TmS_EigVal.m[0][0] * TmS_EigVal.m[1][1] * TmS_EigVal.m[2][2]; currentDeterminant = currentDeterminant < 0.000001f ? 0.000001f : currentDeterminant; @@ -1322,7 +1321,7 @@ void ResampleImage3D_PSF(const nifti_image *floatingImage, psfWorld[2] /= resamplingWeightSum; // real -> voxel; floating space - reg_mat44_mul(floatingIJKMatrix, psfWorld, position); + Mat44Mul(*floatingIJKMatrix, psfWorld, position); previous[0] = Floor(position[0]); previous[1] = Floor(position[1]); @@ -1531,7 +1530,7 @@ void reg_bilinearResampleGradient(const nifti_image *floatingImage, } // Reorientation matrix is assessed in order to remove the rigid component - mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz))); + mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(Mat44ToMat33(&deformationField->sto_xyz))); // Some useful variables mat33 jacMat; @@ -1701,7 +1700,7 @@ void reg_trilinearResampleGradient(const nifti_image *floatingImage, } // Reorientation matrix is assessed in order to remove the rigid component - mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz))); + mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(Mat44ToMat33(&deformationField->sto_xyz))); // Some useful variables mat33 jacMat; @@ -1980,7 +1979,7 @@ void TrilinearImageGradient(const nifti_image *floatingImage, world[2] = (FieldType)deformationFieldPtrZ[index]; /* real -> voxel; floating space */ - reg_mat44_mul(floatingIJKMatrix, world, position); + Mat44Mul(*floatingIJKMatrix, world, position); previous[0] = Floor(position[0]); previous[1] = Floor(position[1]); @@ -2257,7 +2256,7 @@ void CubicSplineImageGradient3D(const nifti_image *floatingImage, world[2] = (FieldType)deformationFieldPtrZ[index]; /* real -> voxel; floating space */ - reg_mat44_mul(floatingIJKMatrix, world, position); + Mat44Mul(*floatingIJKMatrix, world, position); previous[0] = Floor(position[0]); previous[1] = Floor(position[1]); diff --git a/reg-lib/cpu/_reg_ssd.cpp b/reg-lib/cpu/_reg_ssd.cpp index b000fbd4..2a4bddfb 100755 --- a/reg-lib/cpu/_reg_ssd.cpp +++ b/reg-lib/cpu/_reg_ssd.cpp @@ -349,7 +349,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, mat44 *image_mm2vox = &refImage->qto_ijk; if (refImage->sform_code > 0) image_mm2vox = &refImage->sto_ijk; - mat44 grid2img_vox = reg_mat44_mul(image_mm2vox, grid_vox2mm); + mat44 grid2img_vox = *image_mm2vox * *grid_vox2mm; // Compute the block size const int blockSize[3] = { @@ -404,7 +404,7 @@ void GetDiscretisedValueSSD_core3D(nifti_image *controlPointGridImage, controlPointGridImage->nx * cpy + cpx; // Compute the corresponding image voxel position - reg_mat44_mul(&grid2img_vox, gridVox, imageVox); + Mat44Mul(grid2img_vox, gridVox, imageVox); imageVox[0] = static_cast(Round(imageVox[0])); imageVox[1] = static_cast(Round(imageVox[1])); imageVox[2] = static_cast(Round(imageVox[2])); diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 017d6029..21aa5869 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -253,22 +253,22 @@ void reg_tools_removeSCLInfo(nifti_image *image) { void reg_getRealImageSpacing(nifti_image *image, float *spacingValues) { float indexVoxel1[3] = { 0, 0, 0 }; float indexVoxel2[3], realVoxel1[3], realVoxel2[3]; - reg_mat44_mul(&(image->sto_xyz), indexVoxel1, realVoxel1); + Mat44Mul(image->sto_xyz, indexVoxel1, realVoxel1); indexVoxel2[1] = indexVoxel2[2] = 0; indexVoxel2[0] = 1; - reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2); + Mat44Mul(image->sto_xyz, indexVoxel2, realVoxel2); spacingValues[0] = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2])); indexVoxel2[0] = indexVoxel2[2] = 0; indexVoxel2[1] = 1; - reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2); + Mat44Mul(image->sto_xyz, indexVoxel2, realVoxel2); spacingValues[1] = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2])); if (image->nz > 1) { indexVoxel2[0] = indexVoxel2[1] = 0; indexVoxel2[2] = 1; - reg_mat44_mul(&(image->sto_xyz), indexVoxel2, realVoxel2); + Mat44Mul(image->sto_xyz, indexVoxel2, realVoxel2); spacingValues[2] = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2])); } } diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index 650e6c71..f0475e5d 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -20,7 +20,7 @@ #include #include #include -#include "_reg_maths.h" +#include "Maths.hpp" #include "Debug.hpp" using namespace NiftyReg; diff --git a/reg-lib/cuda/CudaCommon.hpp b/reg-lib/cuda/CudaCommon.hpp index b5872e56..9bd528c3 100644 --- a/reg-lib/cuda/CudaCommon.hpp +++ b/reg-lib/cuda/CudaCommon.hpp @@ -118,5 +118,21 @@ UniqueTextureObjectPtr CreateTextureObject(const DataType *devPtr, const cudaChannelFormatKind channelFormat, const unsigned channelCount); /* *************************************************************** */ +template +__device__ __inline__ int3 IndexToDims(const int index, const int3 dims) { + int quot = 0, rem; + if constexpr (is3d) + Divide(index, dims.x * dims.y, quot, rem); + else rem = index; + const int z = quot; + Divide(rem, dims.x, quot, rem); + const int y = quot, x = rem; + return { x, y, z }; +} +/* *************************************************************** */ +__device__ __inline__ int3 IndexToDims(const int index, const int3 dims) { + return dims.z > 1 ? IndexToDims(index, dims) : IndexToDims(index, dims); +} +/* *************************************************************** */ } // namespace NiftyReg::Cuda /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaGlobalTransformation.cu b/reg-lib/cuda/CudaGlobalTransformation.cu index a5c0b82f..076f91b2 100644 --- a/reg-lib/cuda/CudaGlobalTransformation.cu +++ b/reg-lib/cuda/CudaGlobalTransformation.cu @@ -11,7 +11,6 @@ */ #include "CudaGlobalTransformation.hpp" -#include "_reg_common_cuda_kernels.cu" /* *************************************************************** */ template @@ -21,7 +20,7 @@ void GetAffineDeformationField(const mat44 *affineMatrix, const size_t voxelNumber = NiftiImage::calcVoxelNumber(deformationField, is3d ? 3 : 2); const int3 imageDims = make_int3(deformationField->nx, deformationField->ny, deformationField->nz); const mat44 *targetMatrix = deformationField->sform_code > 0 ? &deformationField->sto_xyz : &deformationField->qto_xyz; - const mat44 transMatrix = compose ? *affineMatrix : reg_mat44_mul(affineMatrix, targetMatrix); + const mat44 transMatrix = compose ? *affineMatrix : *affineMatrix * *targetMatrix; Cuda::UniqueTextureObjectPtr deformationFieldTexturePtr; cudaTextureObject_t deformationFieldTexture = 0; if constexpr (compose) { deformationFieldTexturePtr = Cuda::CreateTextureObject(deformationFieldCuda, voxelNumber, cudaChannelFormatKindFloat, 4); @@ -36,7 +35,7 @@ void GetAffineDeformationField(const mat44 *affineMatrix, float4 defVal = tex1Dfetch(deformationFieldTexture, index); voxel[0] = defVal.x; voxel[1] = defVal.y; voxel[2] = defVal.z; } else { - auto dims = reg_indexToDims_cuda(index, imageDims); + auto dims = Cuda::IndexToDims(index, imageDims); voxel[0] = static_cast(dims.x); voxel[1] = static_cast(dims.y); voxel[2] = static_cast(dims.z); @@ -44,7 +43,7 @@ void GetAffineDeformationField(const mat44 *affineMatrix, // The transformation is applied float position[3]; - reg_mat44_mul_cuda(transMatrix, voxel, position); + Mat44Mul(transMatrix, voxel, position); // The deformation field (real coordinates) is stored deformationFieldCuda[index] = make_float4(position[0], position[1], position[2], 0); diff --git a/reg-lib/cuda/CudaLocalTransformation.cu b/reg-lib/cuda/CudaLocalTransformation.cu index 2c98a8ca..ce733da6 100644 --- a/reg-lib/cuda/CudaLocalTransformation.cu +++ b/reg-lib/cuda/CudaLocalTransformation.cu @@ -84,7 +84,7 @@ __device__ SecondDerivative GetApproxSecondDerivative(const int index, cudaTextureObject_t controlPointTexture, const int3 controlPointImageDim, const Basis2nd basis) { - const auto [x, y, z] = reg_indexToDims_cuda(index, controlPointImageDim); + const auto [x, y, z] = IndexToDims(index, controlPointImageDim); if (!isGradient && (x < 1 || x >= controlPointImageDim.x - 1 || y < 1 || y >= controlPointImageDim.y - 1 || (is3d && (z < 1 || z >= controlPointImageDim.z - 1)))) return {}; @@ -206,7 +206,7 @@ void ApproxBendingEnergyGradient(nifti_image *controlPointImage, const float approxRatio = bendingEnergyWeight / (float)controlPointNumber; thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), controlPointNumber, [controlPointImageDim, basis, secondDerivativesTexture, transGradientCuda, approxRatio]__device__(const int index) { - const auto [x, y, z] = reg_indexToDims_cuda(index, controlPointImageDim); + const auto [x, y, z] = IndexToDims(index, controlPointImageDim); typename SecondDerivative::Type gradientValue{}; if constexpr (is3d) { for (int c = z - 1, basInd = 0; c < z + 2; c++) { @@ -270,7 +270,7 @@ void ComputeApproxJacobianValues(const nifti_image *controlPointImage, auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion - const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz); + const mat33 reorientation = Mat44ToMat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz); // The Jacobian matrix is computed for every control point if (controlPointImage->nz > 1) { @@ -306,7 +306,7 @@ void ComputeJacobianValues(const nifti_image *controlPointImage, auto controlPointTexture = Cuda::CreateTextureObject(controlPointImageCuda, controlPointNumber, cudaChannelFormatKindFloat, 4); // Need to reorient the Jacobian matrix using the header information - real to voxel conversion - const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz); + const mat33 reorientation = Mat44ToMat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_xyz : &controlPointImage->qto_xyz); // The Jacobian matrix is computed for every voxel if (controlPointImage->nz > 1) { @@ -398,7 +398,7 @@ void GetJacobianPenaltyTermGradient(const nifti_image *referenceImage, } // Need to disorient the Jacobian matrix using the header information - voxel to real conversion - const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk); + const mat33 reorientation = Mat44ToMat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); @@ -511,7 +511,7 @@ double CorrectFolding(const nifti_image *referenceImage, } // Need to disorient the Jacobian matrix using the header information - voxel to real conversion - const mat33 reorientation = reg_mat44_to_mat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk); + const mat33 reorientation = Mat44ToMat33(controlPointImage->sform_code > 0 ? &controlPointImage->sto_ijk : &controlPointImage->qto_ijk); const size_t controlPointNumber = NiftiImage::calcVoxelNumber(controlPointImage, 3); const int3 controlPointImageDim = make_int3(controlPointImage->nx, controlPointImage->ny, controlPointImage->nz); @@ -555,7 +555,7 @@ void GetDeformationFromDisplacement(nifti_image *image, float4 *imageCuda) { const int3 imageDim{ image->nx, image->ny, image->nz }; thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [=]__device__(const int index) { - const auto [x, y, z] = reg_indexToDims_cuda(index, imageDim); + const auto [x, y, z] = IndexToDims(index, imageDim); const float4 initialPosition{ float(x) * affineMatrix.m[0][0] + float(y) * affineMatrix.m[0][1] + (is3d ? float(z) * affineMatrix.m[0][2] : 0.f) + affineMatrix.m[0][3], @@ -866,7 +866,7 @@ double ApproxLinearEnergy(const nifti_image *controlPointGrid, const size_t voxelNumber = NiftiImage::calcVoxelNumber(controlPointGrid, 3); // Matrix to use to convert the gradient from mm to voxel - const mat33 reorientation = reg_mat44_to_mat33(controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk); + const mat33 reorientation = Mat44ToMat33(controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk); // Store the basis values since they are constant as the value is approximated at the control point positions only Basis1st basis; @@ -903,7 +903,7 @@ void ApproxLinearEnergyGradient(const nifti_image *controlPointGrid, const float approxRatio = weight / static_cast(voxelNumber); // Matrix to use to convert the gradient from mm to voxel - const mat33 reorientation = reg_mat44_to_mat33(controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk); + const mat33 reorientation = Mat44ToMat33(controlPointGrid->sform_code > 0 ? &controlPointGrid->sto_ijk : &controlPointGrid->qto_ijk); const mat33 invReorientation = nifti_mat33_inverse(reorientation); // Store the basis values since they are constant as the value is approximated at the control point positions only @@ -932,7 +932,7 @@ void ApproxLinearEnergyGradient(const nifti_image *controlPointGrid, thrust::for_each_n(thrust::device, thrust::make_counting_iterator(0), voxelNumber, [ transGradCuda, dispMatricesTexture, cppDims, approxRatio, basis, invReorientation ]__device__(const int index) { - const auto [x, y, z] = reg_indexToDims_cuda(index, cppDims); + const auto [x, y, z] = IndexToDims(index, cppDims); auto gradVal = transGradCuda[index]; if constexpr (is3d) { diff --git a/reg-lib/cuda/CudaLocalTransformationKernels.cu b/reg-lib/cuda/CudaLocalTransformationKernels.cu index b7639f76..ebb95539 100644 --- a/reg-lib/cuda/CudaLocalTransformationKernels.cu +++ b/reg-lib/cuda/CudaLocalTransformationKernels.cu @@ -10,11 +10,112 @@ * */ -#include "_reg_common_cuda_kernels.cu" +#include "CudaCommon.hpp" /* *************************************************************** */ namespace NiftyReg::Cuda { /* *************************************************************** */ +__device__ __inline__ mat33 Mat33Inverse(const mat33 r) { + /* INPUT MATRIX: */ + const double r11 = r.m[0][0]; const double r12 = r.m[0][1]; const double r13 = r.m[0][2]; /* [ r11 r12 r13 ] */ + const double r21 = r.m[1][0]; const double r22 = r.m[1][1]; const double r23 = r.m[1][2]; /* [ r21 r22 r23 ] */ + const double r31 = r.m[2][0]; const double r32 = r.m[2][1]; const double r33 = r.m[2][2]; /* [ r31 r32 r33 ] */ + + double deti = (r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 + + r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13); + + if (deti != 0.0) deti = 1.0 / deti; + + mat33 q; + q.m[0][0] = float(deti * (r22 * r33 - r32 * r23)); + q.m[0][1] = float(deti * (-r12 * r33 + r32 * r13)); + q.m[0][2] = float(deti * (r12 * r23 - r22 * r13)); + + q.m[1][0] = float(deti * (-r21 * r33 + r31 * r23)); + q.m[1][1] = float(deti * (r11 * r33 - r31 * r13)); + q.m[1][2] = float(deti * (-r11 * r23 + r21 * r13)); + + q.m[2][0] = float(deti * (r21 * r32 - r31 * r22)); + q.m[2][1] = float(deti * (-r11 * r32 + r31 * r12)); + q.m[2][2] = float(deti * (r11 * r22 - r21 * r12)); + + return q; +} +/* *************************************************************** */ +__device__ __inline__ float Mat33Determ(const mat33 r) { + /* INPUT MATRIX: */ + const double r11 = r.m[0][0]; const double r12 = r.m[0][1]; const double r13 = r.m[0][2]; /* [ r11 r12 r13 ] */ + const double r21 = r.m[1][0]; const double r22 = r.m[1][1]; const double r23 = r.m[1][2]; /* [ r21 r22 r23 ] */ + const double r31 = r.m[2][0]; const double r32 = r.m[2][1]; const double r33 = r.m[2][2]; /* [ r31 r32 r33 ] */ + + return float(r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 + + r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13); +} +/* *************************************************************** */ +__device__ __inline__ float Mat33RowNorm(const mat33 a) { + float r1 = fabs(a.m[0][0]) + fabs(a.m[0][1]) + fabs(a.m[0][2]); + const float r2 = fabs(a.m[1][0]) + fabs(a.m[1][1]) + fabs(a.m[1][2]); + const float r3 = fabs(a.m[2][0]) + fabs(a.m[2][1]) + fabs(a.m[2][2]); + if (r1 < r2) r1 = r2; + if (r1 < r3) r1 = r3; + return r1; +} +/* *************************************************************** */ +__device__ __inline__ float Mat33ColNorm(const mat33 a) { + float r1 = fabs(a.m[0][0]) + fabs(a.m[1][0]) + fabs(a.m[2][0]); + const float r2 = fabs(a.m[0][1]) + fabs(a.m[1][1]) + fabs(a.m[2][1]); + const float r3 = fabs(a.m[0][2]) + fabs(a.m[1][2]) + fabs(a.m[2][2]); + if (r1 < r2) r1 = r2; + if (r1 < r3) r1 = r3; + return r1; +} +/* *************************************************************** */ +__device__ __inline__ mat33 Mat33Polar(mat33 x) { + // Force matrix to be nonsingular + float gam = Mat33Determ(x); + while (gam == 0.0) { // Perturb matrix + gam = 0.00001f * (0.001f + Mat33RowNorm(x)); + x.m[0][0] += gam; x.m[1][1] += gam; x.m[2][2] += gam; + gam = Mat33Determ(x); + } + + mat33 z; + float gmi, dif = 1.0f; + int k = 0; + while (1) { + const mat33 y = Mat33Inverse(x); + if (dif > 0.3) { // Far from convergence + const float alp = sqrt(Mat33RowNorm(x) * Mat33ColNorm(x)); + const float bet = sqrt(Mat33RowNorm(y) * Mat33ColNorm(y)); + gam = sqrt(bet / alp); + gmi = 1.f / gam; + } else { + gam = gmi = 1.0f; // Close to convergence + } + z.m[0][0] = 0.5f * (gam * x.m[0][0] + gmi * y.m[0][0]); + z.m[0][1] = 0.5f * (gam * x.m[0][1] + gmi * y.m[1][0]); + z.m[0][2] = 0.5f * (gam * x.m[0][2] + gmi * y.m[2][0]); + z.m[1][0] = 0.5f * (gam * x.m[1][0] + gmi * y.m[0][1]); + z.m[1][1] = 0.5f * (gam * x.m[1][1] + gmi * y.m[1][1]); + z.m[1][2] = 0.5f * (gam * x.m[1][2] + gmi * y.m[2][1]); + z.m[2][0] = 0.5f * (gam * x.m[2][0] + gmi * y.m[0][2]); + z.m[2][1] = 0.5f * (gam * x.m[2][1] + gmi * y.m[1][2]); + z.m[2][2] = 0.5f * (gam * x.m[2][2] + gmi * y.m[2][2]); + + dif = (fabs(z.m[0][0] - x.m[0][0]) + fabs(z.m[0][1] - x.m[0][1]) + + fabs(z.m[0][2] - x.m[0][2]) + fabs(z.m[1][0] - x.m[1][0]) + + fabs(z.m[1][1] - x.m[1][1]) + fabs(z.m[1][2] - x.m[1][2]) + + fabs(z.m[2][0] - x.m[2][0]) + fabs(z.m[2][1] - x.m[2][1]) + + fabs(z.m[2][2] - x.m[2][2])); + + k = k + 1; + if (k > 100 || dif < 3.e-6) break; // Convergence or exhaustion + x = z; + } + + return z; +} +/* *************************************************************** */ template __device__ __inline__ void GetBasisSplineValues(const float basis, float *values) { const float ff = Square(basis); @@ -206,7 +307,7 @@ __device__ void GetDeformationField3d(float4 *deformationField, nodePre = { Floor(xVoxel), Floor(yVoxel), Floor(zVoxel) }; basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--), zVoxel - float(nodePre.z--) }; } else { // starting deformation field is blank - !composition - const auto [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim); + const auto [x, y, z] = IndexToDims(index, referenceImageDim); // The "nearest previous" node is determined [0,0,0] const float xVoxel = float(x) / controlPointVoxelSpacing.x; const float yVoxel = float(y) / controlPointVoxelSpacing.y; @@ -269,7 +370,7 @@ __device__ void GetDeformationField2d(float4 *deformationField, nodePre = { Floor(xVoxel), Floor(yVoxel) }; basis = { xVoxel - float(nodePre.x--), yVoxel - float(nodePre.y--) }; } else { // starting deformation field is blank - !composition - const auto [x, y, z] = reg_indexToDims_cuda(index, referenceImageDim); + const auto [x, y, z] = IndexToDims(index, referenceImageDim); // The "nearest previous" node is determined [0,0,0] const float xVoxel = float(x) / controlPointVoxelSpacing.x; const float yVoxel = float(y) / controlPointVoxelSpacing.y; @@ -312,7 +413,7 @@ __global__ void GetApproxJacobianValues2d(float *jacobianMatrices, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { int quot, rem; - reg_div_cuda(tid, controlPointImageDim.x, quot, rem); + Divide(tid, controlPointImageDim.x, quot, rem); const int y = quot, x = rem; if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1) { @@ -374,9 +475,9 @@ __global__ void GetApproxJacobianValues3d(float *jacobianMatrices, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { int quot, rem; - reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); + Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); const int z = quot; - reg_div_cuda(rem, controlPointImageDim.x, quot, rem); + Divide(rem, controlPointImageDim.x, quot, rem); const int y = quot, x = rem; if (0 < x && x < controlPointImageDim.x - 1 && 0 < y && y < controlPointImageDim.y - 1 && 0 < z && z < controlPointImageDim.z - 1) { @@ -459,7 +560,7 @@ __global__ void GetJacobianValues2d(float *jacobianMatrices, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < voxelNumber) { int quot, rem; - reg_div_cuda(tid, referenceImageDim.x, quot, rem); + Divide(tid, referenceImageDim.x, quot, rem); const int y = quot, x = rem; // the "nearest previous" node is determined [0,0,0] @@ -528,9 +629,9 @@ __global__ void GetJacobianValues3d(float *jacobianMatrices, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < voxelNumber) { int quot, rem; - reg_div_cuda(tid, referenceImageDim.x * referenceImageDim.y, quot, rem); + Divide(tid, referenceImageDim.x * referenceImageDim.y, quot, rem); const int z = quot; - reg_div_cuda(rem, referenceImageDim.x, quot, rem); + Divide(rem, referenceImageDim.x, quot, rem); const int y = quot, x = rem; // the "nearest previous" node is determined [0,0,0] @@ -677,7 +778,7 @@ __global__ void ComputeApproxJacGradient2d(float4 *gradient, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { int quot, rem; - reg_div_cuda(tid, controlPointImageDim.x, quot, rem); + Divide(tid, controlPointImageDim.x, quot, rem); const int y = quot, x = rem; float2 jacobianGradient{}; @@ -729,9 +830,9 @@ __global__ void ComputeApproxJacGradient3d(float4 *gradient, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { int quot, rem; - reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); + Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); const int z = quot; - reg_div_cuda(rem, controlPointImageDim.x, quot, rem); + Divide(rem, controlPointImageDim.x, quot, rem); const int y = quot, x = rem; float3 jacobianGradient{}; @@ -787,7 +888,7 @@ __global__ void ComputeJacGradient2d(float4 *gradient, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { int quot, rem; - reg_div_cuda(tid, controlPointImageDim.x, quot, rem); + Divide(tid, controlPointImageDim.x, quot, rem); const int y = quot, x = rem; float2 jacobianGradient{}; @@ -842,9 +943,9 @@ __global__ void ComputeJacGradient3d(float4 *gradient, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { int quot, rem; - reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); + Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); const int z = quot; - reg_div_cuda(rem, controlPointImageDim.x, quot, rem); + Divide(rem, controlPointImageDim.x, quot, rem); const int y = quot, x = rem; float3 jacobianGradient{}; @@ -917,9 +1018,9 @@ __global__ void ApproxCorrectFolding3d(float4 *controlPointGrid, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { int quot, rem; - reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); + Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); const int z = quot; - reg_div_cuda(rem, controlPointImageDim.x, quot, rem); + Divide(rem, controlPointImageDim.x, quot, rem); const int y = quot, x = rem; float3 foldingCorrection{}; @@ -988,9 +1089,9 @@ __global__ void CorrectFolding3d(float4 *controlPointGrid, const unsigned tid = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (tid < controlPointNumber) { int quot, rem; - reg_div_cuda(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); + Divide(tid, controlPointImageDim.x * controlPointImageDim.y, quot, rem); const int z = quot; - reg_div_cuda(rem, controlPointImageDim.x, quot, rem); + Divide(rem, controlPointImageDim.x, quot, rem); const int y = quot, x = rem; float3 foldingCorrection{}; @@ -1144,7 +1245,7 @@ __device__ static mat33 CreateDisplacementMatrix(const int index, const int3& cppDims, const Basis1st& basis, const mat33& reorientation) { - const auto [x, y, z] = reg_indexToDims_cuda(index, cppDims); + const auto [x, y, z] = IndexToDims(index, cppDims); if (x < 1 || x >= cppDims.x - 1 || y < 1 || y >= cppDims.y - 1 || (is3d && (z < 1 || z >= cppDims.z - 1))) return {}; @@ -1189,10 +1290,10 @@ __device__ static mat33 CreateDisplacementMatrix(const int index, } } // Convert from mm to voxel - matrix = reg_mat33_mul_cuda(reorientation, matrix); + matrix = reorientation * matrix; // Removing the rotation component - const mat33 r = reg_mat33_inverse_cuda(reg_mat33_polar_cuda(matrix)); - matrix = reg_mat33_mul_cuda(r, matrix); + const mat33 r = Mat33Inverse(Mat33Polar(matrix)); + matrix = r * matrix; // Convert to displacement matrix.m[0][0]--; matrix.m[1][1]--; if constexpr (is3d) matrix.m[2][2]--; diff --git a/reg-lib/cuda/CudaOptimiser.cu b/reg-lib/cuda/CudaOptimiser.cu index fb662d23..7a4e13a7 100644 --- a/reg-lib/cuda/CudaOptimiser.cu +++ b/reg-lib/cuda/CudaOptimiser.cu @@ -1,5 +1,4 @@ #include "CudaOptimiser.hpp" -#include "_reg_common_cuda_kernels.cu" #include /* *************************************************************** */ diff --git a/reg-lib/cuda/CudaResampling.cu b/reg-lib/cuda/CudaResampling.cu index 5c21bee8..58c33998 100644 --- a/reg-lib/cuda/CudaResampling.cu +++ b/reg-lib/cuda/CudaResampling.cu @@ -11,7 +11,6 @@ */ #include "CudaResampling.hpp" -#include "_reg_common_cuda_kernels.cu" /* *************************************************************** */ namespace NiftyReg::Cuda { @@ -237,19 +236,19 @@ template static float3 GetRealImageSpacing(const nifti_image *image) { float3 spacing{}; float indexVoxel1[3]{}, indexVoxel2[3], realVoxel1[3], realVoxel2[3]; - reg_mat44_mul(&image->sto_xyz, indexVoxel1, realVoxel1); + Mat44Mul(image->sto_xyz, indexVoxel1, realVoxel1); indexVoxel2[1] = indexVoxel2[2] = 0; indexVoxel2[0] = 1; - reg_mat44_mul(&image->sto_xyz, indexVoxel2, realVoxel2); + Mat44Mul(image->sto_xyz, indexVoxel2, realVoxel2); spacing.x = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2])); indexVoxel2[0] = indexVoxel2[2] = 0; indexVoxel2[1] = 1; - reg_mat44_mul(&image->sto_xyz, indexVoxel2, realVoxel2); + Mat44Mul(image->sto_xyz, indexVoxel2, realVoxel2); spacing.y = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2])); if constexpr (is3d) { indexVoxel2[0] = indexVoxel2[1] = 0; indexVoxel2[2] = 1; - reg_mat44_mul(&image->sto_xyz, indexVoxel2, realVoxel2); + Mat44Mul(image->sto_xyz, indexVoxel2, realVoxel2); spacing.z = sqrtf(Square(realVoxel1[0] - realVoxel2[0]) + Square(realVoxel1[1] - realVoxel2[1]) + Square(realVoxel1[2] - realVoxel2[2])); } @@ -290,7 +289,7 @@ void ResampleGradient(const nifti_image *floatingImage, make_float3(warpedImage->dx, warpedImage->dy, warpedImage->dz); // Reorientation matrix is assessed in order to remove the rigid component - const mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(reg_mat44_to_mat33(&deformationField->sto_xyz))); + const mat33 reorient = nifti_mat33_inverse(nifti_mat33_polar(Mat44ToMat33(&deformationField->sto_xyz))); thrust::for_each_n(thrust::device, maskCuda, activeVoxelNumber, [ warpedImageCuda, floatingTexture, deformationFieldTexture, floatingMatrix, floatingDims, defFieldDims, realSpacing, reorient, paddingValue @@ -344,7 +343,7 @@ void ResampleGradient(const nifti_image *floatingImage, // Compute the Jacobian matrix constexpr float basis[] = { 1.f, 0.f }; constexpr float deriv[] = { -1.f, 1.f }; - auto [x, y, z] = reg_indexToDims_cuda(index, defFieldDims); + auto [x, y, z] = IndexToDims(index, defFieldDims); mat33 jacMat{}; for (char c = 0; c < (is3d ? 2 : 1); c++) { if constexpr (is3d) { @@ -407,7 +406,7 @@ void ResampleGradient(const nifti_image *floatingImage, } } // reorient and scale the Jacobian matrix - jacMat = reg_mat33_mul_cuda(reorient, jacMat); + jacMat = reorient * jacMat; jacMat.m[0][0] /= realSpacing.x; jacMat.m[0][1] /= realSpacing.y; jacMat.m[1][0] /= realSpacing.x; diff --git a/reg-lib/cuda/CudaTools.cu b/reg-lib/cuda/CudaTools.cu index 91455a2c..6a3f39dd 100644 --- a/reg-lib/cuda/CudaTools.cu +++ b/reg-lib/cuda/CudaTools.cu @@ -41,23 +41,23 @@ void VoxelCentricToNodeCentric(const nifti_image *nodeImage, if (nodeImage->num_ext > 0 && nodeImage->ext_list[0].edata) { mat44 temp = *(reinterpret_cast(nodeImage->ext_list[0].edata)); temp = nifti_mat44_inverse(temp); - transformation = reg_mat44_mul(&temp, &transformation); + transformation = temp * transformation; } // Millimetre to voxel in the reference image - transformation = reg_mat44_mul(voxelImage->sform_code > 0 ? &voxelImage->sto_ijk : &voxelImage->qto_ijk, &transformation); + transformation = (voxelImage->sform_code > 0 ? voxelImage->sto_ijk : voxelImage->qto_ijk) * transformation; // The information has to be reoriented // Voxel to millimetre contains the orientation of the image that is used // to compute the spatial gradient (floating image) mat33 reorientation; if (voxelToMillimetre) { - reorientation = reg_mat44_to_mat33(voxelToMillimetre); + reorientation = Mat44ToMat33(voxelToMillimetre); if (nodeImage->num_ext > 0 && nodeImage->ext_list[0].edata) { - mat33 temp = reg_mat44_to_mat33(reinterpret_cast(nodeImage->ext_list[0].edata)); + mat33 temp = Mat44ToMat33(reinterpret_cast(nodeImage->ext_list[0].edata)); temp = nifti_mat33_inverse(temp); reorientation = nifti_mat33_mul(temp, reorientation); } - } else reg_mat33_eye(&reorientation); + } else Mat33Eye(&reorientation); // The information has to be weighted float ratio[3] = { nodeImage->dx, nodeImage->dy, nodeImage->dz }; for (int i = 0; i < (is3d ? 3 : 2); ++i) { diff --git a/reg-lib/cuda/CudaToolsKernels.cu b/reg-lib/cuda/CudaToolsKernels.cu index 361bbdac..f502ac4f 100644 --- a/reg-lib/cuda/CudaToolsKernels.cu +++ b/reg-lib/cuda/CudaToolsKernels.cu @@ -8,7 +8,7 @@ * See the LICENSE.txt file in the nifty_reg root folder */ -#include "_reg_common_cuda_kernels.cu" +#include "CudaCommon.hpp" /* *************************************************************** */ namespace NiftyReg::Cuda { @@ -23,10 +23,10 @@ __device__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda, const mat33 reorientation, const int index) { // Calculate the node coordinates - const auto [x, y, z] = reg_indexToDims_cuda(index, nodeImageDims); + const auto [x, y, z] = IndexToDims(index, nodeImageDims); // Transform into voxel coordinates float voxelCoord[3], nodeCoord[3] = { static_cast(x), static_cast(y), static_cast(z) }; - reg_mat44_mul_cuda(transformation, nodeCoord, voxelCoord); + Mat44Mul(transformation, nodeCoord, voxelCoord); // Linear interpolation float basisX[2], basisY[2], basisZ[2], interpolatedValue[3]{}; @@ -64,7 +64,7 @@ __device__ void VoxelCentricToNodeCentricKernel(float4 *nodeImageCuda, } float reorientedValue[3]; - reg_mat33_mul_cuda(reorientation, interpolatedValue, weight, reorientedValue); + Mat33Mul(reorientation, interpolatedValue, weight, reorientedValue); nodeImageCuda[index] = { reorientedValue[0], reorientedValue[1], reorientedValue[2], 0 }; } /* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_common_cuda_kernels.cu b/reg-lib/cuda/_reg_common_cuda_kernels.cu deleted file mode 100644 index 4206931d..00000000 --- a/reg-lib/cuda/_reg_common_cuda_kernels.cu +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (c) 2009-2018, University College London - * Copyright (c) 2018, NiftyReg Developers. - * All rights reserved. - * See the LICENSE.txt file in the nifty_reg root folder - */ - -#pragma once - -/* *************************************************************** */ -template -__device__ __inline__ void reg_mat33_mul_cuda(const mat33 mat, const float (&in)[3], const double weight, float (&out)[3]) { - out[0] = weight * (mat.m[0][0] * in[0] + mat.m[1][0] * in[1] + mat.m[2][0] * in[2]); - out[1] = weight * (mat.m[0][1] * in[0] + mat.m[1][1] * in[1] + mat.m[2][1] * in[2]); - if constexpr (is3d) - out[2] = weight * (mat.m[0][2] * in[0] + mat.m[1][2] * in[1] + mat.m[2][2] * in[2]); -} -/* *************************************************************** */ -template -__device__ __inline__ void reg_mat44_mul_cuda(const mat44 mat, const float (&in)[3], float (&out)[3]) { - out[0] = double(mat.m[0][0]) * double(in[0]) + double(mat.m[0][1]) * double(in[1]) + double(mat.m[0][2]) * double(in[2]) + double(mat.m[0][3]); - out[1] = double(mat.m[1][0]) * double(in[0]) + double(mat.m[1][1]) * double(in[1]) + double(mat.m[1][2]) * double(in[2]) + double(mat.m[1][3]); - if constexpr (is3d) - out[2] = double(mat.m[2][0]) * double(in[0]) + double(mat.m[2][1]) * double(in[1]) + double(mat.m[2][2]) * double(in[2]) + double(mat.m[2][3]); -} -/* *************************************************************** */ -__device__ __inline__ mat33 reg_mat33_mul_cuda(const mat33 a, const mat33 b) { - mat33 c; - for (int i = 0; i < 3; i++) - for (int j = 0; j < 3; j++) - c.m[i][j] = a.m[i][0] * b.m[0][j] + a.m[i][1] * b.m[1][j] + a.m[i][2] * b.m[2][j]; - return c; -} -/* *************************************************************** */ -__device__ __inline__ mat33 reg_mat33_inverse_cuda(const mat33 r) { - /* INPUT MATRIX: */ - const double r11 = r.m[0][0]; const double r12 = r.m[0][1]; const double r13 = r.m[0][2]; /* [ r11 r12 r13 ] */ - const double r21 = r.m[1][0]; const double r22 = r.m[1][1]; const double r23 = r.m[1][2]; /* [ r21 r22 r23 ] */ - const double r31 = r.m[2][0]; const double r32 = r.m[2][1]; const double r33 = r.m[2][2]; /* [ r31 r32 r33 ] */ - - double deti = (r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 + - r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13); - - if (deti != 0.0) deti = 1.0 / deti; - - mat33 q; - q.m[0][0] = float(deti * (r22 * r33 - r32 * r23)); - q.m[0][1] = float(deti * (-r12 * r33 + r32 * r13)); - q.m[0][2] = float(deti * (r12 * r23 - r22 * r13)); - - q.m[1][0] = float(deti * (-r21 * r33 + r31 * r23)); - q.m[1][1] = float(deti * (r11 * r33 - r31 * r13)); - q.m[1][2] = float(deti * (-r11 * r23 + r21 * r13)); - - q.m[2][0] = float(deti * (r21 * r32 - r31 * r22)); - q.m[2][1] = float(deti * (-r11 * r32 + r31 * r12)); - q.m[2][2] = float(deti * (r11 * r22 - r21 * r12)); - - return q; -} -/* *************************************************************** */ -__device__ __inline__ float reg_mat33_determ_cuda(const mat33 r) { - /* INPUT MATRIX: */ - const double r11 = r.m[0][0]; const double r12 = r.m[0][1]; const double r13 = r.m[0][2]; /* [ r11 r12 r13 ] */ - const double r21 = r.m[1][0]; const double r22 = r.m[1][1]; const double r23 = r.m[1][2]; /* [ r21 r22 r23 ] */ - const double r31 = r.m[2][0]; const double r32 = r.m[2][1]; const double r33 = r.m[2][2]; /* [ r31 r32 r33 ] */ - - return float(r11 * r22 * r33 - r11 * r32 * r23 - r21 * r12 * r33 + - r21 * r32 * r13 + r31 * r12 * r23 - r31 * r22 * r13); -} -/* *************************************************************** */ -__device__ __inline__ float reg_mat33_rownorm_cuda(const mat33 a) { - float r1 = fabs(a.m[0][0]) + fabs(a.m[0][1]) + fabs(a.m[0][2]); - const float r2 = fabs(a.m[1][0]) + fabs(a.m[1][1]) + fabs(a.m[1][2]); - const float r3 = fabs(a.m[2][0]) + fabs(a.m[2][1]) + fabs(a.m[2][2]); - if (r1 < r2) r1 = r2; - if (r1 < r3) r1 = r3; - return r1; -} -/* *************************************************************** */ -__device__ __inline__ float reg_mat33_colnorm_cuda(const mat33 a) { - float r1 = fabs(a.m[0][0]) + fabs(a.m[1][0]) + fabs(a.m[2][0]); - const float r2 = fabs(a.m[0][1]) + fabs(a.m[1][1]) + fabs(a.m[2][1]); - const float r3 = fabs(a.m[0][2]) + fabs(a.m[1][2]) + fabs(a.m[2][2]); - if (r1 < r2) r1 = r2; - if (r1 < r3) r1 = r3; - return r1; -} -/* *************************************************************** */ -__device__ __inline__ mat33 reg_mat33_polar_cuda(mat33 x) { - // Force matrix to be nonsingular - float gam = reg_mat33_determ_cuda(x); - while (gam == 0.0) { // Perturb matrix - gam = 0.00001f * (0.001f + reg_mat33_rownorm_cuda(x)); - x.m[0][0] += gam; x.m[1][1] += gam; x.m[2][2] += gam; - gam = reg_mat33_determ_cuda(x); - } - - mat33 z; - float gmi, dif = 1.0f; - int k = 0; - while (1) { - const mat33 y = reg_mat33_inverse_cuda(x); - if (dif > 0.3) { // Far from convergence - const float alp = sqrt(reg_mat33_rownorm_cuda(x) * reg_mat33_colnorm_cuda(x)); - const float bet = sqrt(reg_mat33_rownorm_cuda(y) * reg_mat33_colnorm_cuda(y)); - gam = sqrt(bet / alp); - gmi = 1.f / gam; - } else { - gam = gmi = 1.0f; // Close to convergence - } - z.m[0][0] = 0.5f * (gam * x.m[0][0] + gmi * y.m[0][0]); - z.m[0][1] = 0.5f * (gam * x.m[0][1] + gmi * y.m[1][0]); - z.m[0][2] = 0.5f * (gam * x.m[0][2] + gmi * y.m[2][0]); - z.m[1][0] = 0.5f * (gam * x.m[1][0] + gmi * y.m[0][1]); - z.m[1][1] = 0.5f * (gam * x.m[1][1] + gmi * y.m[1][1]); - z.m[1][2] = 0.5f * (gam * x.m[1][2] + gmi * y.m[2][1]); - z.m[2][0] = 0.5f * (gam * x.m[2][0] + gmi * y.m[0][2]); - z.m[2][1] = 0.5f * (gam * x.m[2][1] + gmi * y.m[1][2]); - z.m[2][2] = 0.5f * (gam * x.m[2][2] + gmi * y.m[2][2]); - - dif = (fabs(z.m[0][0] - x.m[0][0]) + fabs(z.m[0][1] - x.m[0][1]) + - fabs(z.m[0][2] - x.m[0][2]) + fabs(z.m[1][0] - x.m[1][0]) + - fabs(z.m[1][1] - x.m[1][1]) + fabs(z.m[1][2] - x.m[1][2]) + - fabs(z.m[2][0] - x.m[2][0]) + fabs(z.m[2][1] - x.m[2][1]) + - fabs(z.m[2][2] - x.m[2][2])); - - k = k + 1; - if (k > 100 || dif < 3.e-6) break; // Convergence or exhaustion - x = z; - } - - return z; -} -/* *************************************************************** */ -__device__ __inline__ void reg_div_cuda(const int num, const int denom, int& quot, int& rem) { - // This will be optimised by the compiler into a single div instruction - quot = num / denom; - rem = num % denom; -} -/* *************************************************************** */ -template -__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3 dims) { - int quot = 0, rem; - if constexpr (is3d) - reg_div_cuda(index, dims.x * dims.y, quot, rem); - else rem = index; - const int z = quot; - reg_div_cuda(rem, dims.x, quot, rem); - const int y = quot, x = rem; - return { x, y, z }; -} -/* *************************************************************** */ -__device__ __inline__ int3 reg_indexToDims_cuda(const int index, const int3 dims) { - return dims.z > 1 ? reg_indexToDims_cuda(index, dims) : reg_indexToDims_cuda(index, dims); -} -/* *************************************************************** */ diff --git a/reg-lib/cuda/_reg_nmi_gpu.cu b/reg-lib/cuda/_reg_nmi_gpu.cu index b117e568..d97b569f 100755 --- a/reg-lib/cuda/_reg_nmi_gpu.cu +++ b/reg-lib/cuda/_reg_nmi_gpu.cu @@ -11,7 +11,6 @@ */ #include "_reg_nmi_gpu.h" -#include "_reg_common_cuda_kernels.cu" /* *************************************************************** */ reg_nmi_gpu::reg_nmi_gpu(): reg_nmi::reg_nmi() { diff --git a/reg-lib/cuda/affineDeformationKernel.cu b/reg-lib/cuda/affineDeformationKernel.cu index a9ec43a1..e9bfa38d 100644 --- a/reg-lib/cuda/affineDeformationKernel.cu +++ b/reg-lib/cuda/affineDeformationKernel.cu @@ -61,7 +61,7 @@ void launchAffine(mat44 *affineTransformation, float* trans = (float *)malloc(16 * sizeof(float)); const mat44 *targetMatrix = (deformationField->sform_code > 0) ? &(deformationField->sto_xyz) : &(deformationField->qto_xyz); - mat44 transformationMatrix = compose ? *affineTransformation : reg_mat44_mul(affineTransformation, targetMatrix); + mat44 transformationMatrix = compose ? *affineTransformation : *affineTransformation * *targetMatrix; mat44ToCptr(transformationMatrix, trans); NR_CUDA_SAFE_CALL(cudaMemcpy(trans_d, trans, 16 * sizeof(float), cudaMemcpyHostToDevice)); free(trans); diff --git a/reg-lib/cuda/blockMatchingKernel.cu b/reg-lib/cuda/blockMatchingKernel.cu index f70f277f..55781c4f 100644 --- a/reg-lib/cuda/blockMatchingKernel.cu +++ b/reg-lib/cuda/blockMatchingKernel.cu @@ -15,7 +15,7 @@ #include "_reg_tools.h" #include -#include "_reg_maths.h" +#include "Maths.hpp" //////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////// diff --git a/reg-lib/cuda/resampleKernel.cu b/reg-lib/cuda/resampleKernel.cu index cfbe514f..50bcb91c 100644 --- a/reg-lib/cuda/resampleKernel.cu +++ b/reg-lib/cuda/resampleKernel.cu @@ -3,7 +3,7 @@ #include #include #include"_reg_resampling.h" -#include"_reg_maths.h" +#include"Maths.hpp" #include "resampleKernel.h" #include "CudaCommon.hpp" #include"_reg_tools.h" @@ -59,18 +59,6 @@ __device__ __inline__ void interpolantCubicSpline(FieldTYPE ratio, FieldTYPE *ba basis[3] = (FieldTYPE) ((ratio - (double)1.0) * FF / (double)2.0); } /* *************************************************************** */ -__device__ __inline__ -void reg_mat44_eye(float *mat) { - mat[0 * 4 + 0] = 1.f; - mat[0 * 4 + 1] = mat[0 * 4 + 2] = mat[0 * 4 + 3] = 0.f; - mat[1 * 4 + 1] = 1.f; - mat[1 * 4 + 0] = mat[1 * 4 + 2] = mat[1 * 4 + 3] = 0.f; - mat[2 * 4 + 2] = 1.f; - mat[2 * 4 + 0] = mat[2 * 4 + 1] = mat[2 * 4 + 3] = 0.f; - mat[3 * 4 + 3] = 1.f; - mat[3 * 4 + 0] = mat[3 * 4 + 1] = mat[3 * 4 + 2] = 0.f; -} -/* *************************************************************** */ __inline__ __device__ void interpWindowedSincKernel(double relative, double *basis) { if (relative < 0) diff --git a/reg-test/reg_test_affineDeformationField.cpp b/reg-test/reg_test_affineDeformationField.cpp index 858b541b..dc551cf8 100644 --- a/reg-test/reg_test_affineDeformationField.cpp +++ b/reg-test/reg_test_affineDeformationField.cpp @@ -39,7 +39,7 @@ class AffineDeformationFieldTest { // Identity use case - 2D mat44 identity; - reg_mat44_eye(&identity); + Mat44Eye(&identity); // Test order [0,0] [1,0] [0,1] [1,1] vector identityResult2d{ { 0, 0, 0 }, { 1, 0, 0 }, { 0, 1, 0 }, { 1, 1, 0 } }; testData.emplace_back(TestData( @@ -63,7 +63,7 @@ class AffineDeformationFieldTest { // Translation - 2D mat44 translation; - reg_mat44_eye(&translation); + Mat44Eye(&translation); translation.m[0][3] = -0.5; translation.m[1][3] = 1.5; translation.m[2][3] = 0.75; @@ -109,7 +109,7 @@ class AffineDeformationFieldTest { // Full affine - 2D // Test order [0,0] [1,0] [0,1] [1,1] mat44 affine; - reg_mat44_eye(&affine); + Mat44Eye(&affine); affine.m[0][3] = -0.5; affine.m[1][3] = 1.5; affine.m[2][3] = 0.75; diff --git a/reg-test/reg_test_be.cpp b/reg-test/reg_test_be.cpp index cdc57493..7e8e8611 100644 --- a/reg-test/reg_test_be.cpp +++ b/reg-test/reg_test_be.cpp @@ -75,8 +75,8 @@ class BendingEnergyTest { // Set some scaling transformation in the transformations mat44 affine2d, affine3d; - reg_mat44_eye(&affine2d); - reg_mat44_eye(&affine3d); + Mat44Eye(&affine2d); + Mat44Eye(&affine3d); affine3d.m[0][0] = affine2d.m[0][0] = 0.8f; affine3d.m[1][1] = affine2d.m[1][1] = 1.2f; affine3d.m[2][2] = 1.1f; diff --git a/reg-test/reg_test_blockMatching.cpp b/reg-test/reg_test_blockMatching.cpp index aa66259a..2243ec2d 100644 --- a/reg-test/reg_test_blockMatching.cpp +++ b/reg-test/reg_test_blockMatching.cpp @@ -46,7 +46,7 @@ class BMTest { // Create a translation matrix to apply OFFSET voxels along each axis mat44 translationMatrix; - reg_mat44_eye(&translationMatrix); + Mat44Eye(&translationMatrix); translationMatrix.m[0][3] = -OFFSET; translationMatrix.m[1][3] = -OFFSET; translationMatrix.m[2][3] = -OFFSET; diff --git a/reg-test/reg_test_regr_lts.cpp b/reg-test/reg_test_regr_lts.cpp index 0cc60f7f..681a8ffc 100644 --- a/reg-test/reg_test_regr_lts.cpp +++ b/reg-test/reg_test_regr_lts.cpp @@ -79,8 +79,8 @@ class LtsTest { auto&& [testName, reference, floating, ttype, inlier] = data; // Create identity transformations - unique_ptr matCpu{ new mat44 }; reg_mat44_eye(matCpu.get()); - unique_ptr matCuda{ new mat44 }; reg_mat44_eye(matCuda.get()); + unique_ptr matCpu{ new mat44 }; Mat44Eye(matCpu.get()); + unique_ptr matCuda{ new mat44 }; Mat44Eye(matCuda.get()); // Create images NiftiImage referenceCpu(reference), referenceCuda(reference); diff --git a/reg-test/reg_test_voxelCentricToNodeCentric.cpp b/reg-test/reg_test_voxelCentricToNodeCentric.cpp index 551fe96d..aa42def3 100644 --- a/reg-test/reg_test_voxelCentricToNodeCentric.cpp +++ b/reg-test/reg_test_voxelCentricToNodeCentric.cpp @@ -154,19 +154,19 @@ class VoxelCentricToNodeCentricTest { if (nodeGrad->num_ext > 0 && nodeGrad->ext_list[0].edata) { mat44 temp = *(reinterpret_cast(nodeGrad->ext_list[0].edata)); temp = nifti_mat44_inverse(temp); - transformation = reg_mat44_mul(&temp, &transformation); + transformation = temp * transformation; } // Millimetre to voxel in the reference image if (voxelGrad->sform_code > 0) - transformation = reg_mat44_mul(&voxelGrad->sto_ijk, &transformation); - else transformation = reg_mat44_mul(&voxelGrad->qto_ijk, &transformation); + transformation = voxelGrad->sto_ijk * transformation; + else transformation = voxelGrad->qto_ijk * transformation; // The information has to be reoriented // Voxel to millimetre contains the orientation of the image that is used // to compute the spatial gradient (floating image) - mat33 reorientation = reg_mat44_to_mat33(voxelToMillimetre); + mat33 reorientation = Mat44ToMat33(voxelToMillimetre); if (nodeGrad->num_ext > 0 && nodeGrad->ext_list[0].edata) { - mat33 temp = reg_mat44_to_mat33(reinterpret_cast(nodeGrad->ext_list[0].edata)); + mat33 temp = Mat44ToMat33(reinterpret_cast(nodeGrad->ext_list[0].edata)); temp = nifti_mat33_inverse(temp); reorientation = nifti_mat33_mul(temp, reorientation); } @@ -189,7 +189,7 @@ class VoxelCentricToNodeCentricTest { nodeCoord[1] = static_cast(y); for (int x = 0; x < nodeGrad->nx; x++) { nodeCoord[0] = static_cast(x); - reg_mat44_mul(&transformation, nodeCoord, voxelCoord); + Mat44Mul(transformation, nodeCoord, voxelCoord); // Linear interpolation DataType basisX[2], basisY[2], basisZ[2]; const int pre[3] = { Floor(voxelCoord[0]), Floor(voxelCoord[1]), Floor(voxelCoord[2]) };