From e1ec1f4244ae5fb9cc6575eaabc44d836f428a6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Onur=20=C3=9Clgen?= Date: Thu, 5 Oct 2023 20:53:24 +0100 Subject: [PATCH] Refactorisations --- CMakeLists.txt | 2 +- niftyreg_build_version.txt | 2 +- reg-lib/_reg_f3d.cpp | 1 + reg-lib/_reg_f3d2.cpp | 5 +- reg-lib/cpu/_reg_maths.h | 4 + reg-lib/cpu/_reg_tools.cpp | 173 +++++++++--------- reg-lib/cpu/_reg_tools.h | 16 +- reg-lib/cuda/CudaCommon.cu | 66 ++++--- ...g_test_regr_approxLinearEnergyGradient.cpp | 6 +- 9 files changed, 142 insertions(+), 133 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 67368df2..87ee07e6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,7 @@ option(BUILD_TESTING "To build the unit tests" OFF) option(USE_CUDA "To use the CUDA platform" OFF) option(USE_OPENCL "To use the OpenCL platform" OFF) option(USE_OPENMP "To use openMP for multi-CPU processing" ON) -option(USE_SSE "To enable SEE computation in some case" ON) +option(USE_SSE "To enable SSE computation in some case" ON) #----------------------------------------------------------------------------- option(USE_NRRD "To use the NRRD file format" OFF) mark_as_advanced(USE_NRRD) diff --git a/niftyreg_build_version.txt b/niftyreg_build_version.txt index 0ae9d1ef..3d9988ad 100644 --- a/niftyreg_build_version.txt +++ b/niftyreg_build_version.txt @@ -1 +1 @@ -334 +335 diff --git a/reg-lib/_reg_f3d.cpp b/reg-lib/_reg_f3d.cpp index 9c4722c0..6eedbba3 100644 --- a/reg-lib/_reg_f3d.cpp +++ b/reg-lib/_reg_f3d.cpp @@ -482,6 +482,7 @@ void reg_f3d::SetOptimiser() { template void reg_f3d::SmoothGradient() { // The gradient is smoothed using a Gaussian kernel if it is required + if (this->gradientSmoothingSigma == 0) return; this->compute->SmoothGradient(this->gradientSmoothingSigma); NR_FUNC_CALLED(); } diff --git a/reg-lib/_reg_f3d2.cpp b/reg-lib/_reg_f3d2.cpp index ea0f0d56..79317999 100644 --- a/reg-lib/_reg_f3d2.cpp +++ b/reg-lib/_reg_f3d2.cpp @@ -348,11 +348,10 @@ void reg_f3d2::GetLandmarkDistanceGradient() { /* *************************************************************** */ template void reg_f3d2::SmoothGradient() { - reg_f3d::SmoothGradient(); - // The gradient is smoothed using a Gaussian kernel if it is required + if (this->gradientSmoothingSigma == 0) return; + reg_f3d::SmoothGradient(); computeBw->SmoothGradient(this->gradientSmoothingSigma); - NR_FUNC_CALLED(); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_maths.h b/reg-lib/cpu/_reg_maths.h index 6a35bd6d..93151883 100644 --- a/reg-lib/cpu/_reg_maths.h +++ b/reg-lib/cpu/_reg_maths.h @@ -56,6 +56,10 @@ DEVICE inline T Square(const T& x) { return x * x; } template +DEVICE inline T Cube(const T& x) { + return x * x * x; +} +template DEVICE inline int Floor(const T& x) { const int i = static_cast(x); return i - (x < i); diff --git a/reg-lib/cpu/_reg_tools.cpp b/reg-lib/cpu/_reg_tools.cpp index 59aa73ba..fbd7798d 100755 --- a/reg-lib/cpu/_reg_tools.cpp +++ b/reg-lib/cpu/_reg_tools.cpp @@ -833,10 +833,11 @@ void reg_tools_kernelConvolution(nifti_image *image, const float *sigma, const int& kernelType, const int *mask, - const bool *timePoint, + const bool *timePoints, const bool *axis) { if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) - NR_FATAL_ERROR("This function does not support images with dimension > 2048"); + NR_FATAL_ERROR("This function does not support images with dimensions larger than 2048"); + #ifdef WIN32 long index; const long voxelNumber = (long)NiftiImage::calcVoxelNumber(image, 3); @@ -844,37 +845,36 @@ void reg_tools_kernelConvolution(nifti_image *image, size_t index; const size_t voxelNumber = NiftiImage::calcVoxelNumber(image, 3); #endif + DataType *imagePtr = static_cast(image->data); - int imageDim[3] = { image->nx, image->ny, image->nz }; + const int imageDims[3]{ image->nx, image->ny, image->nz }; - bool *nanImagePtr = (bool*)calloc(voxelNumber, sizeof(bool)); - float *densityPtr = (float*)calloc(voxelNumber, sizeof(float)); + unique_ptr nanImagePtr{ new bool[voxelNumber]() }; + unique_ptr densityPtr{ new float[voxelNumber]() }; // Loop over the dimension higher than 3 for (int t = 0; t < image->nt * image->nu; t++) { - if (timePoint[t]) { + if (timePoints[t]) { DataType *intensityPtr = &imagePtr[t * voxelNumber]; #ifdef _OPENMP #pragma omp parallel for default(none) \ shared(densityPtr, intensityPtr, mask, nanImagePtr, voxelNumber) #endif for (index = 0; index < voxelNumber; index++) { - densityPtr[index] = intensityPtr[index] == intensityPtr[index] ? 1.f : 0; - densityPtr[index] *= mask[index] >= 0 ? 1 : 0; - nanImagePtr[index] = static_cast(densityPtr[index]); - if (nanImagePtr[index] == 0) - intensityPtr[index] = 0; + densityPtr[index] = mask[index] >= 0 && intensityPtr[index] == intensityPtr[index] ? 1.f : 0; + nanImagePtr[index] = !static_cast(densityPtr[index]); + if (nanImagePtr[index]) intensityPtr[index] = 0; } // Loop over the x, y and z dimensions for (int n = 0; n < 3; n++) { if (axis[n] && image->dim[n] > 1) { double temp; if (sigma[t] > 0) temp = sigma[t] / image->pixdim[n + 1]; // mm to voxel - else temp = fabs(sigma[t]); // voxel based if negative value + else temp = fabs(sigma[t]); // voxel-based if negative value int radius = 0; // Define the kernel size if (kernelType == MEAN_KERNEL || kernelType == LINEAR_KERNEL) { - // Mean or linear filtering + // Mean or linear filtering radius = static_cast(temp); } else if (kernelType == GAUSSIAN_KERNEL) { // Gaussian kernel @@ -895,8 +895,10 @@ void reg_tools_kernelConvolution(nifti_image *image, for (int i = -radius; i <= radius; i++) { // temp contains the kernel node spacing double relative = fabs(i / temp); - if (relative < 1.0) kernel[i + radius] = static_cast(2.0 / 3.0 - relative * relative + 0.5 * relative * relative * relative); - else if (relative < 2.0) kernel[i + radius] = static_cast(-(relative - 2.0) * (relative - 2.0) * (relative - 2.0) / 6.0); + if (relative < 1.0) + kernel[i + radius] = static_cast(2.0 / 3.0 - Square(relative) + 0.5 * Cube(relative)); + else if (relative < 2.0) + kernel[i + radius] = static_cast(-Cube(relative - 2.0) / 6.0); else kernel[i + radius] = 0; kernelSum += kernel[i + radius]; } @@ -905,7 +907,7 @@ void reg_tools_kernelConvolution(nifti_image *image, for (int i = -radius; i <= radius; i++) { // 2.506... = sqrt(2*pi) // temp contains the sigma in voxel - kernel[radius + i] = static_cast(exp(-(i * i) / (2.0 * Square(temp))) / (temp * 2.506628274631)); + kernel[radius + i] = static_cast(exp(-Square(i) / (2.0 * Square(temp))) / (temp * 2.506628274631)); kernelSum += kernel[radius + i]; } } else if (kernelType == LINEAR_KERNEL) { @@ -914,7 +916,7 @@ void reg_tools_kernelConvolution(nifti_image *image, kernel[radius + i] = 1.f - fabs(i / static_cast(radius)); kernelSum += kernel[radius + i]; } - } else if (kernelType == MEAN_KERNEL && imageDim[2] == 1) { + } else if (kernelType == MEAN_KERNEL && imageDims[2] == 1) { // Compute the mean kernel for (int i = -radius; i <= radius; i++) { kernel[radius + i] = 1.f; @@ -922,22 +924,22 @@ void reg_tools_kernelConvolution(nifti_image *image, } } // No kernel is required for the mean filtering - // No need for kernel normalisation as this is handle by the density function + // No need for kernel normalisation as this is handled by the density function NR_DEBUG("Convolution type[" << kernelType << "] dim[" << n << "] tp[" << t << "] radius[" << radius << "] kernelSum[" << kernelSum << "]"); int planeNumber, planeIndex, lineOffset; int lineIndex, shiftPre, shiftPst, k; switch (n) { case 0: - planeNumber = imageDim[1] * imageDim[2]; + planeNumber = imageDims[1] * imageDims[2]; lineOffset = 1; break; case 1: - planeNumber = imageDim[0] * imageDim[2]; - lineOffset = imageDim[0]; + planeNumber = imageDims[0] * imageDims[2]; + lineOffset = imageDims[0]; break; case 2: - planeNumber = imageDim[0] * imageDim[1]; + planeNumber = imageDims[0] * imageDims[1]; lineOffset = planeNumber; break; } @@ -949,8 +951,8 @@ void reg_tools_kernelConvolution(nifti_image *image, float *currentDensityPtr = nullptr; DataType bufferIntensity[2048]; float bufferDensity[2048]; - double bufferIntensitycur = 0; - double bufferDensitycur = 0; + double bufferIntensityCur = 0; + double bufferDensityCur = 0; #ifdef _USE_SSE union { @@ -963,31 +965,27 @@ void reg_tools_kernelConvolution(nifti_image *image, #ifdef _OPENMP #ifdef _USE_SSE #pragma omp parallel for default(none) \ - shared(imageDim, intensityPtr, densityPtr, radius, kernel, lineOffset, n, \ - planeNumber,kernelSum) \ - private(realIndex,currentIntensityPtr,currentDensityPtr,lineIndex,bufferIntensity, \ - bufferDensity,shiftPre,shiftPst,kernelPtr,kernelValue,densitySum,intensitySum, \ - k, bufferIntensitycur,bufferDensitycur, \ + shared(imageDims, intensityPtr, densityPtr, radius, kernel, lineOffset, n, planeNumber, kernelSum) \ + private(realIndex, currentIntensityPtr, currentDensityPtr, lineIndex, bufferIntensity, \ + bufferDensity, shiftPre, shiftPst, kernelPtr, kernelValue, densitySum, intensitySum, \ + k, bufferIntensityCur, bufferDensityCur, \ kernel_sse, intensity_sse, density_sse, intensity_sum_sse, density_sum_sse) #else #pragma omp parallel for default(none) \ - shared(imageDim, intensityPtr, densityPtr, radius, kernel, lineOffset, n, \ - planeNumber,kernelSum) \ - private(realIndex,currentIntensityPtr,currentDensityPtr,lineIndex,bufferIntensity, \ - bufferDensity,shiftPre,shiftPst,kernelPtr,kernelValue,densitySum,intensitySum, \ - k, bufferIntensitycur,bufferDensitycur) + shared(imageDims, intensityPtr, densityPtr, radius, kernel, lineOffset, n, planeNumber, kernelSum) \ + private(realIndex, currentIntensityPtr, currentDensityPtr, lineIndex, bufferIntensity, \ + bufferDensity, shiftPre, shiftPst, kernelPtr, kernelValue, densitySum, intensitySum, \ + k, bufferIntensityCur, bufferDensityCur) #endif #endif // _OPENMP // Loop over the different voxel for (planeIndex = 0; planeIndex < planeNumber; ++planeIndex) { switch (n) { case 0: - realIndex = planeIndex * imageDim[0]; + realIndex = planeIndex * imageDims[0]; break; case 1: - realIndex = (planeIndex / imageDim[0]) * - imageDim[0] * imageDim[1] + - planeIndex % imageDim[0]; + realIndex = (planeIndex / imageDims[0]) * imageDims[0] * imageDims[1] + planeIndex % imageDims[0]; break; case 2: realIndex = planeIndex; @@ -998,15 +996,15 @@ void reg_tools_kernelConvolution(nifti_image *image, // Fetch the current line into a stack buffer currentIntensityPtr = &intensityPtr[realIndex]; currentDensityPtr = &densityPtr[realIndex]; - for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex) { + for (lineIndex = 0; lineIndex < imageDims[n]; ++lineIndex) { bufferIntensity[lineIndex] = *currentIntensityPtr; bufferDensity[lineIndex] = *currentDensityPtr; currentIntensityPtr += lineOffset; currentDensityPtr += lineOffset; } if (kernelSum > 0) { - // Perform the kernel convolution along 1 line - for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex) { + // Perform the kernel convolution along one line + for (lineIndex = 0; lineIndex < imageDims[n]; ++lineIndex) { // Define the kernel boundaries shiftPre = lineIndex - radius; shiftPst = lineIndex + radius + 1; @@ -1014,7 +1012,7 @@ void reg_tools_kernelConvolution(nifti_image *image, kernelPtr = &kernel[-shiftPre]; shiftPre = 0; } else kernelPtr = &kernel[0]; - if (shiftPst > imageDim[n]) shiftPst = imageDim[n]; + if (shiftPst > imageDims[n]) shiftPst = imageDims[n]; // Set the current values to zero // Increment the current value by performing the weighted sum #ifdef _USE_SSE @@ -1066,33 +1064,32 @@ void reg_tools_kernelConvolution(nifti_image *image, } // line convolution } // kernel sum else { - for (lineIndex = 1; lineIndex < imageDim[n]; ++lineIndex) { + for (lineIndex = 1; lineIndex < imageDims[n]; ++lineIndex) { bufferIntensity[lineIndex] += bufferIntensity[lineIndex - 1]; bufferDensity[lineIndex] += bufferDensity[lineIndex - 1]; } shiftPre = -radius - 1; shiftPst = radius; - for (lineIndex = 0; lineIndex < imageDim[n]; ++lineIndex, ++shiftPre, ++shiftPst) { + for (lineIndex = 0; lineIndex < imageDims[n]; ++lineIndex, ++shiftPre, ++shiftPst) { if (shiftPre > -1) { - if (shiftPst < imageDim[n]) { - bufferIntensitycur = bufferIntensity[shiftPre] - bufferIntensity[shiftPst]; - bufferDensitycur = bufferDensity[shiftPre] - bufferDensity[shiftPst]; + if (shiftPst < imageDims[n]) { + bufferIntensityCur = bufferIntensity[shiftPre] - bufferIntensity[shiftPst]; + bufferDensityCur = bufferDensity[shiftPre] - bufferDensity[shiftPst]; } else { - bufferIntensitycur = bufferIntensity[shiftPre] - bufferIntensity[imageDim[n] - 1]; - bufferDensitycur = bufferDensity[shiftPre] - bufferDensity[imageDim[n] - 1]; + bufferIntensityCur = bufferIntensity[shiftPre] - bufferIntensity[imageDims[n] - 1]; + bufferDensityCur = bufferDensity[shiftPre] - bufferDensity[imageDims[n] - 1]; } } else { - if (shiftPst < imageDim[n]) { - bufferIntensitycur = -bufferIntensity[shiftPst]; - bufferDensitycur = -bufferDensity[shiftPst]; + if (shiftPst < imageDims[n]) { + bufferIntensityCur = -bufferIntensity[shiftPst]; + bufferDensityCur = -bufferDensity[shiftPst]; } else { - bufferIntensitycur = 0; - bufferDensitycur = 0; + bufferIntensityCur = 0; + bufferDensityCur = 0; } } - intensityPtr[realIndex] = static_cast(bufferIntensitycur); - densityPtr[realIndex] = static_cast(bufferDensitycur); - + intensityPtr[realIndex] = static_cast(bufferIntensityCur); + densityPtr[realIndex] = static_cast(bufferDensityCur); realIndex += lineOffset; } // line convolution of mean filter } // No kernel computation @@ -1106,14 +1103,12 @@ void reg_tools_kernelConvolution(nifti_image *image, shared(voxelNumber, intensityPtr, densityPtr, nanImagePtr) #endif for (index = 0; index < voxelNumber; ++index) { - if (nanImagePtr[index] != 0) - intensityPtr[index] = static_cast((float)intensityPtr[index] / densityPtr[index]); - else intensityPtr[index] = std::numeric_limits::quiet_NaN(); + if (nanImagePtr[index]) + intensityPtr[index] = std::numeric_limits::quiet_NaN(); + else intensityPtr[index] = static_cast(intensityPtr[index] / densityPtr[index]); } } // check if the time point is active } // loop over the time points - free(nanImagePtr); - free(densityPtr); } /* *************************************************************** */ template @@ -1122,7 +1117,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, float varianceY, float varianceZ, int *mask, - bool *timePoint) { + bool *timePoints) { if (image->nx > 2048 || image->ny > 2048 || image->nz > 2048) NR_FATAL_ERROR("This function does not support images with dimension > 2048"); #ifdef WIN32 @@ -1134,13 +1129,13 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, #endif DataType *imagePtr = static_cast(image->data); - const int activeTimePointNumber = image->nt * image->nu; - bool *activeTimePoint = (bool*)calloc(activeTimePointNumber, sizeof(bool)); + const int activeTimePointCount = image->nt * image->nu; + bool *activeTimePoints = (bool*)calloc(activeTimePointCount, sizeof(bool)); // Check if input time points and masks are nullptr - if (timePoint == nullptr) { + if (timePoints == nullptr) { // All time points are considered as active - for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = true; - } else for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = timePoint[i]; + for (int i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = true; + } else for (int i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = timePoints[i]; int *currentMask = nullptr; if (mask == nullptr) { @@ -1156,8 +1151,8 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, typedef typename std::map::iterator DataPointMapIt; // Loop over the dimension higher than 3 - for (int t = 0; t < activeTimePointNumber; t++) { - if (activeTimePoint[t]) { + for (int t = 0; t < activeTimePointCount; t++) { + if (activeTimePoints[t]) { DataType *intensityPtr = &imagePtr[t * voxelNumber]; for (index = 0; index < voxelNumber; index++) { nanImagePtr[index] = (intensityPtr[index] == intensityPtr[index]) ? true : false; @@ -1268,7 +1263,7 @@ void reg_tools_labelKernelConvolution_core(nifti_image *image, free(tmpImagePtr); free(currentMask); - free(activeTimePoint); + free(activeTimePoints); free(nanImagePtr); } /* *************************************************************** */ @@ -1277,31 +1272,31 @@ void reg_tools_labelKernelConvolution(nifti_image *image, float varianceY, float varianceZ, int *mask, - bool *timePoint) { + bool *timePoints) { switch (image->datatype) { case NIFTI_TYPE_UINT8: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; case NIFTI_TYPE_INT8: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; case NIFTI_TYPE_UINT16: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; case NIFTI_TYPE_INT16: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; case NIFTI_TYPE_UINT32: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; case NIFTI_TYPE_INT32: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; case NIFTI_TYPE_FLOAT32: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; case NIFTI_TYPE_FLOAT64: - reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoint); + reg_tools_labelKernelConvolution_core(image, varianceX, varianceY, varianceZ, mask, timePoints); break; default: NR_FATAL_ERROR("The image data type is not supported"); @@ -1312,7 +1307,7 @@ void reg_tools_kernelConvolution(nifti_image *image, const float *sigma, const int& kernelType, const int *mask, - const bool *timePoint, + const bool *timePoints, const bool *axis) { if (image->datatype != NIFTI_TYPE_FLOAT32 && image->datatype != NIFTI_TYPE_FLOAT64) NR_FATAL_ERROR("The image is expected to be of floating precision type"); @@ -1320,18 +1315,18 @@ void reg_tools_kernelConvolution(nifti_image *image, if (image->nt <= 0) image->nt = image->dim[4] = 1; if (image->nu <= 0) image->nu = image->dim[5] = 1; - unique_ptr axisToSmooth{ new bool[3] }; + bool axisToSmooth[3]; if (axis == nullptr) { // All axis are smoothed by default - for (int i = 0; i < 3; i++) axisToSmooth[i] = true; + axisToSmooth[0] = axisToSmooth[1] = axisToSmooth[2] = true; } else for (int i = 0; i < 3; i++) axisToSmooth[i] = axis[i]; - const int activeTimePointNumber = image->nt * image->nu; - unique_ptr activeTimePoint{ new bool[activeTimePointNumber] }; - if (timePoint == nullptr) { + const int activeTimePointCount = image->nt * image->nu; + unique_ptr activeTimePoints{ new bool[activeTimePointCount] }; + if (timePoints == nullptr) { // All time points are considered as active - for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = true; - } else for (int i = 0; i < activeTimePointNumber; i++) activeTimePoint[i] = timePoint[i]; + for (int i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = true; + } else for (int i = 0; i < activeTimePointCount; i++) activeTimePoints[i] = timePoints[i]; unique_ptr currentMask; if (!mask) { @@ -1341,7 +1336,7 @@ void reg_tools_kernelConvolution(nifti_image *image, std::visit([&](auto&& imgDataType) { using ImgDataType = std::decay_t; - reg_tools_kernelConvolution(image, sigma, kernelType, mask, activeTimePoint.get(), axisToSmooth.get()); + reg_tools_kernelConvolution(image, sigma, kernelType, mask, activeTimePoints.get(), axisToSmooth); }, NiftiImage::getFloatingDataType(image)); } /* *************************************************************** */ diff --git a/reg-lib/cpu/_reg_tools.h b/reg-lib/cpu/_reg_tools.h index d776017f..d392d9c1 100755 --- a/reg-lib/cpu/_reg_tools.h +++ b/reg-lib/cpu/_reg_tools.h @@ -81,10 +81,14 @@ void reg_tools_removeSCLInfo(nifti_image *img); void reg_getRealImageSpacing(nifti_image *image, float *spacingValues); /* *************************************************************** */ -/** @brief Smooth an image using a Gaussian kernel +/** @brief Smooth an image using a specified kernel * @param image Image to be smoothed - * @param sigma Standard deviation of the Gaussian kernel - * to use. The kernel is bounded between +/- 3 sigma. + * @param sigma Standard deviation of the kernel to use. + * The kernel is bounded between +/- 3 sigma. + * @param kernelType Type of kernel to use. + * @param mask An integer mask over which the smoothing should occur. + * @param timePoints Boolean array to specify which time points have to be + * smoothed. The array follow the dim array of the nifti header. * @param axis Boolean array to specify which axis have to be * smoothed. The array follow the dim array of the nifti header. */ @@ -100,8 +104,8 @@ void reg_tools_kernelConvolution(nifti_image *image, * @param varianceX The variance of the Gaussian kernel in X * @param varianceY The variance of the Gaussian kernel in Y * @param varianceZ The variance of the Gaussian kernel in Z - * @param mask An integer mask over which the Gaussian smoothing should occur - * @param timePoint Boolean array to specify which timepoints have to be + * @param mask An integer mask over which the Gaussian smoothing should occur. + * @param timePoints Boolean array to specify which time points have to be * smoothed. */ void reg_tools_labelKernelConvolution(nifti_image *image, @@ -109,7 +113,7 @@ void reg_tools_labelKernelConvolution(nifti_image *image, float varianceY, float varianceZ, int *mask = nullptr, - bool *timePoint = nullptr); + bool *timePoints = nullptr); /* *************************************************************** */ /** @brief Downsample an image by a ratio of two * @param image Image to be downsampled diff --git a/reg-lib/cuda/CudaCommon.cu b/reg-lib/cuda/CudaCommon.cu index 387dabad..27804dcb 100644 --- a/reg-lib/cuda/CudaCommon.cu +++ b/reg-lib/cuda/CudaCommon.cu @@ -82,22 +82,23 @@ void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) { template void TransferNiftiToDevice(cudaArray *arrayCuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { - if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) - NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); + if (img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The specified image is not a single precision image"); const float *niftiImgValues = static_cast(img->data); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const auto timePointCount = img->dim[4] * img->dim[5]; unique_ptr array(new float4[voxelNumber]()); for (size_t i = 0; i < voxelNumber; i++) array[i].x = *niftiImgValues++; - if (img->dim[5] >= 2) { + if (timePointCount >= 2) { for (size_t i = 0; i < voxelNumber; i++) array[i].y = *niftiImgValues++; } - if (img->dim[5] >= 3) { + if (timePointCount >= 3) { for (size_t i = 0; i < voxelNumber; i++) array[i].z = *niftiImgValues++; } - if (img->dim[5] >= 4) { + if (timePointCount >= 4) { for (size_t i = 0; i < voxelNumber; i++) array[i].w = *niftiImgValues++; } @@ -153,29 +154,30 @@ void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const n template void TransferNiftiToDevice(cudaArray *array1Cuda, cudaArray *array2Cuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { - if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) - NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); + if (img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The specified image is not a single precision image"); const float *niftiImgValues = static_cast(img->data); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const auto timePointCount = img->dim[4] * img->dim[5]; unique_ptr array1(new float4[voxelNumber]()); unique_ptr array2(new float4[voxelNumber]()); for (size_t i = 0; i < voxelNumber; i++) array1[i].x = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) array2[i].x = *niftiImgValues++; - if (img->dim[5] >= 2) { + if (timePointCount >= 2) { for (size_t i = 0; i < voxelNumber; i++) array1[i].y = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) array2[i].y = *niftiImgValues++; } - if (img->dim[5] >= 3) { + if (timePointCount >= 3) { for (size_t i = 0; i < voxelNumber; i++) array1[i].z = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) array2[i].z = *niftiImgValues++; } - if (img->dim[5] >= 4) { + if (timePointCount >= 4) { for (size_t i = 0; i < voxelNumber; i++) array1[i].w = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) @@ -223,22 +225,23 @@ void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) { template void TransferNiftiToDevice(DataType *arrayCuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { - if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) - NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); + if (img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The specified image is not a single precision image"); const float *niftiImgValues = static_cast(img->data); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const auto timePointCount = img->dim[4] * img->dim[5]; unique_ptr array(new float4[voxelNumber]()); for (size_t i = 0; i < voxelNumber; i++) array[i].x = *niftiImgValues++; - if (img->dim[5] >= 2) { + if (timePointCount >= 2) { for (size_t i = 0; i < voxelNumber; i++) array[i].y = *niftiImgValues++; } - if (img->dim[5] >= 3) { + if (timePointCount >= 3) { for (size_t i = 0; i < voxelNumber; i++) array[i].z = *niftiImgValues++; } - if (img->dim[5] >= 4) { + if (timePointCount >= 4) { for (size_t i = 0; i < voxelNumber; i++) array[i].w = *niftiImgValues++; } @@ -273,29 +276,30 @@ void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nif template void TransferNiftiToDevice(DataType *array1Cuda, DataType *array2Cuda, const nifti_image *img) { if (sizeof(DataType) == sizeof(float4)) { - if (img->datatype != NIFTI_TYPE_FLOAT32 || img->dim[5] < 2 || img->dim[4] > 1) - NR_FATAL_ERROR("The specified image is not a single precision deformation field image"); + if (img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The specified image is not a single precision image"); const float *niftiImgValues = static_cast(img->data); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const auto timePointCount = img->dim[4] * img->dim[5]; unique_ptr array1(new float4[voxelNumber]()); unique_ptr array2(new float4[voxelNumber]()); for (size_t i = 0; i < voxelNumber; i++) array1[i].x = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) array2[i].x = *niftiImgValues++; - if (img->dim[5] >= 2) { + if (timePointCount >= 2) { for (size_t i = 0; i < voxelNumber; i++) array1[i].y = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) array2[i].y = *niftiImgValues++; } - if (img->dim[5] >= 3) { + if (timePointCount >= 3) { for (size_t i = 0; i < voxelNumber; i++) array1[i].z = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) array2[i].z = *niftiImgValues++; } - if (img->dim[5] >= 4) { + if (timePointCount >= 4) { for (size_t i = 0; i < voxelNumber; i++) array1[i].w = *niftiImgValues++; for (size_t i = 0; i < voxelNumber; i++) @@ -350,23 +354,24 @@ template void TransferFromDeviceToNifti(nifti_image *img, const DataType *arrayCuda) { if (sizeof(DataType) == sizeof(float4)) { // A nifti 5D volume is expected - if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) - NR_FATAL_ERROR("The nifti image is not a 5D volume"); + if (img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The specified image is not a single precision image"); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const auto timePointCount = img->dim[4] * img->dim[5]; thrust::device_ptr arrayCudaPtr(reinterpret_cast(arrayCuda)); const thrust::host_vector array(arrayCudaPtr, arrayCudaPtr + voxelNumber); float *niftiImgValues = static_cast(img->data); for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array[i].x; - if (img->dim[5] >= 2) { + if (timePointCount >= 2) { for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array[i].y; } - if (img->dim[5] >= 3) { + if (timePointCount >= 3) { for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array[i].z; } - if (img->dim[5] >= 4) { + if (timePointCount >= 4) { for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array[i].w; } @@ -399,9 +404,10 @@ template void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, const DataType *array2Cuda) { if (sizeof(DataType) == sizeof(float4)) { // A nifti 5D volume is expected - if (img->dim[0] < 5 || img->dim[4]>1 || img->dim[5] < 2 || img->datatype != NIFTI_TYPE_FLOAT32) - NR_FATAL_ERROR("The nifti image is not a 5D volume"); + if (img->datatype != NIFTI_TYPE_FLOAT32) + NR_FATAL_ERROR("The specified image is not a single precision image"); const size_t voxelNumber = NiftiImage::calcVoxelNumber(img, 3); + const auto timePointCount = img->dim[4] * img->dim[5]; thrust::device_ptr array1CudaPtr(reinterpret_cast(array1Cuda)); thrust::device_ptr array2CudaPtr(reinterpret_cast(array2Cuda)); const thrust::host_vector array1(array1CudaPtr, array1CudaPtr + voxelNumber); @@ -411,19 +417,19 @@ void TransferFromDeviceToNifti(nifti_image *img, const DataType *array1Cuda, con *niftiImgValues++ = array1[i].x; for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array2[i].x; - if (img->dim[5] >= 2) { + if (timePointCount >= 2) { for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array1[i].y; for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array2[i].y; } - if (img->dim[5] >= 3) { + if (timePointCount >= 3) { for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array1[i].z; for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array2[i].z; } - if (img->dim[5] >= 4) { + if (timePointCount >= 4) { for (size_t i = 0; i < voxelNumber; i++) *niftiImgValues++ = array1[i].w; for (size_t i = 0; i < voxelNumber; i++) diff --git a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp index d0fb7543..8d982112 100644 --- a/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp +++ b/reg-test/reg_test_regr_approxLinearEnergyGradient.cpp @@ -7,7 +7,7 @@ * to ensure the CPU and CUDA versions yield the same output **/ -class ApproxLinearEnergyGradient { +class ApproxLinearEnergyGradientTest { protected: using TestData = std::tuple; using TestCase = std::tuple; @@ -15,7 +15,7 @@ class ApproxLinearEnergyGradient { inline static vector testCases; public: - ApproxLinearEnergyGradient() { + ApproxLinearEnergyGradientTest() { if (!testCases.empty()) return; @@ -124,7 +124,7 @@ class ApproxLinearEnergyGradient { } }; -TEST_CASE_METHOD(ApproxLinearEnergyGradient, "Regression Approximate Linear Energy Gradient", "[regression]") { +TEST_CASE_METHOD(ApproxLinearEnergyGradientTest, "Regression Approximate Linear Energy Gradient", "[regression]") { // Loop over all generated test cases for (auto&& testCase : testCases) { // Retrieve test information