From 255b4c3ffe81e3d5efd4ebc7b40a36dedd512574 Mon Sep 17 00:00:00 2001
From: Duong Le
Date: Sun, 14 Apr 2024 17:58:58 +0000
Subject: [PATCH 01/29] Add Pad Reflection implementation and test

---
 include/miopen/miopen.h | 27 +++
 src/CMakeLists.txt | 5 +
 src/include/miopen/pad_reflection.hpp | 45 +++++
 .../miopen/pad_reflection/invoke_params.hpp | 53 +++++
 .../pad_reflection/problem_description.hpp | 89 +++++++++
 src/include/miopen/pad_reflection/solvers.hpp | 55 ++++++
 src/kernels/MIOpenPadReflection.cpp | 146 ++++++++++++++
 src/pad_reflection.cpp | 67 +++++++
 src/pad_reflection/problem_description.cpp | 55 ++++++
 src/pad_reflection_api.cpp | 54 +++++
 src/solver/pad_reflection/pad_reflection.cpp | 176 +++++++++++++++++
 test/cpu_pad_reflection.hpp | 91 +++++++++
 test/gtest/pad_reflection.cpp | 65 ++++++
 test/gtest/pad_reflection.hpp | 186 ++++++++++++++++++
 14 files changed, 1114 insertions(+)
 create mode 100644 src/include/miopen/pad_reflection.hpp
 create mode 100644 src/include/miopen/pad_reflection/invoke_params.hpp
 create mode 100644 src/include/miopen/pad_reflection/problem_description.hpp
 create mode 100644 src/include/miopen/pad_reflection/solvers.hpp
 create mode 100644 src/kernels/MIOpenPadReflection.cpp
 create mode 100644 src/pad_reflection.cpp
 create mode 100644 src/pad_reflection/problem_description.cpp
 create mode 100644 src/pad_reflection_api.cpp
 create mode 100644 src/solver/pad_reflection/pad_reflection.cpp
 create mode 100644 test/cpu_pad_reflection.hpp
 create mode 100644 test/gtest/pad_reflection.cpp
 create mode 100644 test/gtest/pad_reflection.hpp

diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h
index e768c7b349..bf69585717 100644
--- a/include/miopen/miopen.h
+++ b/include/miopen/miopen.h
@@ -5795,6 +5795,33 @@ MIOPEN_EXPORT miopenStatus_t miopenSumForward(miopenHandle_t handle,
 
 #ifdef MIOPEN_BETA_API
 
+
+/*! @brief Execute a 2D reflection padding forward layer
+ *
+ * Pads the input tensor x by reflecting its values across the tensor borders
+ * and writes the result to the output tensor y.
+ *
+ * @param handle   MIOpen handle (input)
+ * @param xDesc    Tensor descriptor for data input tensor x (input)
+ * @param x        Data tensor x (input)
+ * @param yDesc    Tensor descriptor for output data tensor y (input)
+ * @param y        Data tensor y (output)
+ * @param padding  Padding sizes; padding[0] = left (W) pad, padding[2] = top (H) pad (input)
+ * @return         miopenStatus_t
+ */
+MIOPEN_EXPORT miopenStatus_t miopenPadReflection(miopenHandle_t handle,
+                                                 const miopenTensorDescriptor_t xDesc,
+                                                 const void* x,
+                                                 const miopenTensorDescriptor_t yDesc,
+                                                 void* y,
+                                                 const int * padding);
+
+/** @} */
+// CLOSEOUT SUM DOXYGEN GROUP
+#endif
+
+#ifdef MIOPEN_BETA_API
+
 /*! @ingroup argmax
  * @brief Find the index of the maximum value of a tensor across dimensions.
* diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1c7f8f7a8e..9550c878e1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -152,6 +152,7 @@ set( MIOpen_Source reducetensor.cpp reducetensor_api.cpp reduce/problem_description.cpp + pad_reflection/problem_description.cpp rnn.cpp rnn_api.cpp rnn/rnn_util.cpp @@ -269,10 +270,12 @@ set( MIOpen_Source solver/pooling/backwardNd.cpp solver/reduce/forward_argmax.cpp solver/reduce/forward_sum.cpp + solver/pad_reflection/pad_reflection.cpp solver/softmax/attn_softmax.cpp solver/softmax/softmax.cpp subbuffers.cpp sum_api.cpp + pad_reflection_api.cpp target_properties.cpp temp_file.cpp tensor.cpp @@ -468,6 +471,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN kernels/MIOpenSoftmax.cl kernels/MIOpenSoftmaxAttn.cpp kernels/MIOpenSum.cpp + kernels/MIOpenPadReflection.cpp kernels/MIOpenUtilKernels3.cl kernels/MIOpenUtilKernels4.cl kernels/MIOpenUtilKernels5.cl @@ -602,6 +606,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN ocl/fusionopconvocl.cpp ocl/fusionopbiasbnactivocl.cpp sum.cpp + pad_reflection.cpp ${PROJECT_BINARY_DIR}/db_path.cpp ) diff --git a/src/include/miopen/pad_reflection.hpp b/src/include/miopen/pad_reflection.hpp new file mode 100644 index 0000000000..5fa22935ae --- /dev/null +++ b/src/include/miopen/pad_reflection.hpp @@ -0,0 +1,45 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#ifndef MIOPEN_SUM_HPP_ +#define MIOPEN_SUM_HPP_ + +#include + +namespace miopen { + +struct Handle; +struct TensorDescriptor; + +miopenStatus_t PadReflection(Handle& handle, + const TensorDescriptor& xDesc, + ConstData_t x, + const TensorDescriptor& yDesc, + Data_t y, + const int* padding + ); + +} // namespace miopen +#endif // _MIOPEN_SUM_HPP_ diff --git a/src/include/miopen/pad_reflection/invoke_params.hpp b/src/include/miopen/pad_reflection/invoke_params.hpp new file mode 100644 index 0000000000..db2b434d06 --- /dev/null +++ b/src/include/miopen/pad_reflection/invoke_params.hpp @@ -0,0 +1,53 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#pragma once + +#include +#include + +namespace miopen { +namespace pad_reflection { + +struct InvokeParams : public miopen::InvokeParams +{ + InvokeParams() = default; + + const TensorDescriptor* xDesc = nullptr; + const TensorDescriptor* yDesc = nullptr; + + ConstData_t x = nullptr; + Data_t y = nullptr; + + const int* padding = nullptr; + + std::size_t GetWorkspaceSize() const { return 0; } + Data_t GetWorkspace() const { return nullptr; } +}; + +} // namespace pad_reflection + +} // namespace miopen diff --git a/src/include/miopen/pad_reflection/problem_description.hpp b/src/include/miopen/pad_reflection/problem_description.hpp new file mode 100644 index 0000000000..87c6521017 --- /dev/null +++ b/src/include/miopen/pad_reflection/problem_description.hpp @@ -0,0 +1,89 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ +#pragma once + +#include +#include +#include +#include +#include + +namespace miopen { + +struct NetworkConfig; + +namespace pad_reflection { + +struct ProblemDescription : ProblemDescriptionBase +{ + ProblemDescription(const TensorDescriptor& xDesc_, + const TensorDescriptor& yDesc_) + : xDesc(xDesc_), yDesc(yDesc_) + { + } + + const TensorDescriptor& GetXDesc() const { return xDesc; } + const TensorDescriptor& GetYDesc() const { return yDesc; } + + + bool IsSameType() const + { + if(xDesc.GetType() != yDesc.GetType()) + { +#if MIOPEN_BUILD_DEV || !MIOPEN_NDEBUG + MIOPEN_THROW(miopenStatusBadParm, "Pad Reflection: Tensor types do not match."); +#else + return false; +#endif + } + return true; + } + + bool IsAllPacked() const + { + if(!(xDesc.IsPacked() && yDesc.IsPacked())) + { +#if MIOPEN_BUILD_DEV || !MIOPEN_NDEBUG + MIOPEN_THROW(miopenStatusBadParm, "Pad Reflection: Unpacked tensors not supported."); +#else + return false; +#endif + } + return true; + } + + + + NetworkConfig MakeNetworkConfig() const override; + +private: + TensorDescriptor xDesc; + TensorDescriptor yDesc; +}; + +} // namespace pad_reflection + +} // namespace miopen diff --git a/src/include/miopen/pad_reflection/solvers.hpp b/src/include/miopen/pad_reflection/solvers.hpp new file mode 100644 index 0000000000..3becfd36b1 --- /dev/null +++ b/src/include/miopen/pad_reflection/solvers.hpp @@ -0,0 +1,55 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ +#pragma once + +#include +#include +#include + +namespace miopen { + +namespace solver { + +namespace pad_reflection { + +using PadReflectionSolver = NonTunableSolverBase; + +struct PadReflection final : PadReflectionSolver +{ + const std::string& SolverDbId() const override { return GetSolverDbId(); } + + bool IsApplicable(const ExecutionContext& context, + const miopen::pad_reflection::ProblemDescription& problem) const override; + ConvSolution GetSolution(const ExecutionContext& context, + const miopen::pad_reflection::ProblemDescription& problem) const override; + bool MayNeedWorkspace() const override { return true; } +}; + +} // namespace pad_reflection + +} // namespace solver + +} // namespace miopen diff --git a/src/kernels/MIOpenPadReflection.cpp b/src/kernels/MIOpenPadReflection.cpp new file mode 100644 index 0000000000..11175485c3 --- /dev/null +++ b/src/kernels/MIOpenPadReflection.cpp @@ -0,0 +1,146 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ +#include +#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS +#include +#include +#endif + +#include "float_types.h" + +// extern "C" __global__ void PadReflection2dFwdContiguous(const FLOAT* __restrict__ input, +// FLOAT* __restrict__ output, +// uint64_t output_size, +// long padding_l, long padding_t, +// const size_t * input_tv_size, +// const size_t * output_tv_size, +// const size_t * input_tv_stride +// ) +// { +// const size_t gid = threadIdx.x + blockIdx.x * blockDim.x; +// if(gid >= output_size) +// return; + +// // gid to output n, c, h, w +// size_t in_H = input_tv_size[2]; +// size_t in_W = input_tv_size[3]; + +// long n, c, h, w; +// // GET_NCHW(n, c, h, w, gid, output); +// ulong nch = (gid) / output_tv_size[3]; +// w = (gid) % output_tv_size[3]; +// ulong nc = nch / output_tv_size[2]; +// h = nch % output_tv_size[2]; +// n = nc / output_tv_size[1]; +// c = nc % output_tv_size[1]; + +// long in_start_x = max(0L, -padding_l); +// long in_start_y = max(0L, -padding_t); +// long out_start_x = max(0L, padding_l); +// long out_start_y = max(0L, padding_t); + +// if (w < padding_l) { +// w = padding_l * 2 - w; +// } else if (padding_l <= w && w < in_W + padding_l) { +// w = w; +// } else { +// w = (in_W + padding_l - 1) * 2 - w; +// } +// w = w - out_start_x + in_start_x; + +// if (h < padding_t) { +// h = padding_t * 2 - h; +// } else if (padding_t <= h && h < in_H + padding_t) { +// h = h; +// } else { +// h = (in_H + padding_t - 1) * 2 - h; +// } +// h = h - out_start_y + in_start_y; + +// output[gid] = input[(input_tv_stride[3] * (w)) + +// (input_tv_stride[2] * (h)) + +// (input_tv_stride[1] * (c)) + +// (input_tv_stride[0] * (n)) + +// 0]; +// } + +extern "C" __global__ void PadReflection2dFwdContiguous(const FLOAT* __restrict__ input, + FLOAT* __restrict__ output, + uint64_t output_size, + long padding_l, long padding_t, + const size_t in_H, + const size_t in_W, + const size_t output_size_1, + const size_t output_size_2, + const size_t output_size_3, + const size_t input_stride_0, + const size_t input_stride_1, + const size_t input_stride_2, + const size_t input_stride_3 + ) +{ + const size_t gid = threadIdx.x + blockIdx.x * blockDim.x; + if(gid >= output_size) + return; + + long n, c, h, w; + // GET_NCHW(n, c, h, w, gid, output); + ulong nch = (gid) / output_size_3; + w = (gid) % output_size_3; + ulong nc = nch / output_size_2; + h = nch % output_size_2; + n = nc / output_size_1; + c = nc % output_size_1; + + long in_start_x = max(0L, -padding_l); + long in_start_y = max(0L, -padding_t); + long out_start_x = max(0L, padding_l); + long out_start_y = max(0L, padding_t); + + if (w < padding_l) { + w = padding_l * 2 - w; + } else if (padding_l <= w && w < in_W + padding_l) { + w = w; + } else { + w = (in_W + padding_l - 1) * 2 - w; + } + w = w - out_start_x + in_start_x; + + if (h < padding_t) { + h = padding_t * 2 - h; + } else if (padding_t <= h && h < in_H + padding_t) { + h = h; + } else { + h = (in_H + padding_t - 1) * 2 - h; + } + h = h - out_start_y + in_start_y; + + output[gid] = input[(input_stride_3 * (w)) + + (input_stride_2 * (h)) + + (input_stride_1 * (c)) + + (input_stride_0 * (n)) + + 0]; +} \ No newline at end of file diff --git a/src/pad_reflection.cpp b/src/pad_reflection.cpp new file mode 100644 index 0000000000..2f2a2b4241 --- /dev/null +++ b/src/pad_reflection.cpp @@ -0,0 +1,67 @@ +/******************************************************************************* + * + * MIT 
License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace miopen { + +miopenStatus_t PadReflection(Handle& handle, + const TensorDescriptor& xDesc, + ConstData_t x, + const TensorDescriptor& yDesc, + Data_t y, + const int* padding + ) +{ + const auto problem = pad_reflection::ProblemDescription{xDesc, yDesc}; + + const auto invoke_params = [&]() { + auto tmp = pad_reflection::InvokeParams{}; + tmp.type = InvokeType::Run; + tmp.xDesc = &xDesc; + tmp.yDesc = &yDesc; + tmp.x = x; + tmp.y = y; + tmp.padding = padding; + return tmp; + }(); + + const auto algo = AlgorithmName{"PadReflection"}; + const auto solvers = solver::SolverContainer{}; + + solvers.ExecutePrimitive(handle, problem, algo, invoke_params); + + return miopenStatusSuccess; +} + +} // namespace miopen diff --git a/src/pad_reflection/problem_description.cpp b/src/pad_reflection/problem_description.cpp new file mode 100644 index 0000000000..e0b3545854 --- /dev/null +++ b/src/pad_reflection/problem_description.cpp @@ -0,0 +1,55 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include +#include + +#include + +namespace miopen { + +namespace pad_reflection { + +NetworkConfig ProblemDescription::MakeNetworkConfig() const +{ + auto xlength = xDesc.GetLengths(); + auto ylength = yDesc.GetLengths(); + + auto output_size = std::accumulate( + ylength.begin(), ylength.end(), static_cast(1), std::multiplies()); + auto dtype = xDesc.GetType(); + + std::ostringstream ss; + + ss << "dtype" << dtype; + ss << "output_size" << output_size; + + return NetworkConfig{ss.str()}; +} + +} // namespace pad_reflection + +} // namespace miopen diff --git a/src/pad_reflection_api.cpp b/src/pad_reflection_api.cpp new file mode 100644 index 0000000000..26c99c25ea --- /dev/null +++ b/src/pad_reflection_api.cpp @@ -0,0 +1,54 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include +#include +#include +#include +#include + + +extern "C" miopenStatus_t miopenPadReflection(miopenHandle_t handle, + const miopenTensorDescriptor_t xDesc, + const void* x, + const miopenTensorDescriptor_t yDesc, + void* y, + const int * padding + ) +{ + MIOPEN_LOG_FUNCTION( + handle, xDesc, x, yDesc, y, padding); + + return miopen::try_([&] { + miopen::PadReflection(miopen::deref(handle), + miopen::deref(xDesc), + DataCast(x), + miopen::deref(yDesc), + DataCast(y), + padding + ); + }); +} diff --git a/src/solver/pad_reflection/pad_reflection.cpp b/src/solver/pad_reflection/pad_reflection.cpp new file mode 100644 index 0000000000..b73aa4e7b7 --- /dev/null +++ b/src/solver/pad_reflection/pad_reflection.cpp @@ -0,0 +1,176 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#define LOCAL_SIZE 256 + +namespace miopen { + +namespace solver { + +namespace pad_reflection { + +bool PadReflection::IsApplicable(const ExecutionContext& context, + const miopen::pad_reflection::ProblemDescription& problem) const +{ + return true; +} + +ConvSolution +PadReflection::GetSolution(const ExecutionContext& context, + const miopen::pad_reflection::ProblemDescription& problem) const +{ + auto result = ConvSolution{miopenStatusSuccess}; + + auto dtype = problem.GetXDesc().GetType(); + auto xdims = problem.GetXDesc().GetLengths(); + auto ydims = problem.GetYDesc().GetLengths(); + + auto kernel = KernelInfo{}; + + kernel.kernel_file = "MIOpenPadReflection.cpp"; + kernel.kernel_name = "PadReflection2dFwdContiguous"; + auto output_numel = + std::accumulate(ydims.begin(), ydims.end(), 1ULL, std::multiplies()); + + const auto build_params = KernelBuildParameters{ + {"MIOPEN_USE_FP16", static_cast(dtype == miopenHalf)}, + {"MIOPEN_USE_FP32", static_cast(dtype == miopenFloat)}, + {"MIOPEN_USE_FP64", static_cast(dtype == miopenDouble)}, + {"MIOPEN_USE_BFP16", static_cast(dtype == miopenBFloat16)}, + }; + + kernel.comp_options = build_params.GenerateFor(kbp::HIP{}); + + size_t xlocalsize = LOCAL_SIZE; + size_t xgridsize = AlignUp(output_numel, xlocalsize); + size_t ylocalsize = 1; + size_t ygridsize = 1; + size_t zlocalsize = 1; + size_t zgridsize = 1; + kernel.l_wk.push_back(xlocalsize); + kernel.l_wk.push_back(ylocalsize); + kernel.l_wk.push_back(zlocalsize); + + kernel.g_wk.push_back(xgridsize); + kernel.g_wk.push_back(ygridsize); + kernel.g_wk.push_back(zgridsize); + + result.construction_params.push_back(kernel); + + result.invoker_factory = [](const std::vector& kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) kernel = handle_.Run(kernels[0]); + decltype(auto) params = raw_params.CastTo(); + + auto xdims = params.xDesc->GetLengths(); + auto ydims = params.yDesc->GetLengths(); + + auto xstrides = params.xDesc->GetStrides(); + + auto output_size = + std::accumulate(ydims.begin(), ydims.end(), 1ULL, std::multiplies()); + + // const size_t* xdims_data; + // const size_t* ydims_data; + // const size_t* xstrides_data; + + // hipMalloc(&xdims_data, sizeof(size_t) * xdims.size()); + // hipMalloc(&ydims_data, 
sizeof(size_t) * ydims.size()); + // hipMalloc(&xstrides_data, sizeof(size_t) * xstrides.size()); + + // hipMemcpy((void*)xdims_data, + // xdims.data(), + // sizeof(size_t) * xdims.size(), + // hipMemcpyHostToDevice); + // hipMemcpy((void*)ydims_data, + // ydims.data(), + // sizeof(size_t) * ydims.size(), + // hipMemcpyHostToDevice); + // hipMemcpy((void*)xstrides_data, + // xstrides.data(), + // sizeof(size_t) * xstrides.size(), + // hipMemcpyHostToDevice); + + // long padding_l = params.padding[0]; + // long padding_t = params.padding[2]; + // kernel(params.x, + // params.y, + // output_size, + // padding_l, + // padding_t, + // xdims_data, + // ydims_data, + // xstrides_data); + + // hipFree((void*)xdims_data); + // hipFree((void*)ydims_data); + // hipFree((void*)xstrides_data); + + + long padding_l = params.padding[0]; + long padding_t = params.padding[2]; + size_t in_H = xdims[2]; + size_t in_W = xdims[3]; + size_t output_size_1 = ydims[1]; + size_t output_size_2 = ydims[2]; + size_t output_size_3 = ydims[3]; + size_t input_stride_0 = xstrides[0]; + size_t input_stride_1 = xstrides[1]; + size_t input_stride_2 = xstrides[2]; + size_t input_stride_3 = xstrides[3]; + kernel(params.x, + params.y, + output_size, + padding_l, + padding_t, + in_H, + in_W, + output_size_1, + output_size_2, + output_size_3, + input_stride_0, + input_stride_1, + input_stride_2, + input_stride_3); + }; + }; + + return result; +} + +} // namespace pad_reflection + +} // namespace solver + +} // namespace miopen diff --git a/test/cpu_pad_reflection.hpp b/test/cpu_pad_reflection.hpp new file mode 100644 index 0000000000..a64069a5c6 --- /dev/null +++ b/test/cpu_pad_reflection.hpp @@ -0,0 +1,91 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ +#ifndef GUARD_CPU_SUM_HPP +#define GUARD_CPU_SUM_HPP + +#include "tensor_holder.hpp" + +template +void cpu_pad_reflection(tensor input_tensor, + tensor& ref_output_tensor, + const int * padding_gpu + ) +{ + long padding_l = padding_gpu[0]; + long padding_t = padding_gpu[2]; + auto input_dims = input_tensor.desc.GetLengths(); + auto output_dims = ref_output_tensor.desc.GetLengths(); + auto input = input_tensor.data.data(); + auto output = ref_output_tensor.data.data(); + auto input_strides = input_tensor.desc.GetStrides(); + auto output_size = + std::accumulate(output_dims.begin(), output_dims.end(), 1L, std::multiplies()); + + size_t in_H = input_dims[2]; + size_t in_W = input_dims[3]; + + for (size_t gid = 0; gid < output_size; ++gid) { + + long n, c, h, w; + // GET_NCHW(n, c, h, w, gid, output); + ulong nch = (gid) / output_dims[3]; + w = (gid) % output_dims[3]; + ulong nc = nch / output_dims[2]; + h = nch % output_dims[2]; + n = nc / output_dims[1]; + c = nc % output_dims[1]; + + long in_start_x = max(0L, -padding_l); + long in_start_y = max(0L, -padding_t); + long out_start_x = max(0L, padding_l); + long out_start_y = max(0L, padding_t); + + if (w < padding_l) { + w = padding_l * 2 - w; + } else if (padding_l <= w && w < in_W + padding_l) { + w = w; + } else { + w = (in_W + padding_l - 1) * 2 - w; + } + w = w - out_start_x + in_start_x; + + if (h < padding_t) { + h = padding_t * 2 - h; + } else if (padding_t <= h && h < in_H + padding_t) { + h = h; + } else { + h = (in_H + padding_t - 1) * 2 - h; + } + h = h - out_start_y + in_start_y; + + output[gid] = input[(input_strides[3] * (w)) + + (input_strides[2] * (h)) + + (input_strides[1] * (c)) + + (input_strides[0] * (n)) + + 0]; + } +} +#endif diff --git a/test/gtest/pad_reflection.cpp b/test/gtest/pad_reflection.cpp new file mode 100644 index 0000000000..90f09eee41 --- /dev/null +++ b/test/gtest/pad_reflection.cpp @@ -0,0 +1,65 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include "pad_reflection.hpp" +#include + +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_TEST_FLOAT_ARG) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) + +namespace pad_reflection { + +std::string GetFloatArg() +{ + const auto& tmp = miopen::GetStringEnv(ENV(MIOPEN_TEST_FLOAT_ARG)); + if(tmp.empty()) + { + return ""; + } + return tmp; +} + +struct PadReflectionTestFloat : PadReflectionTest +{ +}; + +} // namespace sum_duong +using namespace pad_reflection; + +TEST_P(PadReflectionTestFloat, SumDuongTestFw) +{ + if(miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)) && (GetFloatArg() == "--float")) + { + RunTest(); + Verify(); + } + else + { + GTEST_SKIP(); + } +}; + +INSTANTIATE_TEST_SUITE_P(SumDuongTestSet, PadReflectionTestFloat, testing::ValuesIn(PadReflectionTestFloatConfigs())); diff --git a/test/gtest/pad_reflection.hpp b/test/gtest/pad_reflection.hpp new file mode 100644 index 0000000000..9dcc60a4db --- /dev/null +++ b/test/gtest/pad_reflection.hpp @@ -0,0 +1,186 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ + +#include "../driver/tensor_driver.hpp" +#include "cpu_pad_reflection.hpp" +#include "get_handle.hpp" +#include "random.hpp" +#include "tensor_holder.hpp" +#include "verify.hpp" +#include +#include +#include + +struct PadReflectionCase +{ + size_t N; + size_t C; + size_t D; + size_t H; + size_t W; + int padding[4]; + friend std::ostream& operator<<(std::ostream& os, const PadReflectionCase& tc) + { + return os << " N:" << tc.N << " C:" << tc.C << " D:" << tc.D << " H:" << tc.H + << " W:" << tc.W; + } + + std::vector GetInput() + { + if((N != 0) && (C != 0) && (D != 0) && (H != 0) && (W != 0)) + { + return std::vector({N, C, D, H, W}); + } + else if((N != 0) && (C != 0) && (H != 0) && (W != 0)) + { + return std::vector({N, C, H, W}); + } + else if((N != 0) && (C != 0) && (W != 0)) + { + return std::vector({N, C, W}); + } + else if((N != 0) && (W != 0)) + { + return std::vector({N, W}); + } + else if((N != 0)) + { + return std::vector({N}); + } + else + { + std::cout << "Error Input Tensor Lengths\n" << std::endl; + return std::vector({0}); + } + } + + int * GetPadding() { + return padding; + } +}; + +std::vector PadReflectionTestFloatConfigs() +{ // n c d h w dim nanPropagation + // clang-format off + return { + // { 8, 120, 0, 0, 1}, //bart + // { 8, 120, 0, 0, 1}, + // { 8, 1023, 0, 0, 1}, //gpt_neo + // { 8, 1024, 0, 0, 768}, + // { 8, 1023, 0, 0, 1}, + // { 8, 1024, 0, 0, 768}, + // { 16, 1024, 0, 0, 768}, //gpt2 + // { 16, 1024, 0, 0, 768}, + { 1, 1, 0, 3, 3, {2, 2, 2, 2}}, + { 48, 8, 0, 512, 512, {1, 1, 1, 1}}, + { 48, 8, 0, 512, 512, {1, 1, 3, 3}}, + { 48, 8, 0, 512, 512, {0, 0, 2, 2}}, + { 16, 311, 0, 98, 512, {1, 1, 1, 1}}, + { 16, 311, 0, 98, 512, {1, 1, 3, 3}}, + { 16, 311, 0, 98, 512, {0, 0, 2, 2}}, + }; + // clang-format on +} + +template +struct PadReflectionTest : public ::testing::TestWithParam +{ +protected: + void SetUp() override + { + auto&& handle = get_handle(); + pad_reflection_config = GetParam(); + auto gen_value = [](auto...) 
{ return prng::gen_descreet_uniform_sign<T>(1e-2, 100); };
+
+        auto in_dims = pad_reflection_config.GetInput();
+        auto padding = pad_reflection_config.GetPadding();
+        input        = tensor<T>{in_dims}.generate(gen_value);
+        std::vector<size_t> out_dims;
+
+        for(int i = 0; i < in_dims.size(); i++)
+        {
+            if(i == 2)
+            {
+                out_dims.push_back(in_dims[i] + 2*padding[2]);
+            } else if (i == 3) {
+                out_dims.push_back(in_dims[i] + 2*padding[0]);
+            }
+            else {
+                out_dims.push_back(in_dims[i]);
+            }
+        }
+
+        output = tensor<T>{out_dims};
+        std::fill(output.begin(), output.end(), std::numeric_limits<T>::quiet_NaN());
+
+        ref_output = tensor<T>{out_dims};
+        std::fill(ref_output.begin(), ref_output.end(), std::numeric_limits<T>::quiet_NaN());
+
+        input_dev  = handle.Write(input.data);
+        output_dev = handle.Write(output.data);
+    }
+    void RunTest()
+    {
+        auto&& handle = get_handle();
+        auto padding  = pad_reflection_config.GetPadding();
+
+        cpu_pad_reflection(input, ref_output, padding);
+        miopenStatus_t status;
+        const int * padding_gpu;
+        hipMalloc(&padding_gpu, sizeof(int) * 4);
+        hipMemcpy((void*)padding_gpu, padding, sizeof(int) * 4, hipMemcpyHostToDevice);
+
+        status = miopen::PadReflection(handle,
+                                       input.desc,
+                                       input_dev.get(),
+                                       output.desc,
+                                       output_dev.get(),
+                                       padding_gpu
+                                       );
+
+        EXPECT_EQ(status, miopenStatusSuccess);
+
+        output.data = handle.Read<T>(output_dev, output.data.size());
+    }
+
+    void Verify()
+    {
+        for(int i = 0; i < output.data.size(); ++i)
+        {
+            EXPECT_EQ(output.data[i], ref_output.data[i]);
+        }
+    }
+    PadReflectionCase pad_reflection_config;
+
+    tensor<T> input;
+    tensor<T> output;
+
+    tensor<T> ref_output;
+
+    miopen::Allocator::ManageDataPtr input_dev;
+    miopen::Allocator::ManageDataPtr output_dev;
+
+};

From ef7e9a5ec5f74250850bfc80ac2e3079eea42c65 Mon Sep 17 00:00:00 2001
From: Duong Le
Date: Sun, 14 Apr 2024 18:17:23 +0000
Subject: [PATCH 02/29] Change some define variable

---
 src/include/miopen/pad_reflection.hpp | 6 +++---
 test/cpu_pad_reflection.hpp | 4 ++--
 test/gtest/pad_reflection.cpp | 6 +++---
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/include/miopen/pad_reflection.hpp b/src/include/miopen/pad_reflection.hpp
index 5fa22935ae..bbbcc8a1f8 100644
--- a/src/include/miopen/pad_reflection.hpp
+++ b/src/include/miopen/pad_reflection.hpp
@@ -23,8 +23,8 @@
  * SOFTWARE.
  *
  *******************************************************************************/
-#ifndef MIOPEN_SUM_HPP_
-#define MIOPEN_SUM_HPP_
+#ifndef MIOPEN_PAD_REFLECTION_HPP_
+#define MIOPEN_PAD_REFLECTION_HPP_
 
 #include 
 
@@ -42,4 +42,4 @@ miopenStatus_t PadReflection(Handle& handle,
                              );
 
 } // namespace miopen
-#endif // _MIOPEN_SUM_HPP_
+#endif // MIOPEN_PAD_REFLECTION_HPP_
diff --git a/test/cpu_pad_reflection.hpp b/test/cpu_pad_reflection.hpp
index a64069a5c6..0fd774ec71 100644
--- a/test/cpu_pad_reflection.hpp
+++ b/test/cpu_pad_reflection.hpp
@@ -23,8 +23,8 @@
  * SOFTWARE.
* *******************************************************************************/ -#ifndef GUARD_CPU_SUM_HPP -#define GUARD_CPU_SUM_HPP +#ifndef GUARD_CPU_PAD_REFLECTION_HPP +#define GUARD_CPU_PAD_REFLECTION_HPP #include "tensor_holder.hpp" diff --git a/test/gtest/pad_reflection.cpp b/test/gtest/pad_reflection.cpp index 90f09eee41..b4986b94ae 100644 --- a/test/gtest/pad_reflection.cpp +++ b/test/gtest/pad_reflection.cpp @@ -46,10 +46,10 @@ struct PadReflectionTestFloat : PadReflectionTest { }; -} // namespace sum_duong +} // namespace pad_reflection using namespace pad_reflection; -TEST_P(PadReflectionTestFloat, SumDuongTestFw) +TEST_P(PadReflectionTestFloat, PadReflectionFw) { if(miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)) && (GetFloatArg() == "--float")) { @@ -62,4 +62,4 @@ TEST_P(PadReflectionTestFloat, SumDuongTestFw) } }; -INSTANTIATE_TEST_SUITE_P(SumDuongTestSet, PadReflectionTestFloat, testing::ValuesIn(PadReflectionTestFloatConfigs())); +INSTANTIATE_TEST_SUITE_P(PadReflectionTestSet, PadReflectionTestFloat, testing::ValuesIn(PadReflectionTestFloatConfigs())); From c70a5ce445a99b52584465b2ee4b8dbd95161293 Mon Sep 17 00:00:00 2001 From: Duong Le Date: Mon, 15 Apr 2024 09:32:38 +0000 Subject: [PATCH 03/29] Change padding to std::vector, update kernel new format, add MIOpenDriver for pad_reflection --- driver/CMakeLists.txt | 1 + driver/dm_pad_reflection.cpp | 40 ++ driver/driver.hpp | 5 +- driver/pad_reflection_driver.hpp | 394 ++++++++++++++++++ include/miopen/miopen.h | 3 +- src/include/miopen/pad_reflection.hpp | 2 +- .../miopen/pad_reflection/invoke_params.hpp | 2 +- .../pad_reflection/problem_description.hpp | 8 +- src/kernels/MIOpenPadReflection.cpp | 160 ++++--- src/pad_reflection.cpp | 6 +- src/pad_reflection_api.cpp | 2 +- src/solver/pad_reflection/pad_reflection.cpp | 106 ++--- test/cpu_pad_reflection.hpp | 6 +- test/gtest/pad_reflection.hpp | 16 +- 14 files changed, 571 insertions(+), 180 deletions(-) create mode 100644 driver/dm_pad_reflection.cpp create mode 100644 driver/pad_reflection_driver.hpp diff --git a/driver/CMakeLists.txt b/driver/CMakeLists.txt index 224e550fed..17a6c20628 100644 --- a/driver/CMakeLists.txt +++ b/driver/CMakeLists.txt @@ -51,6 +51,7 @@ add_executable(MIOpenDriver dm_softmax.cpp dm_sum.cpp dm_tensorop.cpp + dm_pad_reflection.cpp main.cpp registry_driver_maker.cpp rocrand_wrapper.cpp) diff --git a/driver/dm_pad_reflection.cpp b/driver/dm_pad_reflection.cpp new file mode 100644 index 0000000000..295634d38c --- /dev/null +++ b/driver/dm_pad_reflection.cpp @@ -0,0 +1,40 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include "registry_driver_maker.hpp" +#include "pad_reflection_driver.hpp" + +static Driver* makeDriver(const std::string& base_arg) +{ + if(base_arg == "padreflection") + return new PadReflectionDriver(); + if(base_arg == "padreflectionfp16") + return new PadReflectionDriver(); + if(base_arg == "padreflectionbfp16") + return new PadReflectionDriver(); + return nullptr; +} + +REGISTER_DRIVER_MAKER(makeDriver); diff --git a/driver/driver.hpp b/driver/driver.hpp index 4cfc2b544e..01eea79570 100644 --- a/driver/driver.hpp +++ b/driver/driver.hpp @@ -151,7 +151,7 @@ inline void PadBufferSize(size_t& sz, int datatype_sz) "pool[fp16], lrn[fp16], " "activ[fp16], softmax[fp16], bnorm[fp16], rnn[fp16], gemm[fp16], ctc, dropout[fp16], " "tensorop[fp16], reduce[fp16|fp64], layernorm[bfp16|fp16], sum[bfp16|fp16], " - "argmax[bfp16|fp16], groupnorm[bfp16|fp16], cat[bfp16|fp16]\n"); + "argmax[bfp16|fp16], groupnorm[bfp16|fp16], cat[bfp16|fp16], padreflection[bfp16|fp16]\n"); exit(0); // NOLINT (concurrency-mt-unsafe) } @@ -176,7 +176,8 @@ inline std::string ParseBaseArg(int argc, char* argv[]) arg != "layernormfp16" && arg != "layernormbfp16" && arg != "sum" && arg != "sumfp16" && arg != "sumbfp16" && arg != "argmax" && arg != "argmaxfp16" && arg != "argmaxbfp16" && arg != "groupnorm" && arg != "groupnormfp16" && arg != "groupnormbfp16" && arg != "cat" && - arg != "catfp16" && arg != "catbfp16" && arg != "--version") + arg != "catfp16" && arg != "catbfp16" && arg != "padreflection" && arg != "padreflectionfp16" && + arg != "padreflectionbfp16" && arg != "--version") { printf("FAILED: Invalid Base Input Argument\n"); Usage(); diff --git a/driver/pad_reflection_driver.hpp b/driver/pad_reflection_driver.hpp new file mode 100644 index 0000000000..7cba644381 --- /dev/null +++ b/driver/pad_reflection_driver.hpp @@ -0,0 +1,394 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + *******************************************************************************/ +#ifndef GUARD_MIOPEN_PAD_REFLECTION_DRIVER_HPP +#define GUARD_MIOPEN_PAD_REFLECTION_DRIVER_HPP + +#include "InputFlags.hpp" +#include "driver.hpp" +#include "tensor_driver.hpp" +#include "timer.hpp" +#include "random.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include <../test/tensor_holder.hpp> +#include <../test/verify.hpp> + +#ifndef MLO_PADREFLECTIONHOST_H_ +#define MLO_PADREFLECTIONHOST_H_ + +template +void mloPadReflectionRunHost(miopenTensorDescriptor_t inputDesc, + miopenTensorDescriptor_t outputDesc, + Tgpu* input, + Tcheck* outputhost, + std::vector padding) +{ + auto input_dims = miopen::deref(inputDesc).GetLengths(); + auto output_dims = miopen::deref(outputDesc).GetLengths(); + + auto output_numel = + std::accumulate(output_dims.begin(), output_dims.end(), 1L, std::multiplies()); + + long padding_l = padding[0]; + long padding_t = padding[2]; + auto input_strides = miopen::deref(inputDesc).GetStrides(); + size_t in_H = input_dims[2]; + size_t in_W = input_dims[3]; + + for(size_t gid = 0; gid < output_numel; gid++) + { + long n, c, h, w; + // GET_NCHW(n, c, h, w, gid, output); + ulong nch = (gid) / output_dims[3]; + w = (gid) % output_dims[3]; + ulong nc = nch / output_dims[2]; + h = nch % output_dims[2]; + n = nc / output_dims[1]; + c = nc % output_dims[1]; + + long in_start_x = max(0L, -padding_l); + long in_start_y = max(0L, -padding_t); + long out_start_x = max(0L, padding_l); + long out_start_y = max(0L, padding_t); + + if (w < padding_l) { + w = padding_l * 2 - w; + } else if (padding_l <= w && w < in_W + padding_l) { + w = w; + } else { + w = (in_W + padding_l - 1) * 2 - w; + } + w = w - out_start_x + in_start_x; + + if (h < padding_t) { + h = padding_t * 2 - h; + } else if (padding_t <= h && h < in_H + padding_t) { + h = h; + } else { + h = (in_H + padding_t - 1) * 2 - h; + } + h = h - out_start_y + in_start_y; + + outputhost[gid] = input[(input_strides[3] * (w)) + + (input_strides[2] * (h)) + + (input_strides[1] * (c)) + + (input_strides[0] * (n)) + + 0]; + } +} +#endif + +template +class PadReflectionDriver : public Driver +{ +public: + PadReflectionDriver() : Driver() + { + miopenCreateTensorDescriptor(&inputDesc); + miopenCreateTensorDescriptor(&outputDesc); + + data_type = miopen_type{}; + } + + int AddCmdLineArgs() override; + int ParseCmdLineArgs(int argc, char* argv[]) override; + InputFlags& GetInputFlags() override { return inflags; } + + int GetandSetData() override; + std::vector GetInputTensorLengthsFromCmdLine(); + + int AllocateBuffersAndCopy() override; + + int RunForwardGPU() override; + int RunForwardCPU(); + + int RunBackwardGPU() override; + + Tref GetTolerance(); + int VerifyBackward() override; + int VerifyForward() override; + ~PadReflectionDriver() override + { + miopenDestroyTensorDescriptor(inputDesc); + miopenDestroyTensorDescriptor(outputDesc); + } + +private: + InputFlags inflags; + + int forw; + + miopenTensorDescriptor_t inputDesc; + miopenTensorDescriptor_t outputDesc; + + std::unique_ptr in_dev; + std::unique_ptr out_dev; + + std::vector in; + std::vector out; + std::vector outhost; + + std::vector padding; +}; + +template +int PadReflectionDriver::ParseCmdLineArgs(int argc, char* argv[]) +{ + inflags.Parse(argc, argv); + + if(inflags.GetValueInt("time") == 1) + { + miopenEnableProfiling(GetHandle(), true); + } + return miopenStatusSuccess; +} + +template +int PadReflectionDriver::GetandSetData() +{ + 
std::string padding_str = inflags.GetValueStr("padding"); + std::stringstream ss(padding_str); + std::string token; + while (std::getline(ss, token, ',')) { + padding.push_back(std::stoul(token)); + } + + std::vector in_len = GetInputTensorLengthsFromCmdLine(); + + SetTensorNd(inputDesc, in_len, data_type); + + std::vector out_len; + + for(int i = 0; i < in_len.size(); i++) + { + //If H + if(i == 2) + { + out_len.push_back(in_len[i] + 2*padding[2]); + } + //If W + else if(i == 3) + { + out_len.push_back(in_len[i] + 2*padding[0]); + } + else { + out_len.push_back(in_len[i]); + } + } + + if(out_len.empty()) + out_len.push_back(1); + + SetTensorNd(outputDesc, out_len, data_type); + + return 0; +} + +template +int PadReflectionDriver::AddCmdLineArgs() +{ + inflags.AddInputFlag("forw", 'F', "1", "Run only Forward Pad Reflection (Default=1)", "int"); + inflags.AddInputFlag("batchsize", 'n', "256", "Mini-batch size (Default=100)", "int"); + inflags.AddInputFlag("in_channels", 'c', "4", "Number of Input Channels (Default=3)", "int"); + inflags.AddInputFlag("in_d", 'D', "0", "Input Depth (Default=0)", "int"); + inflags.AddInputFlag("in_h", 'H', "32", "Input Height (Default=32)", "int"); + inflags.AddInputFlag("in_w", 'W', "32", "Input Width (Default=32)", "int"); + + inflags.AddInputFlag("padding", 'P', "1,1,1,1", "Padding array (Default=1,1,1,1)", "str"); + + inflags.AddInputFlag("iter", 'i', "10", "Number of Iterations (Default=10)", "int"); + inflags.AddInputFlag("verify", 'V', "1", "Verify Each Layer (Default=1)", "int"); + inflags.AddInputFlag("time", 't', "0", "Time Each Layer (Default=0)", "int"); + inflags.AddInputFlag( + "wall", 'w', "0", "Wall-clock Time Each Layer, Requires time == 1 (Default=0)", "int"); + + return miopenStatusSuccess; +} + +template +std::vector PadReflectionDriver::GetInputTensorLengthsFromCmdLine() +{ + int in_n = inflags.GetValueInt("batchsize"); + int in_c = inflags.GetValueInt("in_channels"); + int in_w = inflags.GetValueInt("in_w"); + int in_h = inflags.GetValueInt("in_h"); + int in_d = inflags.GetValueInt("in_d"); + + if((in_n != 0) && (in_c != 0) && (in_d != 0) && (in_h != 0) && (in_w != 0)) + { + return std::vector({in_n, in_c, in_d, in_h, in_w}); + } + else if((in_n != 0) && (in_c != 0) && (in_h != 0) && (in_w != 0)) + { + return std::vector({in_n, in_c, in_h, in_w}); + } + else if((in_n != 0) && (in_c != 0) && (in_w != 0)) + { + return std::vector({in_n, in_c, in_w}); + } + else if((in_n != 0) && (in_w != 0)) + { + return std::vector({in_n, in_w}); + } + else if(in_n != 0) + { + return std::vector({in_n}); + } + else + { + std::cerr << "Error Input Tensor Lengths\n" << std::endl; + return std::vector({0}); + } +} + +template +int PadReflectionDriver::AllocateBuffersAndCopy() +{ + size_t in_sz = GetTensorSize(inputDesc); + size_t out_sz = GetTensorSize(outputDesc); + + uint32_t ctx = 0; + + in_dev = std::unique_ptr(new GPUMem(ctx, in_sz, sizeof(Tgpu))); + out_dev = std::unique_ptr(new GPUMem(ctx, out_sz, sizeof(Tgpu))); + + in = std::vector(in_sz, static_cast(0)); + out = std::vector(out_sz, static_cast(0)); + outhost = std::vector(out_sz, static_cast(0)); + + for(int i = 0; i < in_sz; i++) + { + in[i] = prng::gen_A_to_B(static_cast(0.0), static_cast(1.0)); + } + + if(in_dev->ToGPU(GetStream(), in.data()) != 0) + std::cerr << "Error copying (in) to GPU, size: " << in_dev->GetSize() << std::endl; + + if(out_dev->ToGPU(GetStream(), out.data()) != 0) + std::cerr << "Error copying (out) to GPU, size: " << out_dev->GetSize() << std::endl; + + return 
miopenStatusSuccess; +} + +template +int PadReflectionDriver::RunForwardGPU() +{ + float kernel_total_time = 0; + float kernel_first_time = 0; + + Timer t; + START_TIME + + for(int i = 0; i < inflags.GetValueInt("iter"); i++) + { + miopenPadReflection(GetHandle(), + inputDesc, + in_dev->GetMem(), + outputDesc, + out_dev->GetMem(), + padding); + + float time = 0.0; + miopenGetKernelTime(GetHandle(), &time); + kernel_total_time += time; + if(i == 0) + kernel_first_time = time; + } + + if(inflags.GetValueInt("time") == 1) + { + STOP_TIME + int iter = inflags.GetValueInt("iter"); + if(WALL_CLOCK) + std::cout << "Wall-clock Time Forward Pad Reflection Elapsed: " << t.gettime_ms() / iter + << " ms\n"; + + float kernel_average_time = + iter > 1 ? (kernel_total_time - kernel_first_time) / (iter - 1) : kernel_first_time; + std::cout << "GPU Kernel Time Forward Pad Reflection Elapsed: " << kernel_average_time << " ms\n"; + } + + if(out_dev->FromGPU(GetStream(), out.data()) != 0) + std::cerr << "Error copying (out_dev) from GPU, size: " << out_dev->GetSize() << std::endl; + + return miopenStatusSuccess; +} + +template +int PadReflectionDriver::RunForwardCPU() +{ + mloPadReflectionRunHost( + inputDesc, outputDesc, in.data(), outhost.data(), padding); + + return miopenStatusSuccess; +} + +template +int PadReflectionDriver::RunBackwardGPU() +{ + return miopenStatusSuccess; +} + +template +Tref PadReflectionDriver::GetTolerance() +{ + return 0; +} + +template +int PadReflectionDriver::VerifyForward() +{ + RunForwardCPU(); + const Tref tolerance = GetTolerance(); + auto error = miopen::rms_range(outhost, out); + + if(!std::isfinite(error) || error > 0) + { + std::cout << "Pad Reflection FAILED: " << error << " > " << tolerance << std::endl; + return EC_VerifyFwd; + } + else + { + std::cout << "Pad Reflection Verifies OK on CPU reference (" << error << " == " << tolerance + << ')' << std::endl; + } + + return miopenStatusSuccess; +} + +template +int PadReflectionDriver::VerifyBackward() +{ + return miopenStatusSuccess; +} + +#endif // GUARD_MIOPEN_PAD_REFLECTION_DRIVER_HPP diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index bf69585717..7965edb26b 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -26,6 +26,7 @@ #ifndef MIOPEN_GUARD_MIOPEN_H_ #define MIOPEN_GUARD_MIOPEN_H_ +#include #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wextern-c-compat" @@ -5814,7 +5815,7 @@ MIOPEN_EXPORT miopenStatus_t miopenPadReflection(miopenHandle_t handle, const void* x, const miopenTensorDescriptor_t yDesc, void* y, - const int * padding); + const std::vector padding); /** @} */ // CLOSEOUT SUM DOXYGEN GROUP diff --git a/src/include/miopen/pad_reflection.hpp b/src/include/miopen/pad_reflection.hpp index bbbcc8a1f8..9b08a6eeaf 100644 --- a/src/include/miopen/pad_reflection.hpp +++ b/src/include/miopen/pad_reflection.hpp @@ -38,7 +38,7 @@ miopenStatus_t PadReflection(Handle& handle, ConstData_t x, const TensorDescriptor& yDesc, Data_t y, - const int* padding + const std::vector padding ); } // namespace miopen diff --git a/src/include/miopen/pad_reflection/invoke_params.hpp b/src/include/miopen/pad_reflection/invoke_params.hpp index db2b434d06..afa9999d8d 100644 --- a/src/include/miopen/pad_reflection/invoke_params.hpp +++ b/src/include/miopen/pad_reflection/invoke_params.hpp @@ -42,7 +42,7 @@ struct InvokeParams : public miopen::InvokeParams ConstData_t x = nullptr; Data_t y = nullptr; - const int* padding = nullptr; + const std::vector* padding = 
nullptr; std::size_t GetWorkspaceSize() const { return 0; } Data_t GetWorkspace() const { return nullptr; } diff --git a/src/include/miopen/pad_reflection/problem_description.hpp b/src/include/miopen/pad_reflection/problem_description.hpp index 87c6521017..3ffa4a4284 100644 --- a/src/include/miopen/pad_reflection/problem_description.hpp +++ b/src/include/miopen/pad_reflection/problem_description.hpp @@ -40,13 +40,16 @@ namespace pad_reflection { struct ProblemDescription : ProblemDescriptionBase { ProblemDescription(const TensorDescriptor& xDesc_, - const TensorDescriptor& yDesc_) - : xDesc(xDesc_), yDesc(yDesc_) + const TensorDescriptor& yDesc_, + const std::vector padding_ + ) + : xDesc(xDesc_), yDesc(yDesc_), padding(padding_) { } const TensorDescriptor& GetXDesc() const { return xDesc; } const TensorDescriptor& GetYDesc() const { return yDesc; } + const std::vector& GetPadding() const { return padding; } bool IsSameType() const @@ -82,6 +85,7 @@ struct ProblemDescription : ProblemDescriptionBase private: TensorDescriptor xDesc; TensorDescriptor yDesc; + std::vector padding; }; } // namespace pad_reflection diff --git a/src/kernels/MIOpenPadReflection.cpp b/src/kernels/MIOpenPadReflection.cpp index 11175485c3..2d360d45f2 100644 --- a/src/kernels/MIOpenPadReflection.cpp +++ b/src/kernels/MIOpenPadReflection.cpp @@ -31,116 +31,98 @@ #include "float_types.h" -// extern "C" __global__ void PadReflection2dFwdContiguous(const FLOAT* __restrict__ input, -// FLOAT* __restrict__ output, -// uint64_t output_size, -// long padding_l, long padding_t, -// const size_t * input_tv_size, -// const size_t * output_tv_size, -// const size_t * input_tv_stride -// ) -// { -// const size_t gid = threadIdx.x + blockIdx.x * blockDim.x; -// if(gid >= output_size) -// return; - -// // gid to output n, c, h, w -// size_t in_H = input_tv_size[2]; -// size_t in_W = input_tv_size[3]; - -// long n, c, h, w; -// // GET_NCHW(n, c, h, w, gid, output); -// ulong nch = (gid) / output_tv_size[3]; -// w = (gid) % output_tv_size[3]; -// ulong nc = nch / output_tv_size[2]; -// h = nch % output_tv_size[2]; -// n = nc / output_tv_size[1]; -// c = nc % output_tv_size[1]; - -// long in_start_x = max(0L, -padding_l); -// long in_start_y = max(0L, -padding_t); -// long out_start_x = max(0L, padding_l); -// long out_start_y = max(0L, padding_t); - -// if (w < padding_l) { -// w = padding_l * 2 - w; -// } else if (padding_l <= w && w < in_W + padding_l) { -// w = w; -// } else { -// w = (in_W + padding_l - 1) * 2 - w; -// } -// w = w - out_start_x + in_start_x; - -// if (h < padding_t) { -// h = padding_t * 2 - h; -// } else if (padding_t <= h && h < in_H + padding_t) { -// h = h; -// } else { -// h = (in_H + padding_t - 1) * 2 - h; -// } -// h = h - out_start_y + in_start_y; - -// output[gid] = input[(input_tv_stride[3] * (w)) + -// (input_tv_stride[2] * (h)) + -// (input_tv_stride[1] * (c)) + -// (input_tv_stride[0] * (n)) + -// 0]; -// } - -extern "C" __global__ void PadReflection2dFwdContiguous(const FLOAT* __restrict__ input, - FLOAT* __restrict__ output, - uint64_t output_size, - long padding_l, long padding_t, - const size_t in_H, - const size_t in_W, - const size_t output_size_1, - const size_t output_size_2, - const size_t output_size_3, - const size_t input_stride_0, - const size_t input_stride_1, - const size_t input_stride_2, - const size_t input_stride_3 - ) +template +__device__ void padReflection2dFwdContiguous(const TI* __restrict__ input, + TO* __restrict__ output, + uint64_t output_size, + long padding_l, + long 
padding_t, + const size_t in_H, + const size_t in_W, + const size_t output_size_1, + const size_t output_size_2, + const size_t output_size_3, + const size_t input_stride_0, + const size_t input_stride_1, + const size_t input_stride_2, + const size_t input_stride_3) { const size_t gid = threadIdx.x + blockIdx.x * blockDim.x; if(gid >= output_size) return; long n, c, h, w; - // GET_NCHW(n, c, h, w, gid, output); ulong nch = (gid) / output_size_3; - w = (gid) % output_size_3; - ulong nc = nch / output_size_2; - h = nch % output_size_2; - n = nc / output_size_1; - c = nc % output_size_1; + w = (gid) % output_size_3; + ulong nc = nch / output_size_2; + h = nch % output_size_2; + n = nc / output_size_1; + c = nc % output_size_1; - long in_start_x = max(0L, -padding_l); - long in_start_y = max(0L, -padding_t); + long in_start_x = max(0L, -padding_l); + long in_start_y = max(0L, -padding_t); long out_start_x = max(0L, padding_l); long out_start_y = max(0L, padding_t); - if (w < padding_l) { + if(w < padding_l) + { w = padding_l * 2 - w; - } else if (padding_l <= w && w < in_W + padding_l) { + } + else if(padding_l <= w && w < in_W + padding_l) + { w = w; - } else { + } + else + { w = (in_W + padding_l - 1) * 2 - w; } w = w - out_start_x + in_start_x; - if (h < padding_t) { + if(h < padding_t) + { h = padding_t * 2 - h; - } else if (padding_t <= h && h < in_H + padding_t) { + } + else if(padding_t <= h && h < in_H + padding_t) + { h = h; - } else { + } + else + { h = (in_H + padding_t - 1) * 2 - h; } h = h - out_start_y + in_start_y; - output[gid] = input[(input_stride_3 * (w)) + - (input_stride_2 * (h)) + - (input_stride_1 * (c)) + - (input_stride_0 * (n)) + - 0]; + output[gid] = input[(input_stride_3 * (w)) + (input_stride_2 * (h)) + (input_stride_1 * (c)) + + (input_stride_0 * (n)) + 0]; +} + +extern "C" __global__ void PadReflection2dFwdContiguous(const INPUT_TYPE* __restrict__ input, + OUTPUT_TYPE* __restrict__ output, + uint64_t output_size, + long padding_l, + long padding_t, + const size_t in_H, + const size_t in_W, + const size_t output_size_1, + const size_t output_size_2, + const size_t output_size_3, + const size_t input_stride_0, + const size_t input_stride_1, + const size_t input_stride_2, + const size_t input_stride_3) +{ + padReflection2dFwdContiguous(input, + output, + output_size, + padding_l, + padding_t, + in_H, + in_W, + output_size_1, + output_size_2, + output_size_3, + input_stride_0, + input_stride_1, + input_stride_2, + input_stride_3); } \ No newline at end of file diff --git a/src/pad_reflection.cpp b/src/pad_reflection.cpp index 2f2a2b4241..2c3dee192d 100644 --- a/src/pad_reflection.cpp +++ b/src/pad_reflection.cpp @@ -40,10 +40,10 @@ miopenStatus_t PadReflection(Handle& handle, ConstData_t x, const TensorDescriptor& yDesc, Data_t y, - const int* padding + const std::vector padding ) { - const auto problem = pad_reflection::ProblemDescription{xDesc, yDesc}; + const auto problem = pad_reflection::ProblemDescription{xDesc, yDesc, padding}; const auto invoke_params = [&]() { auto tmp = pad_reflection::InvokeParams{}; @@ -52,7 +52,7 @@ miopenStatus_t PadReflection(Handle& handle, tmp.yDesc = &yDesc; tmp.x = x; tmp.y = y; - tmp.padding = padding; + tmp.padding = &padding; return tmp; }(); diff --git a/src/pad_reflection_api.cpp b/src/pad_reflection_api.cpp index 26c99c25ea..cfefee3dcb 100644 --- a/src/pad_reflection_api.cpp +++ b/src/pad_reflection_api.cpp @@ -36,7 +36,7 @@ extern "C" miopenStatus_t miopenPadReflection(miopenHandle_t handle, const void* x, const 
miopenTensorDescriptor_t yDesc, void* y, - const int * padding + const std::vector padding ) { MIOPEN_LOG_FUNCTION( diff --git a/src/solver/pad_reflection/pad_reflection.cpp b/src/solver/pad_reflection/pad_reflection.cpp index b73aa4e7b7..e3540f7562 100644 --- a/src/solver/pad_reflection/pad_reflection.cpp +++ b/src/solver/pad_reflection/pad_reflection.cpp @@ -52,42 +52,42 @@ PadReflection::GetSolution(const ExecutionContext& context, { auto result = ConvSolution{miopenStatusSuccess}; - auto dtype = problem.GetXDesc().GetType(); - auto xdims = problem.GetXDesc().GetLengths(); - auto ydims = problem.GetYDesc().GetLengths(); - - auto kernel = KernelInfo{}; - - kernel.kernel_file = "MIOpenPadReflection.cpp"; - kernel.kernel_name = "PadReflection2dFwdContiguous"; - auto output_numel = - std::accumulate(ydims.begin(), ydims.end(), 1ULL, std::multiplies()); - - const auto build_params = KernelBuildParameters{ - {"MIOPEN_USE_FP16", static_cast(dtype == miopenHalf)}, - {"MIOPEN_USE_FP32", static_cast(dtype == miopenFloat)}, - {"MIOPEN_USE_FP64", static_cast(dtype == miopenDouble)}, - {"MIOPEN_USE_BFP16", static_cast(dtype == miopenBFloat16)}, - }; - - kernel.comp_options = build_params.GenerateFor(kbp::HIP{}); + auto input_dtype = miopen::GetDataType(problem.GetXDesc().GetType()); + auto output_dtype = miopen::GetDataType(problem.GetYDesc().GetType()); + auto xdims = problem.GetXDesc().GetLengths(); + auto ydims = problem.GetYDesc().GetLengths(); + + { + auto kernel = KernelInfo{}; + + kernel.kernel_file = "MIOpenPadReflection.cpp"; + kernel.kernel_name = "PadReflection2dFwdContiguous"; + auto output_numel = + std::accumulate(ydims.begin(), ydims.end(), 1ULL, std::multiplies()); + + const auto build_params = KernelBuildParameters{ + {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype}, + {"OUTPUT_TYPE", output_dtype == "bfloat16" ? 
"ushort" : output_dtype}, + }; - size_t xlocalsize = LOCAL_SIZE; - size_t xgridsize = AlignUp(output_numel, xlocalsize); - size_t ylocalsize = 1; - size_t ygridsize = 1; - size_t zlocalsize = 1; - size_t zgridsize = 1; - kernel.l_wk.push_back(xlocalsize); - kernel.l_wk.push_back(ylocalsize); - kernel.l_wk.push_back(zlocalsize); + kernel.comp_options = build_params.GenerateFor(kbp::HIP{}); - kernel.g_wk.push_back(xgridsize); - kernel.g_wk.push_back(ygridsize); - kernel.g_wk.push_back(zgridsize); + size_t xlocalsize = LOCAL_SIZE; + size_t xgridsize = AlignUp(output_numel, xlocalsize); + size_t ylocalsize = 1; + size_t ygridsize = 1; + size_t zlocalsize = 1; + size_t zgridsize = 1; + kernel.l_wk.push_back(xlocalsize); + kernel.l_wk.push_back(ylocalsize); + kernel.l_wk.push_back(zlocalsize); - result.construction_params.push_back(kernel); + kernel.g_wk.push_back(xgridsize); + kernel.g_wk.push_back(ygridsize); + kernel.g_wk.push_back(zgridsize); + result.construction_params.push_back(kernel); + } result.invoker_factory = [](const std::vector& kernels) { return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { decltype(auto) kernel = handle_.Run(kernels[0]); @@ -101,45 +101,9 @@ PadReflection::GetSolution(const ExecutionContext& context, auto output_size = std::accumulate(ydims.begin(), ydims.end(), 1ULL, std::multiplies()); - // const size_t* xdims_data; - // const size_t* ydims_data; - // const size_t* xstrides_data; - - // hipMalloc(&xdims_data, sizeof(size_t) * xdims.size()); - // hipMalloc(&ydims_data, sizeof(size_t) * ydims.size()); - // hipMalloc(&xstrides_data, sizeof(size_t) * xstrides.size()); - - // hipMemcpy((void*)xdims_data, - // xdims.data(), - // sizeof(size_t) * xdims.size(), - // hipMemcpyHostToDevice); - // hipMemcpy((void*)ydims_data, - // ydims.data(), - // sizeof(size_t) * ydims.size(), - // hipMemcpyHostToDevice); - // hipMemcpy((void*)xstrides_data, - // xstrides.data(), - // sizeof(size_t) * xstrides.size(), - // hipMemcpyHostToDevice); - - // long padding_l = params.padding[0]; - // long padding_t = params.padding[2]; - // kernel(params.x, - // params.y, - // output_size, - // padding_l, - // padding_t, - // xdims_data, - // ydims_data, - // xstrides_data); - - // hipFree((void*)xdims_data); - // hipFree((void*)ydims_data); - // hipFree((void*)xstrides_data); - - - long padding_l = params.padding[0]; - long padding_t = params.padding[2]; + auto padding = params.padding; + long padding_l = (*padding)[0]; + long padding_t = (*padding)[2]; size_t in_H = xdims[2]; size_t in_W = xdims[3]; size_t output_size_1 = ydims[1]; diff --git a/test/cpu_pad_reflection.hpp b/test/cpu_pad_reflection.hpp index 0fd774ec71..befa63f08b 100644 --- a/test/cpu_pad_reflection.hpp +++ b/test/cpu_pad_reflection.hpp @@ -31,11 +31,11 @@ template void cpu_pad_reflection(tensor input_tensor, tensor& ref_output_tensor, - const int * padding_gpu + const std::vector padding ) { - long padding_l = padding_gpu[0]; - long padding_t = padding_gpu[2]; + long padding_l = padding[0]; + long padding_t = padding[2]; auto input_dims = input_tensor.desc.GetLengths(); auto output_dims = ref_output_tensor.desc.GetLengths(); auto input = input_tensor.data.data(); diff --git a/test/gtest/pad_reflection.hpp b/test/gtest/pad_reflection.hpp index 9dcc60a4db..b4d6ba2755 100644 --- a/test/gtest/pad_reflection.hpp +++ b/test/gtest/pad_reflection.hpp @@ -77,8 +77,12 @@ struct PadReflectionCase } } - int * GetPadding() { - return padding; + std::vector GetPadding() { + std::vector paddingVector; + for (int i = 0; 
i < 4; ++i) { + paddingVector.push_back(static_cast(padding[i])); + } + return paddingVector; } }; @@ -149,16 +153,16 @@ struct PadReflectionTest : public ::testing::TestWithParam cpu_pad_reflection(input, ref_output, padding); miopenStatus_t status; - const int * padding_gpu; - hipMalloc(&padding_gpu, sizeof(int) * 4); - hipMemcpy((void*)padding_gpu, padding, sizeof(int) * 4, hipMemcpyHostToDevice); + // const int * padding_gpu; + // hipMalloc(&padding_gpu, sizeof(int) * 4); + // hipMemcpy((void*)padding_gpu, padding, sizeof(int) * 4, hipMemcpyHostToDevice); status = miopen::PadReflection(handle, input.desc, input_dev.get(), output.desc, output_dev.get(), - padding_gpu + padding ); EXPECT_EQ(status, miopenStatusSuccess); From 49e7a4687c816804e6d654cb2097dba6041b315c Mon Sep 17 00:00:00 2001 From: Duong Le Date: Mon, 15 Apr 2024 10:15:38 +0000 Subject: [PATCH 04/29] add padreflection docs and update cmakelist to alphabetic ordering --- docs/reference/index.rst | 1 + driver/CMakeLists.txt | 2 +- include/miopen/miopen.h | 12 ++++++++---- src/CMakeLists.txt | 8 ++++---- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 02bcb88622..60883f9a1b 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -31,4 +31,5 @@ The MIOpen API library is structured as follows: * :doc:`Sum <../doxygen/html/group__sum>` (experimental) * :doc:`GroupNorm <../doxygen/html/group__groupnorm>` (experimental) * :doc:`Cat <../doxygen/html/group__cat>` (experimental) + * :doc:`PadReflection <../doxygen/html/group__padreflection>` (experimental) * :doc:`Argmax<./argmax>` (experimental) diff --git a/driver/CMakeLists.txt b/driver/CMakeLists.txt index 17a6c20628..bfd196f790 100644 --- a/driver/CMakeLists.txt +++ b/driver/CMakeLists.txt @@ -45,13 +45,13 @@ add_executable(MIOpenDriver dm_groupnorm.cpp dm_layernorm.cpp dm_lrn.cpp + dm_pad_reflection.cpp dm_pool.cpp dm_reduce.cpp dm_rnn.cpp dm_softmax.cpp dm_sum.cpp dm_tensorop.cpp - dm_pad_reflection.cpp main.cpp registry_driver_maker.cpp rocrand_wrapper.cpp) diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index 7965edb26b..003b989f9a 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -69,6 +69,7 @@ * @defgroup argmax * @defgroup groupnorm * @defgroup cat + * @defgroup padreflection * */ @@ -5795,9 +5796,12 @@ MIOPEN_EXPORT miopenStatus_t miopenSumForward(miopenHandle_t handle, #endif #ifdef MIOPEN_BETA_API - - -/*! @brief Execute a sum forward layer +// Padreflection APIs +/** @addtogroup padreflection + * + * @{ + */ +/*! 
@brief Add padding by the reflection of the tensor * * @param handle MIOpen handle (input) * @param nanPropagation Nan number propagation mode (input) @@ -5818,7 +5822,7 @@ MIOPEN_EXPORT miopenStatus_t miopenPadReflection(miopenHandle_t handle, const std::vector padding); /** @} */ -// CLOSEOUT SUM DOXYGEN GROUP +// CLOSEOUT PAD REFLECTION DOXYGEN GROUP #endif #ifdef MIOPEN_BETA_API diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fd6157964c..06a3d3d2d0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -144,6 +144,8 @@ set( MIOpen_Source mha/problem_description.cpp op_args.cpp operator.cpp + pad_reflection/problem_description.cpp + pad_reflection_api.cpp performance_config.cpp pooling/problem_description.cpp pooling_api.cpp @@ -154,7 +156,6 @@ set( MIOpen_Source reducetensor.cpp reducetensor_api.cpp reduce/problem_description.cpp - pad_reflection/problem_description.cpp rnn.cpp rnn_api.cpp rnn/rnn_util.cpp @@ -265,6 +266,7 @@ set( MIOpen_Source solver/layernorm/forward_layernorm2d_ck.cpp solver/layernorm/forward_layernorm4d_ck.cpp solver/mha/mha_solver.cpp + solver/pad_reflection/pad_reflection.cpp solver/pooling/forward2d.cpp solver/pooling/forwardNaive.cpp solver/pooling/forwardNd.cpp @@ -272,12 +274,10 @@ set( MIOpen_Source solver/pooling/backwardNd.cpp solver/reduce/forward_argmax.cpp solver/reduce/forward_sum.cpp - solver/pad_reflection/pad_reflection.cpp solver/softmax/attn_softmax.cpp solver/softmax/softmax.cpp subbuffers.cpp sum_api.cpp - pad_reflection_api.cpp target_properties.cpp temp_file.cpp tensor.cpp @@ -604,11 +604,11 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN hip/hip_build_utils.cpp hip/batched_transpose_sol.cpp hip/general_tensor_reorder_sol.cpp + pad_reflection.cpp pooling.cpp ocl/fusionopconvocl.cpp ocl/fusionopbiasbnactivocl.cpp sum.cpp - pad_reflection.cpp ${PROJECT_BINARY_DIR}/db_path.cpp ) From 00905ef966ab250842837649db42d63e2376ee83 Mon Sep 17 00:00:00 2001 From: duong Date: Mon, 15 Apr 2024 15:28:45 +0000 Subject: [PATCH 05/29] Fix some warning error --- src/include/miopen/pad_reflection.hpp | 2 +- .../pad_reflection/problem_description.hpp | 2 -- src/include/miopen/pad_reflection/solvers.hpp | 7 ++++++- src/kernels/MIOpenPadReflection.cpp | 2 -- src/solver/pad_reflection/pad_reflection.cpp | 19 +++++++++++++++---- test/cpu_pad_reflection.hpp | 2 -- 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/include/miopen/pad_reflection.hpp b/src/include/miopen/pad_reflection.hpp index 9b08a6eeaf..66f7d9f07a 100644 --- a/src/include/miopen/pad_reflection.hpp +++ b/src/include/miopen/pad_reflection.hpp @@ -38,7 +38,7 @@ miopenStatus_t PadReflection(Handle& handle, ConstData_t x, const TensorDescriptor& yDesc, Data_t y, - const std::vector padding + std::vector padding ); } // namespace miopen diff --git a/src/include/miopen/pad_reflection/problem_description.hpp b/src/include/miopen/pad_reflection/problem_description.hpp index 3ffa4a4284..d5d488a3f1 100644 --- a/src/include/miopen/pad_reflection/problem_description.hpp +++ b/src/include/miopen/pad_reflection/problem_description.hpp @@ -78,8 +78,6 @@ struct ProblemDescription : ProblemDescriptionBase return true; } - - NetworkConfig MakeNetworkConfig() const override; private: diff --git a/src/include/miopen/pad_reflection/solvers.hpp b/src/include/miopen/pad_reflection/solvers.hpp index 3becfd36b1..df867a3776 100644 --- a/src/include/miopen/pad_reflection/solvers.hpp +++ b/src/include/miopen/pad_reflection/solvers.hpp @@ -43,9 +43,14 @@ struct 
PadReflection final : PadReflectionSolver bool IsApplicable(const ExecutionContext& context, const miopen::pad_reflection::ProblemDescription& problem) const override; + ConvSolution GetSolution(const ExecutionContext& context, const miopen::pad_reflection::ProblemDescription& problem) const override; - bool MayNeedWorkspace() const override { return true; } + + std::size_t GetWorkspaceSize(const ExecutionContext& context, + const miopen::pad_reflection::ProblemDescription& problem) const override; + + bool MayNeedWorkspace() const override { return false; } }; } // namespace pad_reflection diff --git a/src/kernels/MIOpenPadReflection.cpp b/src/kernels/MIOpenPadReflection.cpp index 2d360d45f2..7d67dc7019 100644 --- a/src/kernels/MIOpenPadReflection.cpp +++ b/src/kernels/MIOpenPadReflection.cpp @@ -70,7 +70,6 @@ __device__ void padReflection2dFwdContiguous(const TI* __restrict__ input, } else if(padding_l <= w && w < in_W + padding_l) { - w = w; } else { @@ -84,7 +83,6 @@ __device__ void padReflection2dFwdContiguous(const TI* __restrict__ input, } else if(padding_t <= h && h < in_H + padding_t) { - h = h; } else { diff --git a/src/solver/pad_reflection/pad_reflection.cpp b/src/solver/pad_reflection/pad_reflection.cpp index e3540f7562..9856ab98d6 100644 --- a/src/solver/pad_reflection/pad_reflection.cpp +++ b/src/solver/pad_reflection/pad_reflection.cpp @@ -40,15 +40,26 @@ namespace solver { namespace pad_reflection { -bool PadReflection::IsApplicable(const ExecutionContext& context, - const miopen::pad_reflection::ProblemDescription& problem) const +bool PadReflection::IsApplicable([[maybe_unused]] const ExecutionContext& context, + [[maybe_unused]] const miopen::pad_reflection::ProblemDescription& problem) const { + if(!problem.IsSameType()) + return false; + if(!problem.IsAllPacked()) + return false; return true; } +std::size_t +PadReflection::GetWorkspaceSize([[maybe_unused]] const ExecutionContext& context, + [[maybe_unused]] const miopen::pad_reflection::ProblemDescription& problem) const +{ + return 0; +} + ConvSolution -PadReflection::GetSolution(const ExecutionContext& context, - const miopen::pad_reflection::ProblemDescription& problem) const +PadReflection::GetSolution([[maybe_unused]] const ExecutionContext& context, + [[maybe_unused]] const miopen::pad_reflection::ProblemDescription& problem) const { auto result = ConvSolution{miopenStatusSuccess}; diff --git a/test/cpu_pad_reflection.hpp b/test/cpu_pad_reflection.hpp index befa63f08b..b5d6f413e9 100644 --- a/test/cpu_pad_reflection.hpp +++ b/test/cpu_pad_reflection.hpp @@ -66,7 +66,6 @@ void cpu_pad_reflection(tensor input_tensor, if (w < padding_l) { w = padding_l * 2 - w; } else if (padding_l <= w && w < in_W + padding_l) { - w = w; } else { w = (in_W + padding_l - 1) * 2 - w; } @@ -75,7 +74,6 @@ void cpu_pad_reflection(tensor input_tensor, if (h < padding_t) { h = padding_t * 2 - h; } else if (padding_t <= h && h < in_H + padding_t) { - h = h; } else { h = (in_H + padding_t - 1) * 2 - h; } From 6f8afd0cf81b889ed5457a7f09812e857e35ecb2 Mon Sep 17 00:00:00 2001 From: duong Date: Mon, 15 Apr 2024 15:30:43 +0000 Subject: [PATCH 06/29] githook indentation --- src/include/miopen/pad_reflection.hpp | 11 ++-- .../pad_reflection/problem_description.hpp | 4 +- src/include/miopen/pad_reflection/solvers.hpp | 15 +++-- src/solver/pad_reflection/pad_reflection.cpp | 23 +++---- test/cpu_pad_reflection.hpp | 63 ++++++++++--------- 5 files changed, 62 insertions(+), 54 deletions(-) diff --git a/src/include/miopen/pad_reflection.hpp 
b/src/include/miopen/pad_reflection.hpp index 66f7d9f07a..6bde02014a 100644 --- a/src/include/miopen/pad_reflection.hpp +++ b/src/include/miopen/pad_reflection.hpp @@ -34,12 +34,11 @@ struct Handle; struct TensorDescriptor; miopenStatus_t PadReflection(Handle& handle, - const TensorDescriptor& xDesc, - ConstData_t x, - const TensorDescriptor& yDesc, - Data_t y, - std::vector padding - ); + const TensorDescriptor& xDesc, + ConstData_t x, + const TensorDescriptor& yDesc, + Data_t y, + std::vector padding); } // namespace miopen #endif // MIOPEN_PAD_REFLECTION_HPP_ diff --git a/src/include/miopen/pad_reflection/problem_description.hpp b/src/include/miopen/pad_reflection/problem_description.hpp index d5d488a3f1..c81ea198eb 100644 --- a/src/include/miopen/pad_reflection/problem_description.hpp +++ b/src/include/miopen/pad_reflection/problem_description.hpp @@ -41,8 +41,7 @@ struct ProblemDescription : ProblemDescriptionBase { ProblemDescription(const TensorDescriptor& xDesc_, const TensorDescriptor& yDesc_, - const std::vector padding_ - ) + const std::vector padding_) : xDesc(xDesc_), yDesc(yDesc_), padding(padding_) { } @@ -51,7 +50,6 @@ struct ProblemDescription : ProblemDescriptionBase const TensorDescriptor& GetYDesc() const { return yDesc; } const std::vector& GetPadding() const { return padding; } - bool IsSameType() const { if(xDesc.GetType() != yDesc.GetType()) diff --git a/src/include/miopen/pad_reflection/solvers.hpp b/src/include/miopen/pad_reflection/solvers.hpp index df867a3776..44668f886d 100644 --- a/src/include/miopen/pad_reflection/solvers.hpp +++ b/src/include/miopen/pad_reflection/solvers.hpp @@ -35,7 +35,8 @@ namespace solver { namespace pad_reflection { -using PadReflectionSolver = NonTunableSolverBase; +using PadReflectionSolver = + NonTunableSolverBase; struct PadReflection final : PadReflectionSolver { @@ -44,11 +45,13 @@ struct PadReflection final : PadReflectionSolver bool IsApplicable(const ExecutionContext& context, const miopen::pad_reflection::ProblemDescription& problem) const override; - ConvSolution GetSolution(const ExecutionContext& context, - const miopen::pad_reflection::ProblemDescription& problem) const override; - - std::size_t GetWorkspaceSize(const ExecutionContext& context, - const miopen::pad_reflection::ProblemDescription& problem) const override; + ConvSolution + GetSolution(const ExecutionContext& context, + const miopen::pad_reflection::ProblemDescription& problem) const override; + + std::size_t + GetWorkspaceSize(const ExecutionContext& context, + const miopen::pad_reflection::ProblemDescription& problem) const override; bool MayNeedWorkspace() const override { return false; } }; diff --git a/src/solver/pad_reflection/pad_reflection.cpp b/src/solver/pad_reflection/pad_reflection.cpp index 9856ab98d6..a75feadfe0 100644 --- a/src/solver/pad_reflection/pad_reflection.cpp +++ b/src/solver/pad_reflection/pad_reflection.cpp @@ -40,8 +40,9 @@ namespace solver { namespace pad_reflection { -bool PadReflection::IsApplicable([[maybe_unused]] const ExecutionContext& context, - [[maybe_unused]] const miopen::pad_reflection::ProblemDescription& problem) const +bool PadReflection::IsApplicable( + [[maybe_unused]] const ExecutionContext& context, + [[maybe_unused]] const miopen::pad_reflection::ProblemDescription& problem) const { if(!problem.IsSameType()) return false; @@ -50,16 +51,16 @@ bool PadReflection::IsApplicable([[maybe_unused]] const ExecutionContext& contex return true; } -std::size_t -PadReflection::GetWorkspaceSize([[maybe_unused]] const 
ExecutionContext& context, - [[maybe_unused]] const miopen::pad_reflection::ProblemDescription& problem) const +std::size_t PadReflection::GetWorkspaceSize( + [[maybe_unused]] const ExecutionContext& context, + [[maybe_unused]] const miopen::pad_reflection::ProblemDescription& problem) const { return 0; } -ConvSolution -PadReflection::GetSolution([[maybe_unused]] const ExecutionContext& context, - [[maybe_unused]] const miopen::pad_reflection::ProblemDescription& problem) const +ConvSolution PadReflection::GetSolution( + [[maybe_unused]] const ExecutionContext& context, + [[maybe_unused]] const miopen::pad_reflection::ProblemDescription& problem) const { auto result = ConvSolution{miopenStatusSuccess}; @@ -112,9 +113,9 @@ PadReflection::GetSolution([[maybe_unused]] const ExecutionContext& context, auto output_size = std::accumulate(ydims.begin(), ydims.end(), 1ULL, std::multiplies()); - auto padding = params.padding; - long padding_l = (*padding)[0]; - long padding_t = (*padding)[2]; + auto padding = params.padding; + long padding_l = (*padding)[0]; + long padding_t = (*padding)[2]; size_t in_H = xdims[2]; size_t in_W = xdims[3]; size_t output_size_1 = ydims[1]; diff --git a/test/cpu_pad_reflection.hpp b/test/cpu_pad_reflection.hpp index b5d6f413e9..ef15909e7c 100644 --- a/test/cpu_pad_reflection.hpp +++ b/test/cpu_pad_reflection.hpp @@ -30,16 +30,15 @@ template void cpu_pad_reflection(tensor input_tensor, - tensor& ref_output_tensor, - const std::vector padding - ) + tensor& ref_output_tensor, + const std::vector padding) { - long padding_l = padding[0]; - long padding_t = padding[2]; - auto input_dims = input_tensor.desc.GetLengths(); - auto output_dims = ref_output_tensor.desc.GetLengths(); - auto input = input_tensor.data.data(); - auto output = ref_output_tensor.data.data(); + long padding_l = padding[0]; + long padding_t = padding[2]; + auto input_dims = input_tensor.desc.GetLengths(); + auto output_dims = ref_output_tensor.desc.GetLengths(); + auto input = input_tensor.data.data(); + auto output = ref_output_tensor.data.data(); auto input_strides = input_tensor.desc.GetStrides(); auto output_size = std::accumulate(output_dims.begin(), output_dims.end(), 1L, std::multiplies()); @@ -47,43 +46,51 @@ void cpu_pad_reflection(tensor input_tensor, size_t in_H = input_dims[2]; size_t in_W = input_dims[3]; - for (size_t gid = 0; gid < output_size; ++gid) { + for(size_t gid = 0; gid < output_size; ++gid) + { long n, c, h, w; // GET_NCHW(n, c, h, w, gid, output); ulong nch = (gid) / output_dims[3]; - w = (gid) % output_dims[3]; - ulong nc = nch / output_dims[2]; - h = nch % output_dims[2]; - n = nc / output_dims[1]; - c = nc % output_dims[1]; + w = (gid) % output_dims[3]; + ulong nc = nch / output_dims[2]; + h = nch % output_dims[2]; + n = nc / output_dims[1]; + c = nc % output_dims[1]; - long in_start_x = max(0L, -padding_l); - long in_start_y = max(0L, -padding_t); + long in_start_x = max(0L, -padding_l); + long in_start_y = max(0L, -padding_t); long out_start_x = max(0L, padding_l); long out_start_y = max(0L, padding_t); - if (w < padding_l) { + if(w < padding_l) + { w = padding_l * 2 - w; - } else if (padding_l <= w && w < in_W + padding_l) { - } else { + } + else if(padding_l <= w && w < in_W + padding_l) + { + } + else + { w = (in_W + padding_l - 1) * 2 - w; } w = w - out_start_x + in_start_x; - if (h < padding_t) { + if(h < padding_t) + { h = padding_t * 2 - h; - } else if (padding_t <= h && h < in_H + padding_t) { - } else { + } + else if(padding_t <= h && h < in_H + padding_t) + { + } 
+ else + { h = (in_H + padding_t - 1) * 2 - h; } h = h - out_start_y + in_start_y; - output[gid] = input[(input_strides[3] * (w)) + - (input_strides[2] * (h)) + - (input_strides[1] * (c)) + - (input_strides[0] * (n)) + - 0]; + output[gid] = input[(input_strides[3] * (w)) + (input_strides[2] * (h)) + + (input_strides[1] * (c)) + (input_strides[0] * (n)) + 0]; } } #endif From 508fabdfb2cb686c3bcc8815853ae4ee232c8988 Mon Sep 17 00:00:00 2001 From: duong Date: Mon, 15 Apr 2024 15:38:45 +0000 Subject: [PATCH 07/29] Code clean up --- driver/dm_pad_reflection.cpp | 1 - driver/pad_reflection_driver.hpp | 2 -- src/include/miopen/pad_reflection.hpp | 1 - src/include/miopen/pad_reflection/invoke_params.hpp | 1 - .../miopen/pad_reflection/problem_description.hpp | 1 - src/include/miopen/pad_reflection/solvers.hpp | 1 - src/pad_reflection.cpp | 1 - src/pad_reflection/problem_description.cpp | 1 - src/pad_reflection_api.cpp | 1 - test/gtest/pad_reflection.cpp | 1 + test/gtest/pad_reflection.hpp | 13 +------------ 11 files changed, 2 insertions(+), 22 deletions(-) diff --git a/driver/dm_pad_reflection.cpp b/driver/dm_pad_reflection.cpp index 295634d38c..b036d5ccb6 100644 --- a/driver/dm_pad_reflection.cpp +++ b/driver/dm_pad_reflection.cpp @@ -25,7 +25,6 @@ *******************************************************************************/ #include "registry_driver_maker.hpp" #include "pad_reflection_driver.hpp" - static Driver* makeDriver(const std::string& base_arg) { if(base_arg == "padreflection") diff --git a/driver/pad_reflection_driver.hpp b/driver/pad_reflection_driver.hpp index 7cba644381..8e7c2634e8 100644 --- a/driver/pad_reflection_driver.hpp +++ b/driver/pad_reflection_driver.hpp @@ -54,7 +54,6 @@ void mloPadReflectionRunHost(miopenTensorDescriptor_t inputDesc, { auto input_dims = miopen::deref(inputDesc).GetLengths(); auto output_dims = miopen::deref(outputDesc).GetLengths(); - auto output_numel = std::accumulate(output_dims.begin(), output_dims.end(), 1L, std::multiplies()); @@ -187,7 +186,6 @@ int PadReflectionDriver::GetandSetData() SetTensorNd(inputDesc, in_len, data_type); std::vector out_len; - for(int i = 0; i < in_len.size(); i++) { //If H diff --git a/src/include/miopen/pad_reflection.hpp b/src/include/miopen/pad_reflection.hpp index 6bde02014a..fcd265922a 100644 --- a/src/include/miopen/pad_reflection.hpp +++ b/src/include/miopen/pad_reflection.hpp @@ -39,6 +39,5 @@ miopenStatus_t PadReflection(Handle& handle, const TensorDescriptor& yDesc, Data_t y, std::vector padding); - } // namespace miopen #endif // MIOPEN_PAD_REFLECTION_HPP_ diff --git a/src/include/miopen/pad_reflection/invoke_params.hpp b/src/include/miopen/pad_reflection/invoke_params.hpp index afa9999d8d..7121364f7f 100644 --- a/src/include/miopen/pad_reflection/invoke_params.hpp +++ b/src/include/miopen/pad_reflection/invoke_params.hpp @@ -43,7 +43,6 @@ struct InvokeParams : public miopen::InvokeParams Data_t y = nullptr; const std::vector* padding = nullptr; - std::size_t GetWorkspaceSize() const { return 0; } Data_t GetWorkspace() const { return nullptr; } }; diff --git a/src/include/miopen/pad_reflection/problem_description.hpp b/src/include/miopen/pad_reflection/problem_description.hpp index c81ea198eb..19b70ca476 100644 --- a/src/include/miopen/pad_reflection/problem_description.hpp +++ b/src/include/miopen/pad_reflection/problem_description.hpp @@ -45,7 +45,6 @@ struct ProblemDescription : ProblemDescriptionBase : xDesc(xDesc_), yDesc(yDesc_), padding(padding_) { } - const TensorDescriptor& GetXDesc() const { return 
xDesc; } const TensorDescriptor& GetYDesc() const { return yDesc; } const std::vector& GetPadding() const { return padding; } diff --git a/src/include/miopen/pad_reflection/solvers.hpp b/src/include/miopen/pad_reflection/solvers.hpp index 44668f886d..e6913f5192 100644 --- a/src/include/miopen/pad_reflection/solvers.hpp +++ b/src/include/miopen/pad_reflection/solvers.hpp @@ -48,7 +48,6 @@ struct PadReflection final : PadReflectionSolver ConvSolution GetSolution(const ExecutionContext& context, const miopen::pad_reflection::ProblemDescription& problem) const override; - std::size_t GetWorkspaceSize(const ExecutionContext& context, const miopen::pad_reflection::ProblemDescription& problem) const override; diff --git a/src/pad_reflection.cpp b/src/pad_reflection.cpp index 2c3dee192d..eacd2605ef 100644 --- a/src/pad_reflection.cpp +++ b/src/pad_reflection.cpp @@ -58,7 +58,6 @@ miopenStatus_t PadReflection(Handle& handle, const auto algo = AlgorithmName{"PadReflection"}; const auto solvers = solver::SolverContainer{}; - solvers.ExecutePrimitive(handle, problem, algo, invoke_params); return miopenStatusSuccess; diff --git a/src/pad_reflection/problem_description.cpp b/src/pad_reflection/problem_description.cpp index e0b3545854..98cc727c17 100644 --- a/src/pad_reflection/problem_description.cpp +++ b/src/pad_reflection/problem_description.cpp @@ -46,7 +46,6 @@ NetworkConfig ProblemDescription::MakeNetworkConfig() const ss << "dtype" << dtype; ss << "output_size" << output_size; - return NetworkConfig{ss.str()}; } diff --git a/src/pad_reflection_api.cpp b/src/pad_reflection_api.cpp index cfefee3dcb..3b67d5a2d6 100644 --- a/src/pad_reflection_api.cpp +++ b/src/pad_reflection_api.cpp @@ -30,7 +30,6 @@ #include #include - extern "C" miopenStatus_t miopenPadReflection(miopenHandle_t handle, const miopenTensorDescriptor_t xDesc, const void* x, diff --git a/test/gtest/pad_reflection.cpp b/test/gtest/pad_reflection.cpp index b4986b94ae..d2dd368ef1 100644 --- a/test/gtest/pad_reflection.cpp +++ b/test/gtest/pad_reflection.cpp @@ -44,6 +44,7 @@ std::string GetFloatArg() struct PadReflectionTestFloat : PadReflectionTest { + }; } // namespace pad_reflection diff --git a/test/gtest/pad_reflection.hpp b/test/gtest/pad_reflection.hpp index b4d6ba2755..c02522e17b 100644 --- a/test/gtest/pad_reflection.hpp +++ b/test/gtest/pad_reflection.hpp @@ -87,17 +87,9 @@ struct PadReflectionCase }; std::vector PadReflectionTestFloatConfigs() -{ // n c d h w dim nanPropagation +{ // n c d h w padding // clang-format off return { - // { 8, 120, 0, 0, 1}, //bart - // { 8, 120, 0, 0, 1}, - // { 8, 1023, 0, 0, 1}, //gpt_neo - // { 8, 1024, 0, 0, 768}, - // { 8, 1023, 0, 0, 1}, - // { 8, 1024, 0, 0, 768}, - // { 16, 1024, 0, 0, 768}, //gpt2 - // { 16, 1024, 0, 0, 768}, { 1, 1, 0, 3, 3, {2, 2, 2, 2}}, { 48, 8, 0, 512, 512, {1, 1, 1, 1}}, { 48, 8, 0, 512, 512, {1, 1, 3, 3}}, @@ -153,9 +145,6 @@ struct PadReflectionTest : public ::testing::TestWithParam cpu_pad_reflection(input, ref_output, padding); miopenStatus_t status; - // const int * padding_gpu; - // hipMalloc(&padding_gpu, sizeof(int) * 4); - // hipMemcpy((void*)padding_gpu, padding, sizeof(int) * 4, hipMemcpyHostToDevice); status = miopen::PadReflection(handle, input.desc, From a4a249b6a11b07e3e35532a9ccc5165f59b17982 Mon Sep 17 00:00:00 2001 From: duong Date: Mon, 15 Apr 2024 15:39:21 +0000 Subject: [PATCH 08/29] githook cleanup --- driver/pad_reflection_driver.hpp | 87 ++++++++++--------- .../miopen/pad_reflection/invoke_params.hpp | 4 +- src/pad_reflection.cpp | 25 +++--- 
src/pad_reflection_api.cpp | 25 +++--- test/gtest/pad_reflection.cpp | 5 +- test/gtest/pad_reflection.hpp | 37 ++++---- 6 files changed, 92 insertions(+), 91 deletions(-) diff --git a/driver/pad_reflection_driver.hpp b/driver/pad_reflection_driver.hpp index 8e7c2634e8..edb35fee26 100644 --- a/driver/pad_reflection_driver.hpp +++ b/driver/pad_reflection_driver.hpp @@ -57,51 +57,58 @@ void mloPadReflectionRunHost(miopenTensorDescriptor_t inputDesc, auto output_numel = std::accumulate(output_dims.begin(), output_dims.end(), 1L, std::multiplies()); - long padding_l = padding[0]; - long padding_t = padding[2]; - auto input_strides = miopen::deref(inputDesc).GetStrides(); - size_t in_H = input_dims[2]; - size_t in_W = input_dims[3]; + long padding_l = padding[0]; + long padding_t = padding[2]; + auto input_strides = miopen::deref(inputDesc).GetStrides(); + size_t in_H = input_dims[2]; + size_t in_W = input_dims[3]; for(size_t gid = 0; gid < output_numel; gid++) { long n, c, h, w; // GET_NCHW(n, c, h, w, gid, output); ulong nch = (gid) / output_dims[3]; - w = (gid) % output_dims[3]; - ulong nc = nch / output_dims[2]; - h = nch % output_dims[2]; - n = nc / output_dims[1]; - c = nc % output_dims[1]; - - long in_start_x = max(0L, -padding_l); - long in_start_y = max(0L, -padding_t); + w = (gid) % output_dims[3]; + ulong nc = nch / output_dims[2]; + h = nch % output_dims[2]; + n = nc / output_dims[1]; + c = nc % output_dims[1]; + + long in_start_x = max(0L, -padding_l); + long in_start_y = max(0L, -padding_t); long out_start_x = max(0L, padding_l); long out_start_y = max(0L, padding_t); - if (w < padding_l) { + if(w < padding_l) + { w = padding_l * 2 - w; - } else if (padding_l <= w && w < in_W + padding_l) { + } + else if(padding_l <= w && w < in_W + padding_l) + { w = w; - } else { + } + else + { w = (in_W + padding_l - 1) * 2 - w; } w = w - out_start_x + in_start_x; - if (h < padding_t) { + if(h < padding_t) + { h = padding_t * 2 - h; - } else if (padding_t <= h && h < in_H + padding_t) { + } + else if(padding_t <= h && h < in_H + padding_t) + { h = h; - } else { + } + else + { h = (in_H + padding_t - 1) * 2 - h; } h = h - out_start_y + in_start_y; - outputhost[gid] = input[(input_strides[3] * (w)) + - (input_strides[2] * (h)) + - (input_strides[1] * (c)) + - (input_strides[0] * (n)) + - 0]; + outputhost[gid] = input[(input_strides[3] * (w)) + (input_strides[2] * (h)) + + (input_strides[1] * (c)) + (input_strides[0] * (n)) + 0]; } } #endif @@ -177,7 +184,8 @@ int PadReflectionDriver::GetandSetData() std::string padding_str = inflags.GetValueStr("padding"); std::stringstream ss(padding_str); std::string token; - while (std::getline(ss, token, ',')) { + while(std::getline(ss, token, ',')) + { padding.push_back(std::stoul(token)); } @@ -188,17 +196,18 @@ int PadReflectionDriver::GetandSetData() std::vector out_len; for(int i = 0; i < in_len.size(); i++) { - //If H + // If H if(i == 2) { - out_len.push_back(in_len[i] + 2*padding[2]); + out_len.push_back(in_len[i] + 2 * padding[2]); } - //If W + // If W else if(i == 3) { - out_len.push_back(in_len[i] + 2*padding[0]); + out_len.push_back(in_len[i] + 2 * padding[0]); } - else { + else + { out_len.push_back(in_len[i]); } } @@ -276,8 +285,8 @@ int PadReflectionDriver::AllocateBuffersAndCopy() uint32_t ctx = 0; - in_dev = std::unique_ptr(new GPUMem(ctx, in_sz, sizeof(Tgpu))); - out_dev = std::unique_ptr(new GPUMem(ctx, out_sz, sizeof(Tgpu))); + in_dev = std::unique_ptr(new GPUMem(ctx, in_sz, sizeof(Tgpu))); + out_dev = std::unique_ptr(new GPUMem(ctx, 
out_sz, sizeof(Tgpu))); in = std::vector(in_sz, static_cast(0)); out = std::vector(out_sz, static_cast(0)); @@ -308,12 +317,8 @@ int PadReflectionDriver::RunForwardGPU() for(int i = 0; i < inflags.GetValueInt("iter"); i++) { - miopenPadReflection(GetHandle(), - inputDesc, - in_dev->GetMem(), - outputDesc, - out_dev->GetMem(), - padding); + miopenPadReflection( + GetHandle(), inputDesc, in_dev->GetMem(), outputDesc, out_dev->GetMem(), padding); float time = 0.0; miopenGetKernelTime(GetHandle(), &time); @@ -332,7 +337,8 @@ int PadReflectionDriver::RunForwardGPU() float kernel_average_time = iter > 1 ? (kernel_total_time - kernel_first_time) / (iter - 1) : kernel_first_time; - std::cout << "GPU Kernel Time Forward Pad Reflection Elapsed: " << kernel_average_time << " ms\n"; + std::cout << "GPU Kernel Time Forward Pad Reflection Elapsed: " << kernel_average_time + << " ms\n"; } if(out_dev->FromGPU(GetStream(), out.data()) != 0) @@ -344,8 +350,7 @@ int PadReflectionDriver::RunForwardGPU() template int PadReflectionDriver::RunForwardCPU() { - mloPadReflectionRunHost( - inputDesc, outputDesc, in.data(), outhost.data(), padding); + mloPadReflectionRunHost(inputDesc, outputDesc, in.data(), outhost.data(), padding); return miopenStatusSuccess; } diff --git a/src/include/miopen/pad_reflection/invoke_params.hpp b/src/include/miopen/pad_reflection/invoke_params.hpp index 7121364f7f..c7b847676c 100644 --- a/src/include/miopen/pad_reflection/invoke_params.hpp +++ b/src/include/miopen/pad_reflection/invoke_params.hpp @@ -39,8 +39,8 @@ struct InvokeParams : public miopen::InvokeParams const TensorDescriptor* xDesc = nullptr; const TensorDescriptor* yDesc = nullptr; - ConstData_t x = nullptr; - Data_t y = nullptr; + ConstData_t x = nullptr; + Data_t y = nullptr; const std::vector* padding = nullptr; std::size_t GetWorkspaceSize() const { return 0; } diff --git a/src/pad_reflection.cpp b/src/pad_reflection.cpp index eacd2605ef..e3ac64d2be 100644 --- a/src/pad_reflection.cpp +++ b/src/pad_reflection.cpp @@ -36,23 +36,22 @@ namespace miopen { miopenStatus_t PadReflection(Handle& handle, - const TensorDescriptor& xDesc, - ConstData_t x, - const TensorDescriptor& yDesc, - Data_t y, - const std::vector padding - ) + const TensorDescriptor& xDesc, + ConstData_t x, + const TensorDescriptor& yDesc, + Data_t y, + const std::vector padding) { const auto problem = pad_reflection::ProblemDescription{xDesc, yDesc, padding}; const auto invoke_params = [&]() { - auto tmp = pad_reflection::InvokeParams{}; - tmp.type = InvokeType::Run; - tmp.xDesc = &xDesc; - tmp.yDesc = &yDesc; - tmp.x = x; - tmp.y = y; - tmp.padding = &padding; + auto tmp = pad_reflection::InvokeParams{}; + tmp.type = InvokeType::Run; + tmp.xDesc = &xDesc; + tmp.yDesc = &yDesc; + tmp.x = x; + tmp.y = y; + tmp.padding = &padding; return tmp; }(); diff --git a/src/pad_reflection_api.cpp b/src/pad_reflection_api.cpp index 3b67d5a2d6..97fe6a7fe6 100644 --- a/src/pad_reflection_api.cpp +++ b/src/pad_reflection_api.cpp @@ -31,23 +31,20 @@ #include extern "C" miopenStatus_t miopenPadReflection(miopenHandle_t handle, - const miopenTensorDescriptor_t xDesc, - const void* x, - const miopenTensorDescriptor_t yDesc, - void* y, - const std::vector padding - ) + const miopenTensorDescriptor_t xDesc, + const void* x, + const miopenTensorDescriptor_t yDesc, + void* y, + const std::vector padding) { - MIOPEN_LOG_FUNCTION( - handle, xDesc, x, yDesc, y, padding); + MIOPEN_LOG_FUNCTION(handle, xDesc, x, yDesc, y, padding); return miopen::try_([&] { 
miopen::PadReflection(miopen::deref(handle), - miopen::deref(xDesc), - DataCast(x), - miopen::deref(yDesc), - DataCast(y), - padding - ); + miopen::deref(xDesc), + DataCast(x), + miopen::deref(yDesc), + DataCast(y), + padding); }); } diff --git a/test/gtest/pad_reflection.cpp b/test/gtest/pad_reflection.cpp index d2dd368ef1..1eefd15d92 100644 --- a/test/gtest/pad_reflection.cpp +++ b/test/gtest/pad_reflection.cpp @@ -44,7 +44,6 @@ std::string GetFloatArg() struct PadReflectionTestFloat : PadReflectionTest { - }; } // namespace pad_reflection @@ -63,4 +62,6 @@ TEST_P(PadReflectionTestFloat, PadReflectionFw) } }; -INSTANTIATE_TEST_SUITE_P(PadReflectionTestSet, PadReflectionTestFloat, testing::ValuesIn(PadReflectionTestFloatConfigs())); +INSTANTIATE_TEST_SUITE_P(PadReflectionTestSet, + PadReflectionTestFloat, + testing::ValuesIn(PadReflectionTestFloatConfigs())); diff --git a/test/gtest/pad_reflection.hpp b/test/gtest/pad_reflection.hpp index c02522e17b..b755639f08 100644 --- a/test/gtest/pad_reflection.hpp +++ b/test/gtest/pad_reflection.hpp @@ -77,9 +77,11 @@ struct PadReflectionCase } } - std::vector GetPadding() { + std::vector GetPadding() + { std::vector paddingVector; - for (int i = 0; i < 4; ++i) { + for(int i = 0; i < 4; ++i) + { paddingVector.push_back(static_cast(padding[i])); } return paddingVector; @@ -107,24 +109,27 @@ struct PadReflectionTest : public ::testing::TestWithParam protected: void SetUp() override { - auto&& handle = get_handle(); - pad_reflection_config = GetParam(); + auto&& handle = get_handle(); + pad_reflection_config = GetParam(); auto gen_value = [](auto...) { return prng::gen_descreet_uniform_sign(1e-2, 100); }; auto in_dims = pad_reflection_config.GetInput(); auto padding = pad_reflection_config.GetPadding(); - input = tensor{in_dims}.generate(gen_value); + input = tensor{in_dims}.generate(gen_value); std::vector out_dims; for(int i = 0; i < in_dims.size(); i++) { if(i == 2) { - out_dims.push_back(in_dims[i] + 2*padding[2]); - } else if (i == 3) { - out_dims.push_back(in_dims[i] + 2*padding[0]); - } - else { + out_dims.push_back(in_dims[i] + 2 * padding[2]); + } + else if(i == 3) + { + out_dims.push_back(in_dims[i] + 2 * padding[0]); + } + else + { out_dims.push_back(in_dims[i]); } } @@ -141,18 +146,13 @@ struct PadReflectionTest : public ::testing::TestWithParam void RunTest() { auto&& handle = get_handle(); - auto padding = pad_reflection_config.GetPadding(); + auto padding = pad_reflection_config.GetPadding(); cpu_pad_reflection(input, ref_output, padding); miopenStatus_t status; - status = miopen::PadReflection(handle, - input.desc, - input_dev.get(), - output.desc, - output_dev.get(), - padding - ); + status = miopen::PadReflection( + handle, input.desc, input_dev.get(), output.desc, output_dev.get(), padding); EXPECT_EQ(status, miopenStatusSuccess); @@ -175,5 +175,4 @@ struct PadReflectionTest : public ::testing::TestWithParam miopen::Allocator::ManageDataPtr input_dev; miopen::Allocator::ManageDataPtr output_dev; - }; From a0ef46c20a734c34e1342c87b9cfb9f6d40e5098 Mon Sep 17 00:00:00 2001 From: Duong Le Date: Tue, 16 Apr 2024 08:40:34 +0000 Subject: [PATCH 09/29] Change copyright 2023->2024, change padding from vector to pointer, add padding test half and bf16 --- driver/pad_reflection_driver.hpp | 14 +++--- include/miopen/miopen.h | 6 +-- src/include/miopen/pad_reflection.hpp | 5 ++- .../miopen/pad_reflection/invoke_params.hpp | 6 ++- .../pad_reflection/problem_description.hpp | 24 +++++++--- src/include/miopen/pad_reflection/solvers.hpp | 7 +-- 
src/kernels/MIOpenPadReflection.cpp | 2 +- src/pad_reflection.cpp | 10 +++-- src/pad_reflection/problem_description.cpp | 2 +- src/pad_reflection_api.cpp | 8 ++-- src/solver/pad_reflection/pad_reflection.cpp | 29 +++++++----- test/cpu_pad_reflection.hpp | 2 +- test/gtest/pad_reflection.cpp | 44 ++++++++++++++++++- test/gtest/pad_reflection.hpp | 16 +++---- 14 files changed, 121 insertions(+), 54 deletions(-) diff --git a/driver/pad_reflection_driver.hpp b/driver/pad_reflection_driver.hpp index edb35fee26..f964b47e6f 100644 --- a/driver/pad_reflection_driver.hpp +++ b/driver/pad_reflection_driver.hpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -85,7 +85,6 @@ void mloPadReflectionRunHost(miopenTensorDescriptor_t inputDesc, } else if(padding_l <= w && w < in_W + padding_l) { - w = w; } else { @@ -99,7 +98,6 @@ void mloPadReflectionRunHost(miopenTensorDescriptor_t inputDesc, } else if(padding_t <= h && h < in_H + padding_t) { - h = h; } else { @@ -189,6 +187,10 @@ int PadReflectionDriver::GetandSetData() padding.push_back(std::stoul(token)); } + if (!(padding.size() == 1 or padding.size() == 4)) { + std::cerr << "Error Padding Lengths\n" << std::endl; + } + std::vector in_len = GetInputTensorLengthsFromCmdLine(); SetTensorNd(inputDesc, in_len, data_type); @@ -230,7 +232,7 @@ int PadReflectionDriver::AddCmdLineArgs() inflags.AddInputFlag("in_h", 'H', "32", "Input Height (Default=32)", "int"); inflags.AddInputFlag("in_w", 'W', "32", "Input Width (Default=32)", "int"); - inflags.AddInputFlag("padding", 'P', "1,1,1,1", "Padding array (Default=1,1,1,1)", "str"); + inflags.AddInputFlag("padding", 'P', "1,1,1,1", "Padding array (Default=1 or 1,1,1,1)", "str"); inflags.AddInputFlag("iter", 'i', "10", "Number of Iterations (Default=10)", "int"); inflags.AddInputFlag("verify", 'V', "1", "Verify Each Layer (Default=1)", "int"); @@ -318,7 +320,7 @@ int PadReflectionDriver::RunForwardGPU() for(int i = 0; i < inflags.GetValueInt("iter"); i++) { miopenPadReflection( - GetHandle(), inputDesc, in_dev->GetMem(), outputDesc, out_dev->GetMem(), padding); + GetHandle(), inputDesc, in_dev->GetMem(), outputDesc, out_dev->GetMem(), padding.data(), padding.size()); float time = 0.0; miopenGetKernelTime(GetHandle(), &time); @@ -374,7 +376,7 @@ int PadReflectionDriver::VerifyForward() const Tref tolerance = GetTolerance(); auto error = miopen::rms_range(outhost, out); - if(!std::isfinite(error) || error > 0) + if(std::abs(static_cast(error)) != 0.0f) { std::cout << "Pad Reflection FAILED: " << error << " > " << tolerance << std::endl; return EC_VerifyFwd; diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index 003b989f9a..a5c92ad65f 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -26,7 +26,6 @@ #ifndef MIOPEN_GUARD_MIOPEN_H_ #define MIOPEN_GUARD_MIOPEN_H_ -#include #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wextern-c-compat" @@ -5819,7 +5818,8 @@ MIOPEN_EXPORT miopenStatus_t miopenPadReflection(miopenHandle_t handle, const void* x, const miopenTensorDescriptor_t yDesc, void* y, - const std::vector padding); + const size_t * padding, + const size_t num_padding); /** @} */ // CLOSEOUT PAD REFLECTION DOXYGEN GROUP diff --git a/src/include/miopen/pad_reflection.hpp b/src/include/miopen/pad_reflection.hpp index fcd265922a..19d39b040b 100644 --- a/src/include/miopen/pad_reflection.hpp +++ b/src/include/miopen/pad_reflection.hpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -38,6 +38,7 @@ miopenStatus_t PadReflection(Handle& handle, ConstData_t x, const TensorDescriptor& yDesc, Data_t y, - std::vector padding); + const size_t * padding, + const size_t num_padding); } // namespace miopen #endif // MIOPEN_PAD_REFLECTION_HPP_ diff --git a/src/include/miopen/pad_reflection/invoke_params.hpp b/src/include/miopen/pad_reflection/invoke_params.hpp index c7b847676c..2ff8bad10c 100644 --- a/src/include/miopen/pad_reflection/invoke_params.hpp +++ b/src/include/miopen/pad_reflection/invoke_params.hpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -42,7 +42,9 @@ struct InvokeParams : public miopen::InvokeParams ConstData_t x = nullptr; Data_t y = nullptr; - const std::vector* padding = nullptr; + const size_t * padding = nullptr; + size_t num_padding = 0; + std::size_t GetWorkspaceSize() const { return 0; } Data_t GetWorkspace() const { return nullptr; } }; diff --git a/src/include/miopen/pad_reflection/problem_description.hpp b/src/include/miopen/pad_reflection/problem_description.hpp index 19b70ca476..65c258970d 100644 --- a/src/include/miopen/pad_reflection/problem_description.hpp +++ b/src/include/miopen/pad_reflection/problem_description.hpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -41,13 +41,13 @@ struct ProblemDescription : ProblemDescriptionBase { ProblemDescription(const TensorDescriptor& xDesc_, const TensorDescriptor& yDesc_, - const std::vector padding_) - : xDesc(xDesc_), yDesc(yDesc_), padding(padding_) + const size_t num_padding_) + : xDesc(xDesc_), yDesc(yDesc_), num_padding(num_padding_) { } const TensorDescriptor& GetXDesc() const { return xDesc; } const TensorDescriptor& GetYDesc() const { return yDesc; } - const std::vector& GetPadding() const { return padding; } + size_t GetNumPadding() const { return num_padding; } bool IsSameType() const { @@ -75,12 +75,26 @@ struct ProblemDescription : ProblemDescriptionBase return true; } + + bool IsRightNumPadding() const + { + if(!(num_padding == 1 || num_padding == 4)) + { +#if MIOPEN_BUILD_DEV || !MIOPEN_NDEBUG + MIOPEN_THROW(miopenStatusBadParm, "Pad Reflection: Padding input needs to have 1 or 4 elements only."); +#else + return false; +#endif + } + return true; + } + NetworkConfig MakeNetworkConfig() const override; private: TensorDescriptor xDesc; TensorDescriptor yDesc; - std::vector padding; + size_t num_padding; }; } // namespace pad_reflection diff --git a/src/include/miopen/pad_reflection/solvers.hpp b/src/include/miopen/pad_reflection/solvers.hpp index e6913f5192..e2da94bc4b 100644 --- a/src/include/miopen/pad_reflection/solvers.hpp +++ b/src/include/miopen/pad_reflection/solvers.hpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -48,11 +48,6 @@ struct PadReflection final : PadReflectionSolver ConvSolution GetSolution(const ExecutionContext& context, const miopen::pad_reflection::ProblemDescription& problem) const override; - std::size_t - GetWorkspaceSize(const ExecutionContext& context, - const miopen::pad_reflection::ProblemDescription& problem) const override; - - bool MayNeedWorkspace() const override { return false; } }; } // namespace pad_reflection diff --git a/src/kernels/MIOpenPadReflection.cpp b/src/kernels/MIOpenPadReflection.cpp index 7d67dc7019..fa70b00add 100644 --- a/src/kernels/MIOpenPadReflection.cpp +++ b/src/kernels/MIOpenPadReflection.cpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/src/pad_reflection.cpp b/src/pad_reflection.cpp index e3ac64d2be..5ddc10c3df 100644 --- a/src/pad_reflection.cpp +++ b/src/pad_reflection.cpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -40,9 +40,10 @@ miopenStatus_t PadReflection(Handle& handle, ConstData_t x, const TensorDescriptor& yDesc, Data_t y, - const std::vector padding) + const size_t * padding, + const size_t num_padding) { - const auto problem = pad_reflection::ProblemDescription{xDesc, yDesc, padding}; + const auto problem = pad_reflection::ProblemDescription{xDesc, yDesc, num_padding}; const auto invoke_params = [&]() { auto tmp = pad_reflection::InvokeParams{}; @@ -51,7 +52,8 @@ miopenStatus_t PadReflection(Handle& handle, tmp.yDesc = &yDesc; tmp.x = x; tmp.y = y; - tmp.padding = &padding; + tmp.padding = padding; + tmp.num_padding = num_padding; return tmp; }(); diff --git a/src/pad_reflection/problem_description.cpp b/src/pad_reflection/problem_description.cpp index 98cc727c17..403b9e913b 100644 --- a/src/pad_reflection/problem_description.cpp +++ b/src/pad_reflection/problem_description.cpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/src/pad_reflection_api.cpp b/src/pad_reflection_api.cpp index 97fe6a7fe6..e8854b2aac 100644 --- a/src/pad_reflection_api.cpp +++ b/src/pad_reflection_api.cpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -35,7 +35,8 @@ extern "C" miopenStatus_t miopenPadReflection(miopenHandle_t handle, const void* x, const miopenTensorDescriptor_t yDesc, void* y, - const std::vector padding) + const size_t * padding, + const size_t num_padding) { MIOPEN_LOG_FUNCTION(handle, xDesc, x, yDesc, y, padding); @@ -45,6 +46,7 @@ extern "C" miopenStatus_t miopenPadReflection(miopenHandle_t handle, DataCast(x), miopen::deref(yDesc), DataCast(y), - padding); + padding, + num_padding); }); } diff --git a/src/solver/pad_reflection/pad_reflection.cpp b/src/solver/pad_reflection/pad_reflection.cpp index a75feadfe0..148cde5f3b 100644 --- a/src/solver/pad_reflection/pad_reflection.cpp +++ b/src/solver/pad_reflection/pad_reflection.cpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -48,16 +48,11 @@ bool PadReflection::IsApplicable( return false; if(!problem.IsAllPacked()) return false; + if(!problem.IsRightNumPadding()) + return false; return true; } -std::size_t PadReflection::GetWorkspaceSize( - [[maybe_unused]] const ExecutionContext& context, - [[maybe_unused]] const miopen::pad_reflection::ProblemDescription& problem) const -{ - return 0; -} - ConvSolution PadReflection::GetSolution( [[maybe_unused]] const ExecutionContext& context, [[maybe_unused]] const miopen::pad_reflection::ProblemDescription& problem) const @@ -113,9 +108,21 @@ ConvSolution PadReflection::GetSolution( auto output_size = std::accumulate(ydims.begin(), ydims.end(), 1ULL, std::multiplies()); - auto padding = params.padding; - long padding_l = (*padding)[0]; - long padding_t = (*padding)[2]; + auto padding = params.padding; + auto num_padding = params.num_padding; + long padding_l, padding_t; + if(num_padding == 1) + { + padding_l = padding[0]; + padding_t = padding[0]; + } + else if(num_padding == 4) + { + padding_l = padding[0]; + padding_t = padding[2]; + } + // long padding_l = (*padding)[0]; + // long padding_t = (*padding)[2]; size_t in_H = xdims[2]; size_t in_W = xdims[3]; size_t output_size_1 = ydims[1]; diff --git a/test/cpu_pad_reflection.hpp b/test/cpu_pad_reflection.hpp index ef15909e7c..55e6935d8b 100644 --- a/test/cpu_pad_reflection.hpp +++ b/test/cpu_pad_reflection.hpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/test/gtest/pad_reflection.cpp b/test/gtest/pad_reflection.cpp index 1eefd15d92..93f93aecd4 100644 --- a/test/gtest/pad_reflection.cpp +++ b/test/gtest/pad_reflection.cpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -46,6 +46,14 @@ struct PadReflectionTestFloat : PadReflectionTest { }; +struct PadReflectionTestHalf : PadReflectionTest +{ +}; + +struct PadReflectionTestBF16 : PadReflectionTest +{ +}; + } // namespace pad_reflection using namespace pad_reflection; @@ -62,6 +70,40 @@ TEST_P(PadReflectionTestFloat, PadReflectionFw) } }; +TEST_P(PadReflectionTestHalf, PadReflectionFw) +{ + if(miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)) && (GetFloatArg() == "--half")) + { + RunTest(); + Verify(); + } + else + { + GTEST_SKIP(); + } +}; + +TEST_P(PadReflectionTestBF16, PadReflectionFw) +{ + if(miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)) && (GetFloatArg() == "--bfloat16")) + { + RunTest(); + Verify(); + } + else + { + GTEST_SKIP(); + } +}; + INSTANTIATE_TEST_SUITE_P(PadReflectionTestSet, PadReflectionTestFloat, testing::ValuesIn(PadReflectionTestFloatConfigs())); + +INSTANTIATE_TEST_SUITE_P(PadReflectionTestSet, + PadReflectionTestHalf, + testing::ValuesIn(PadReflectionTestFloatConfigs())); + +INSTANTIATE_TEST_SUITE_P(PadReflectionTestSet, + PadReflectionTestBF16, + testing::ValuesIn(PadReflectionTestFloatConfigs())); diff --git a/test/gtest/pad_reflection.hpp b/test/gtest/pad_reflection.hpp index b755639f08..5f3ce15478 100644 --- a/test/gtest/pad_reflection.hpp +++ b/test/gtest/pad_reflection.hpp @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -41,11 +41,11 @@ struct PadReflectionCase size_t D; size_t H; size_t W; - int padding[4]; + size_t padding[4]; friend std::ostream& operator<<(std::ostream& os, const PadReflectionCase& tc) { return os << " N:" << tc.N << " C:" << tc.C << " D:" << tc.D << " H:" << tc.H - << " W:" << tc.W; + << " W:" << tc.W << " Padding:" << tc.padding[0] << " " << tc.padding[1] << " " << tc.padding[2] << " " << tc.padding[3]; } std::vector GetInput() @@ -82,7 +82,7 @@ struct PadReflectionCase std::vector paddingVector; for(int i = 0; i < 4; ++i) { - paddingVector.push_back(static_cast(padding[i])); + paddingVector.push_back(padding[i]); } return paddingVector; } @@ -92,11 +92,11 @@ std::vector PadReflectionTestFloatConfigs() { // n c d h w padding // clang-format off return { - { 1, 1, 0, 3, 3, {2, 2, 2, 2}}, - { 48, 8, 0, 512, 512, {1, 1, 1, 1}}, + { 1, 1, 0, 3, 3, {2}}, + { 48, 8, 0, 512, 512, {1}}, { 48, 8, 0, 512, 512, {1, 1, 3, 3}}, { 48, 8, 0, 512, 512, {0, 0, 2, 2}}, - { 16, 311, 0, 98, 512, {1, 1, 1, 1}}, + { 16, 311, 0, 98, 512, {1}}, { 16, 311, 0, 98, 512, {1, 1, 3, 3}}, { 16, 311, 0, 98, 512, {0, 0, 2, 2}}, }; @@ -152,7 +152,7 @@ struct PadReflectionTest : public ::testing::TestWithParam miopenStatus_t status; status = miopen::PadReflection( - handle, input.desc, input_dev.get(), output.desc, output_dev.get(), padding); + handle, input.desc, input_dev.get(), output.desc, output_dev.get(), padding.data(), padding.size()); EXPECT_EQ(status, miopenStatusSuccess); From cd548ef582348ca90e53aaed40954ae0889efb72 Mon Sep 17 00:00:00 2001 From: Duong Le Date: Tue, 16 Apr 2024 08:40:59 +0000 Subject: [PATCH 10/29] githook clean up --- driver/pad_reflection_driver.hpp | 12 +++++++++--- include/miopen/miopen.h | 12 ++++++------ src/include/miopen/pad_reflection.hpp | 2 +- 
.../miopen/pad_reflection/invoke_params.hpp | 4 ++-- .../pad_reflection/problem_description.hpp | 8 ++++---- src/pad_reflection.cpp | 16 ++++++++-------- src/pad_reflection_api.cpp | 2 +- test/gtest/pad_reflection.hpp | 12 +++++++++--- 8 files changed, 40 insertions(+), 28 deletions(-) diff --git a/driver/pad_reflection_driver.hpp b/driver/pad_reflection_driver.hpp index f964b47e6f..f53a1ffe7d 100644 --- a/driver/pad_reflection_driver.hpp +++ b/driver/pad_reflection_driver.hpp @@ -187,7 +187,8 @@ int PadReflectionDriver::GetandSetData() padding.push_back(std::stoul(token)); } - if (!(padding.size() == 1 or padding.size() == 4)) { + if(!(padding.size() == 1 or padding.size() == 4)) + { std::cerr << "Error Padding Lengths\n" << std::endl; } @@ -319,8 +320,13 @@ int PadReflectionDriver::RunForwardGPU() for(int i = 0; i < inflags.GetValueInt("iter"); i++) { - miopenPadReflection( - GetHandle(), inputDesc, in_dev->GetMem(), outputDesc, out_dev->GetMem(), padding.data(), padding.size()); + miopenPadReflection(GetHandle(), + inputDesc, + in_dev->GetMem(), + outputDesc, + out_dev->GetMem(), + padding.data(), + padding.size()); float time = 0.0; miopenGetKernelTime(GetHandle(), &time); diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index a5c92ad65f..4626464794 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -5814,12 +5814,12 @@ MIOPEN_EXPORT miopenStatus_t miopenSumForward(miopenHandle_t handle, * @return miopenStatus_t */ MIOPEN_EXPORT miopenStatus_t miopenPadReflection(miopenHandle_t handle, - const miopenTensorDescriptor_t xDesc, - const void* x, - const miopenTensorDescriptor_t yDesc, - void* y, - const size_t * padding, - const size_t num_padding); + const miopenTensorDescriptor_t xDesc, + const void* x, + const miopenTensorDescriptor_t yDesc, + void* y, + const size_t* padding, + const size_t num_padding); /** @} */ // CLOSEOUT PAD REFLECTION DOXYGEN GROUP diff --git a/src/include/miopen/pad_reflection.hpp b/src/include/miopen/pad_reflection.hpp index 19d39b040b..ce65b16ec3 100644 --- a/src/include/miopen/pad_reflection.hpp +++ b/src/include/miopen/pad_reflection.hpp @@ -38,7 +38,7 @@ miopenStatus_t PadReflection(Handle& handle, ConstData_t x, const TensorDescriptor& yDesc, Data_t y, - const size_t * padding, + const size_t* padding, const size_t num_padding); } // namespace miopen #endif // MIOPEN_PAD_REFLECTION_HPP_ diff --git a/src/include/miopen/pad_reflection/invoke_params.hpp b/src/include/miopen/pad_reflection/invoke_params.hpp index 2ff8bad10c..b64438d83a 100644 --- a/src/include/miopen/pad_reflection/invoke_params.hpp +++ b/src/include/miopen/pad_reflection/invoke_params.hpp @@ -42,8 +42,8 @@ struct InvokeParams : public miopen::InvokeParams ConstData_t x = nullptr; Data_t y = nullptr; - const size_t * padding = nullptr; - size_t num_padding = 0; + const size_t* padding = nullptr; + size_t num_padding = 0; std::size_t GetWorkspaceSize() const { return 0; } Data_t GetWorkspace() const { return nullptr; } diff --git a/src/include/miopen/pad_reflection/problem_description.hpp b/src/include/miopen/pad_reflection/problem_description.hpp index 65c258970d..3172512757 100644 --- a/src/include/miopen/pad_reflection/problem_description.hpp +++ b/src/include/miopen/pad_reflection/problem_description.hpp @@ -42,7 +42,7 @@ struct ProblemDescription : ProblemDescriptionBase ProblemDescription(const TensorDescriptor& xDesc_, const TensorDescriptor& yDesc_, const size_t num_padding_) - : xDesc(xDesc_), yDesc(yDesc_), num_padding(num_padding_) + : xDesc(xDesc_), 
yDesc(yDesc_), num_padding(num_padding_) { } const TensorDescriptor& GetXDesc() const { return xDesc; } @@ -75,19 +75,19 @@ struct ProblemDescription : ProblemDescriptionBase return true; } - bool IsRightNumPadding() const { if(!(num_padding == 1 || num_padding == 4)) { #if MIOPEN_BUILD_DEV || !MIOPEN_NDEBUG - MIOPEN_THROW(miopenStatusBadParm, "Pad Reflection: Padding input needs to have 1 or 4 elements only."); + MIOPEN_THROW(miopenStatusBadParm, + "Pad Reflection: Padding input needs to have 1 or 4 elements only."); #else return false; #endif } return true; - } + } NetworkConfig MakeNetworkConfig() const override; diff --git a/src/pad_reflection.cpp b/src/pad_reflection.cpp index 5ddc10c3df..4605984f90 100644 --- a/src/pad_reflection.cpp +++ b/src/pad_reflection.cpp @@ -40,19 +40,19 @@ miopenStatus_t PadReflection(Handle& handle, ConstData_t x, const TensorDescriptor& yDesc, Data_t y, - const size_t * padding, + const size_t* padding, const size_t num_padding) { const auto problem = pad_reflection::ProblemDescription{xDesc, yDesc, num_padding}; const auto invoke_params = [&]() { - auto tmp = pad_reflection::InvokeParams{}; - tmp.type = InvokeType::Run; - tmp.xDesc = &xDesc; - tmp.yDesc = &yDesc; - tmp.x = x; - tmp.y = y; - tmp.padding = padding; + auto tmp = pad_reflection::InvokeParams{}; + tmp.type = InvokeType::Run; + tmp.xDesc = &xDesc; + tmp.yDesc = &yDesc; + tmp.x = x; + tmp.y = y; + tmp.padding = padding; tmp.num_padding = num_padding; return tmp; }(); diff --git a/src/pad_reflection_api.cpp b/src/pad_reflection_api.cpp index e8854b2aac..bbe94d3ec6 100644 --- a/src/pad_reflection_api.cpp +++ b/src/pad_reflection_api.cpp @@ -35,7 +35,7 @@ extern "C" miopenStatus_t miopenPadReflection(miopenHandle_t handle, const void* x, const miopenTensorDescriptor_t yDesc, void* y, - const size_t * padding, + const size_t* padding, const size_t num_padding) { MIOPEN_LOG_FUNCTION(handle, xDesc, x, yDesc, y, padding); diff --git a/test/gtest/pad_reflection.hpp b/test/gtest/pad_reflection.hpp index 5f3ce15478..c164885baa 100644 --- a/test/gtest/pad_reflection.hpp +++ b/test/gtest/pad_reflection.hpp @@ -45,7 +45,8 @@ struct PadReflectionCase friend std::ostream& operator<<(std::ostream& os, const PadReflectionCase& tc) { return os << " N:" << tc.N << " C:" << tc.C << " D:" << tc.D << " H:" << tc.H - << " W:" << tc.W << " Padding:" << tc.padding[0] << " " << tc.padding[1] << " " << tc.padding[2] << " " << tc.padding[3]; + << " W:" << tc.W << " Padding:" << tc.padding[0] << " " << tc.padding[1] << " " + << tc.padding[2] << " " << tc.padding[3]; } std::vector GetInput() @@ -151,8 +152,13 @@ struct PadReflectionTest : public ::testing::TestWithParam cpu_pad_reflection(input, ref_output, padding); miopenStatus_t status; - status = miopen::PadReflection( - handle, input.desc, input_dev.get(), output.desc, output_dev.get(), padding.data(), padding.size()); + status = miopen::PadReflection(handle, + input.desc, + input_dev.get(), + output.desc, + output_dev.get(), + padding.data(), + padding.size()); EXPECT_EQ(status, miopenStatusSuccess); From f21e7f57777fa48481c2d2de7e7fdfe5b29ffab1 Mon Sep 17 00:00:00 2001 From: Duong Le Date: Tue, 16 Apr 2024 09:26:23 +0000 Subject: [PATCH 11/29] remove maybe_unused for problem in PadReflection::IsApplicable and PadReflection::GetSolution --- src/solver/pad_reflection/pad_reflection.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/solver/pad_reflection/pad_reflection.cpp b/src/solver/pad_reflection/pad_reflection.cpp index 
148cde5f3b..b0b9fc16b2 100644 --- a/src/solver/pad_reflection/pad_reflection.cpp +++ b/src/solver/pad_reflection/pad_reflection.cpp @@ -42,7 +42,7 @@ namespace pad_reflection { bool PadReflection::IsApplicable( [[maybe_unused]] const ExecutionContext& context, - [[maybe_unused]] const miopen::pad_reflection::ProblemDescription& problem) const + const miopen::pad_reflection::ProblemDescription& problem) const { if(!problem.IsSameType()) return false; @@ -55,7 +55,7 @@ bool PadReflection::IsApplicable( ConvSolution PadReflection::GetSolution( [[maybe_unused]] const ExecutionContext& context, - [[maybe_unused]] const miopen::pad_reflection::ProblemDescription& problem) const + const miopen::pad_reflection::ProblemDescription& problem) const { auto result = ConvSolution{miopenStatusSuccess}; From 825111e197f5f5e8ad84d35f01a6649188d56b0f Mon Sep 17 00:00:00 2001 From: Duong Le Date: Tue, 16 Apr 2024 09:26:39 +0000 Subject: [PATCH 12/29] githook cleanup --- src/solver/pad_reflection/pad_reflection.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/solver/pad_reflection/pad_reflection.cpp b/src/solver/pad_reflection/pad_reflection.cpp index b0b9fc16b2..b2b03a2d91 100644 --- a/src/solver/pad_reflection/pad_reflection.cpp +++ b/src/solver/pad_reflection/pad_reflection.cpp @@ -40,9 +40,8 @@ namespace solver { namespace pad_reflection { -bool PadReflection::IsApplicable( - [[maybe_unused]] const ExecutionContext& context, - const miopen::pad_reflection::ProblemDescription& problem) const +bool PadReflection::IsApplicable([[maybe_unused]] const ExecutionContext& context, + const miopen::pad_reflection::ProblemDescription& problem) const { if(!problem.IsSameType()) return false; @@ -53,9 +52,9 @@ bool PadReflection::IsApplicable( return true; } -ConvSolution PadReflection::GetSolution( - [[maybe_unused]] const ExecutionContext& context, - const miopen::pad_reflection::ProblemDescription& problem) const +ConvSolution +PadReflection::GetSolution([[maybe_unused]] const ExecutionContext& context, + const miopen::pad_reflection::ProblemDescription& problem) const { auto result = ConvSolution{miopenStatusSuccess}; From 6f4e1a0352dfe9444be3f7b7dd23b925ae5e2491 Mon Sep 17 00:00:00 2001 From: Duong Le Date: Tue, 16 Apr 2024 15:09:12 +0000 Subject: [PATCH 13/29] update miopen.h padreflection description --- include/miopen/miopen.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index 4626464794..c4d4c082c7 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -5803,14 +5803,12 @@ MIOPEN_EXPORT miopenStatus_t miopenSumForward(miopenHandle_t handle, /*! @brief Add padding by the reflection of the tensor * * @param handle MIOpen handle (input) - * @param nanPropagation Nan number propagation mode (input) - * @param workspace Address of the allocated workspace data (input) - * @param workspaceSizeInBytes Size in bytes of the allocated workspace data (input) * @param xDesc Tensor descriptor for data input tensor x (input) * @param x Data tensor x (input) - * @param dim Dimensions to sum. 
(input) * @param yDesc Tensor descriptor for output data tensor y (input) * @param y Data tensor y (output) + * @param padding Padding array contain 1 or 4 elements (input) + * @param num_padding Number of elements in padding, equals 1 or 4 (input) * @return miopenStatus_t */ MIOPEN_EXPORT miopenStatus_t miopenPadReflection(miopenHandle_t handle, From c4aedd15ea3978ba8fd06c89d76486b1a8d886f7 Mon Sep 17 00:00:00 2001 From: Duong Le Date: Tue, 16 Apr 2024 15:15:05 +0000 Subject: [PATCH 14/29] update "CLOSEOUT padreflection DOXYGEN GROUP" --- include/miopen/miopen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index c4d4c082c7..18c60f17b6 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -5820,7 +5820,7 @@ MIOPEN_EXPORT miopenStatus_t miopenPadReflection(miopenHandle_t handle, const size_t num_padding); /** @} */ -// CLOSEOUT PAD REFLECTION DOXYGEN GROUP +// CLOSEOUT padreflection DOXYGEN GROUP #endif #ifdef MIOPEN_BETA_API From a805d4258317c4103f2bd8ead6083cc1cd7f5e6e Mon Sep 17 00:00:00 2001 From: Duong Le Date: Wed, 17 Apr 2024 03:12:41 +0000 Subject: [PATCH 15/29] Chang padding_l -> padding_left, padding_t -> padding_top for padreflection kernel --- src/kernels/MIOpenPadReflection.cpp | 36 ++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/kernels/MIOpenPadReflection.cpp b/src/kernels/MIOpenPadReflection.cpp index fa70b00add..3a4922399c 100644 --- a/src/kernels/MIOpenPadReflection.cpp +++ b/src/kernels/MIOpenPadReflection.cpp @@ -35,8 +35,8 @@ template __device__ void padReflection2dFwdContiguous(const TI* __restrict__ input, TO* __restrict__ output, uint64_t output_size, - long padding_l, - long padding_t, + long padding_left, + long padding_top, const size_t in_H, const size_t in_W, const size_t output_size_1, @@ -59,34 +59,34 @@ __device__ void padReflection2dFwdContiguous(const TI* __restrict__ input, n = nc / output_size_1; c = nc % output_size_1; - long in_start_x = max(0L, -padding_l); - long in_start_y = max(0L, -padding_t); - long out_start_x = max(0L, padding_l); - long out_start_y = max(0L, padding_t); + long in_start_x = max(0L, -padding_left); + long in_start_y = max(0L, -padding_top); + long out_start_x = max(0L, padding_left); + long out_start_y = max(0L, padding_top); - if(w < padding_l) + if(w < padding_left) { - w = padding_l * 2 - w; + w = padding_left * 2 - w; } - else if(padding_l <= w && w < in_W + padding_l) + else if(padding_left <= w && w < in_W + padding_left) { } else { - w = (in_W + padding_l - 1) * 2 - w; + w = (in_W + padding_left - 1) * 2 - w; } w = w - out_start_x + in_start_x; - if(h < padding_t) + if(h < padding_top) { - h = padding_t * 2 - h; + h = padding_top * 2 - h; } - else if(padding_t <= h && h < in_H + padding_t) + else if(padding_top <= h && h < in_H + padding_top) { } else { - h = (in_H + padding_t - 1) * 2 - h; + h = (in_H + padding_top - 1) * 2 - h; } h = h - out_start_y + in_start_y; @@ -97,8 +97,8 @@ __device__ void padReflection2dFwdContiguous(const TI* __restrict__ input, extern "C" __global__ void PadReflection2dFwdContiguous(const INPUT_TYPE* __restrict__ input, OUTPUT_TYPE* __restrict__ output, uint64_t output_size, - long padding_l, - long padding_t, + long padding_left, + long padding_top, const size_t in_H, const size_t in_W, const size_t output_size_1, @@ -112,8 +112,8 @@ extern "C" __global__ void PadReflection2dFwdContiguous(const INPUT_TYPE* __rest padReflection2dFwdContiguous(input, output, 
output_size, - padding_l, - padding_t, + padding_left, + padding_top, in_H, in_W, output_size_1, From c916b1d4d45978492def1503f2539eb7c1caec4c Mon Sep 17 00:00:00 2001 From: Duong Le Date: Wed, 17 Apr 2024 03:22:15 +0000 Subject: [PATCH 16/29] remove unused condition line in padreflection kernel --- src/kernels/MIOpenPadReflection.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/kernels/MIOpenPadReflection.cpp b/src/kernels/MIOpenPadReflection.cpp index 3a4922399c..e269f168ff 100644 --- a/src/kernels/MIOpenPadReflection.cpp +++ b/src/kernels/MIOpenPadReflection.cpp @@ -68,10 +68,7 @@ __device__ void padReflection2dFwdContiguous(const TI* __restrict__ input, { w = padding_left * 2 - w; } - else if(padding_left <= w && w < in_W + padding_left) - { - } - else + else if(!(padding_left <= w && w < in_W + padding_left)) { w = (in_W + padding_left - 1) * 2 - w; } @@ -81,10 +78,7 @@ __device__ void padReflection2dFwdContiguous(const TI* __restrict__ input, { h = padding_top * 2 - h; } - else if(padding_top <= h && h < in_H + padding_top) - { - } - else + else if(!(padding_top <= h && h < in_H + padding_top)) { h = (in_H + padding_top - 1) * 2 - h; } From a008c17039ef89a729d072c0f174df1312b932f9 Mon Sep 17 00:00:00 2001 From: Duong Le Date: Wed, 17 Apr 2024 03:22:38 +0000 Subject: [PATCH 17/29] remove unused line --- src/solver/pad_reflection/pad_reflection.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/solver/pad_reflection/pad_reflection.cpp b/src/solver/pad_reflection/pad_reflection.cpp index b2b03a2d91..d0aebbd521 100644 --- a/src/solver/pad_reflection/pad_reflection.cpp +++ b/src/solver/pad_reflection/pad_reflection.cpp @@ -120,8 +120,6 @@ PadReflection::GetSolution([[maybe_unused]] const ExecutionContext& context, padding_l = padding[0]; padding_t = padding[2]; } - // long padding_l = (*padding)[0]; - // long padding_t = (*padding)[2]; size_t in_H = xdims[2]; size_t in_W = xdims[3]; size_t output_size_1 = ydims[1]; From c0eacdf799b045395f0fe0816d33c07a5aef635c Mon Sep 17 00:00:00 2001 From: Duong Le Date: Wed, 17 Apr 2024 03:24:29 +0000 Subject: [PATCH 18/29] indent test_case --- test/gtest/pad_reflection.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/gtest/pad_reflection.hpp b/test/gtest/pad_reflection.hpp index c164885baa..482bc89a71 100644 --- a/test/gtest/pad_reflection.hpp +++ b/test/gtest/pad_reflection.hpp @@ -93,13 +93,13 @@ std::vector PadReflectionTestFloatConfigs() { // n c d h w padding // clang-format off return { - { 1, 1, 0, 3, 3, {2}}, - { 48, 8, 0, 512, 512, {1}}, - { 48, 8, 0, 512, 512, {1, 1, 3, 3}}, - { 48, 8, 0, 512, 512, {0, 0, 2, 2}}, - { 16, 311, 0, 98, 512, {1}}, - { 16, 311, 0, 98, 512, {1, 1, 3, 3}}, - { 16, 311, 0, 98, 512, {0, 0, 2, 2}}, + { 1, 1, 0, 3, 3, {2}}, + { 48, 8, 0, 512, 512, {1}}, + { 48, 8, 0, 512, 512, {1, 1, 3, 3}}, + { 48, 8, 0, 512, 512, {0, 0, 2, 2}}, + { 16, 311, 0, 98, 512, {1}}, + { 16, 311, 0, 98, 512, {1, 1, 3, 3}}, + { 16, 311, 0, 98, 512, {0, 0, 2, 2}}, }; // clang-format on } From e41d851c6c8ae49ece2299504d17ca18d81ba75a Mon Sep 17 00:00:00 2001 From: Duong Le Date: Fri, 19 Apr 2024 02:52:32 +0000 Subject: [PATCH 19/29] Update padreflection group -> pad group for doxygen --- include/miopen/miopen.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index 18c60f17b6..c4bb5f01e8 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -68,7 +68,7 @@ * @defgroup argmax * 
@defgroup groupnorm * @defgroup cat - * @defgroup padreflection + * @defgroup pad * */ @@ -5796,7 +5796,7 @@ MIOPEN_EXPORT miopenStatus_t miopenSumForward(miopenHandle_t handle, #ifdef MIOPEN_BETA_API // Padreflection APIs -/** @addtogroup padreflection +/** @addtogroup pad * * @{ */ @@ -5820,7 +5820,7 @@ MIOPEN_EXPORT miopenStatus_t miopenPadReflection(miopenHandle_t handle, const size_t num_padding); /** @} */ -// CLOSEOUT padreflection DOXYGEN GROUP +// CLOSEOUT pad DOXYGEN GROUP #endif #ifdef MIOPEN_BETA_API From b7d79aca073ed9fd60d239caaeb870d98fc9546c Mon Sep 17 00:00:00 2001 From: Duong Le Date: Fri, 19 Apr 2024 06:34:22 +0000 Subject: [PATCH 20/29] update docs index padreflection group -> pad group --- docs/reference/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 60883f9a1b..58828bdcce 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -31,5 +31,5 @@ The MIOpen API library is structured as follows: * :doc:`Sum <../doxygen/html/group__sum>` (experimental) * :doc:`GroupNorm <../doxygen/html/group__groupnorm>` (experimental) * :doc:`Cat <../doxygen/html/group__cat>` (experimental) - * :doc:`PadReflection <../doxygen/html/group__padreflection>` (experimental) + * :doc:`Pad <../doxygen/html/group__pad>` (experimental) * :doc:`Argmax<./argmax>` (experimental) From 2d32b06bad4cbb74d141d589758a24f029d4e0e7 Mon Sep 17 00:00:00 2001 From: Duong Le Date: Tue, 6 Aug 2024 07:44:20 +0000 Subject: [PATCH 21/29] githook format --- driver/driver.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/driver/driver.hpp b/driver/driver.hpp index 677a4b0dc4..fbf4cd4e86 100644 --- a/driver/driver.hpp +++ b/driver/driver.hpp @@ -206,8 +206,9 @@ inline std::string ParseBaseArg(int argc, char* argv[]) arg != "adamwfp16" && arg != "ampadamw" && arg != "transformersadamw" && arg != "transformersadamwfp16" && arg != "transformersampadamw" && arg != "getitem" && arg != "getitemfp16" && arg != "getitembfp16" && arg != "reducecalculation" && - arg != "reducecalculationfp16" && arg != "reducecalculationbfp16" && arg != "padreflection" && - arg != "padreflectionfp16" && arg != "padreflectionbfp16" && arg != "--version") + arg != "reducecalculationfp16" && arg != "reducecalculationbfp16" && + arg != "padreflection" && arg != "padreflectionfp16" && arg != "padreflectionbfp16" && + arg != "--version") { printf("FAILED: Invalid Base Input Argument\n"); Usage(); From b427a78b9d74abc0f51a679214a5149aef2b223b Mon Sep 17 00:00:00 2001 From: Duong Le Date: Tue, 6 Aug 2024 09:58:05 +0000 Subject: [PATCH 22/29] update driver and gtest format --- driver/pad_reflection_driver.hpp | 48 ++------------------------------ test/gtest/pad_reflection.cpp | 14 ++++++---- 2 files changed, 12 insertions(+), 50 deletions(-) diff --git a/driver/pad_reflection_driver.hpp b/driver/pad_reflection_driver.hpp index f53a1ffe7d..881cb45c46 100644 --- a/driver/pad_reflection_driver.hpp +++ b/driver/pad_reflection_driver.hpp @@ -31,8 +31,6 @@ #include "tensor_driver.hpp" #include "timer.hpp" #include "random.hpp" -#include -#include #include #include #include @@ -128,7 +126,6 @@ class PadReflectionDriver : public Driver InputFlags& GetInputFlags() override { return inflags; } int GetandSetData() override; - std::vector GetInputTensorLengthsFromCmdLine(); int AllocateBuffersAndCopy() override; @@ -192,7 +189,7 @@ int PadReflectionDriver::GetandSetData() std::cerr << "Error Padding Lengths\n" << std::endl; } - std::vector 
in_len = GetInputTensorLengthsFromCmdLine(); + std::vector in_len = inflags.GetValueTensor("DimLengths").lengths; SetTensorNd(inputDesc, in_len, data_type); @@ -227,11 +224,8 @@ template int PadReflectionDriver::AddCmdLineArgs() { inflags.AddInputFlag("forw", 'F', "1", "Run only Forward Pad Reflection (Default=1)", "int"); - inflags.AddInputFlag("batchsize", 'n', "256", "Mini-batch size (Default=100)", "int"); - inflags.AddInputFlag("in_channels", 'c', "4", "Number of Input Channels (Default=3)", "int"); - inflags.AddInputFlag("in_d", 'D', "0", "Input Depth (Default=0)", "int"); - inflags.AddInputFlag("in_h", 'H', "32", "Input Height (Default=32)", "int"); - inflags.AddInputFlag("in_w", 'W', "32", "Input Width (Default=32)", "int"); + inflags.AddTensorFlag( + "DimLengths", 'D', "256x4x32x32", "The dimensional lengths of the input tensor"); inflags.AddInputFlag("padding", 'P', "1,1,1,1", "Padding array (Default=1 or 1,1,1,1)", "str"); @@ -244,42 +238,6 @@ int PadReflectionDriver::AddCmdLineArgs() return miopenStatusSuccess; } -template -std::vector PadReflectionDriver::GetInputTensorLengthsFromCmdLine() -{ - int in_n = inflags.GetValueInt("batchsize"); - int in_c = inflags.GetValueInt("in_channels"); - int in_w = inflags.GetValueInt("in_w"); - int in_h = inflags.GetValueInt("in_h"); - int in_d = inflags.GetValueInt("in_d"); - - if((in_n != 0) && (in_c != 0) && (in_d != 0) && (in_h != 0) && (in_w != 0)) - { - return std::vector({in_n, in_c, in_d, in_h, in_w}); - } - else if((in_n != 0) && (in_c != 0) && (in_h != 0) && (in_w != 0)) - { - return std::vector({in_n, in_c, in_h, in_w}); - } - else if((in_n != 0) && (in_c != 0) && (in_w != 0)) - { - return std::vector({in_n, in_c, in_w}); - } - else if((in_n != 0) && (in_w != 0)) - { - return std::vector({in_n, in_w}); - } - else if(in_n != 0) - { - return std::vector({in_n}); - } - else - { - std::cerr << "Error Input Tensor Lengths\n" << std::endl; - return std::vector({0}); - } -} - template int PadReflectionDriver::AllocateBuffersAndCopy() { diff --git a/test/gtest/pad_reflection.cpp b/test/gtest/pad_reflection.cpp index 93f93aecd4..38c57464a8 100644 --- a/test/gtest/pad_reflection.cpp +++ b/test/gtest/pad_reflection.cpp @@ -34,7 +34,7 @@ namespace pad_reflection { std::string GetFloatArg() { - const auto& tmp = miopen::GetStringEnv(ENV(MIOPEN_TEST_FLOAT_ARG)); + const auto& tmp = env::value(MIOPEN_TEST_FLOAT_ARG); if(tmp.empty()) { return ""; @@ -46,7 +46,7 @@ struct PadReflectionTestFloat : PadReflectionTest { }; -struct PadReflectionTestHalf : PadReflectionTest +struct PadReflectionTestHalf : PadReflectionTest { }; @@ -55,11 +55,13 @@ struct PadReflectionTestBF16 : PadReflectionTest }; } // namespace pad_reflection + using namespace pad_reflection; TEST_P(PadReflectionTestFloat, PadReflectionFw) { - if(miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)) && (GetFloatArg() == "--float")) + if(!MIOPEN_TEST_ALL || + (env::enabled(MIOPEN_TEST_ALL) && env::value(MIOPEN_TEST_FLOAT_ARG) == "--float")) { RunTest(); Verify(); @@ -72,7 +74,8 @@ TEST_P(PadReflectionTestFloat, PadReflectionFw) TEST_P(PadReflectionTestHalf, PadReflectionFw) { - if(miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)) && (GetFloatArg() == "--half")) + if(!MIOPEN_TEST_ALL || + (env::enabled(MIOPEN_TEST_ALL) && env::value(MIOPEN_TEST_FLOAT_ARG) == "--half")) { RunTest(); Verify(); @@ -85,7 +88,8 @@ TEST_P(PadReflectionTestHalf, PadReflectionFw) TEST_P(PadReflectionTestBF16, PadReflectionFw) { - if(miopen::IsEnabled(ENV(MIOPEN_TEST_ALL)) && (GetFloatArg() == "--bfloat16")) + if(!MIOPEN_TEST_ALL || + 
(env::enabled(MIOPEN_TEST_ALL) && env::value(MIOPEN_TEST_FLOAT_ARG) == "--bfloat16")) { RunTest(); Verify(); From 3314ea26a2c0414b781f6cd9af7550f43d86240b Mon Sep 17 00:00:00 2001 From: Duong Le Date: Tue, 6 Aug 2024 21:29:45 +0000 Subject: [PATCH 23/29] add pad reflection to solver --- src/include/miopen/solver_id.hpp | 3 ++- src/kernels/MIOpenPadReflection.cpp | 2 +- src/solver.cpp | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/include/miopen/solver_id.hpp b/src/include/miopen/solver_id.hpp index 81c15f6bea..8721a67a14 100644 --- a/src/include/miopen/solver_id.hpp +++ b/src/include/miopen/solver_id.hpp @@ -59,7 +59,8 @@ enum class Primitive Mha, Softmax, Adam, - Item + Item, + PadReflection }; struct MIOPEN_INTERNALS_EXPORT Id diff --git a/src/kernels/MIOpenPadReflection.cpp b/src/kernels/MIOpenPadReflection.cpp index e269f168ff..bfb4f2b00d 100644 --- a/src/kernels/MIOpenPadReflection.cpp +++ b/src/kernels/MIOpenPadReflection.cpp @@ -23,7 +23,7 @@ * SOFTWARE. * *******************************************************************************/ -#include + #ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS #include #include diff --git a/src/solver.cpp b/src/solver.cpp index 6b451ca498..25ae51b562 100644 --- a/src/solver.cpp +++ b/src/solver.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -673,6 +674,8 @@ inline SolverRegistrar::SolverRegistrar(IdRegistryData& registry) fusion::ConvWinoFuryRxSFused<2, 3>{}.SolverDbId(), miopenConvolutionAlgoWinograd); + Register(registry, ++id, Primitive::PadReflection, pad_reflection::PadReflection{}.SolverDbId()); + // IMPORTANT: New solvers should be added to the end of the function! } From 2a4e52ebe778496c5c8c5d632aeeb10009b81740 Mon Sep 17 00:00:00 2001 From: Duong Le Date: Tue, 6 Aug 2024 21:30:00 +0000 Subject: [PATCH 24/29] githook format --- src/kernels/MIOpenPadReflection.cpp | 2 +- src/solver.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/kernels/MIOpenPadReflection.cpp b/src/kernels/MIOpenPadReflection.cpp index bfb4f2b00d..9e2379ddfc 100644 --- a/src/kernels/MIOpenPadReflection.cpp +++ b/src/kernels/MIOpenPadReflection.cpp @@ -23,7 +23,7 @@ * SOFTWARE. * *******************************************************************************/ - + #ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS #include #include diff --git a/src/solver.cpp b/src/solver.cpp index 25ae51b562..47d1ab33bd 100644 --- a/src/solver.cpp +++ b/src/solver.cpp @@ -674,7 +674,8 @@ inline SolverRegistrar::SolverRegistrar(IdRegistryData& registry) fusion::ConvWinoFuryRxSFused<2, 3>{}.SolverDbId(), miopenConvolutionAlgoWinograd); - Register(registry, ++id, Primitive::PadReflection, pad_reflection::PadReflection{}.SolverDbId()); + Register( + registry, ++id, Primitive::PadReflection, pad_reflection::PadReflection{}.SolverDbId()); // IMPORTANT: New solvers should be added to the end of the function! 
} From ea86f83cd73538c266839cd2fde0e723e98a0297 Mon Sep 17 00:00:00 2001 From: Duong Le Date: Wed, 7 Aug 2024 03:39:21 +0000 Subject: [PATCH 25/29] fix uninitialize variable --- src/solver/pad_reflection/pad_reflection.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/solver/pad_reflection/pad_reflection.cpp b/src/solver/pad_reflection/pad_reflection.cpp index d0aebbd521..08b33e90e7 100644 --- a/src/solver/pad_reflection/pad_reflection.cpp +++ b/src/solver/pad_reflection/pad_reflection.cpp @@ -109,13 +109,9 @@ PadReflection::GetSolution([[maybe_unused]] const ExecutionContext& context, auto padding = params.padding; auto num_padding = params.num_padding; - long padding_l, padding_t; - if(num_padding == 1) - { - padding_l = padding[0]; - padding_t = padding[0]; - } - else if(num_padding == 4) + long padding_l = padding[0]; + long padding_t = padding[0]; + if(num_padding == 4) { padding_l = padding[0]; padding_t = padding[2]; From 871ad5709d4c7ff0939a4ef32c066feba0e38042 Mon Sep 17 00:00:00 2001 From: Duong Le Date: Wed, 7 Aug 2024 03:40:00 +0000 Subject: [PATCH 26/29] githook format --- src/solver/pad_reflection/pad_reflection.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/solver/pad_reflection/pad_reflection.cpp b/src/solver/pad_reflection/pad_reflection.cpp index 08b33e90e7..b5e85b15bb 100644 --- a/src/solver/pad_reflection/pad_reflection.cpp +++ b/src/solver/pad_reflection/pad_reflection.cpp @@ -109,8 +109,8 @@ PadReflection::GetSolution([[maybe_unused]] const ExecutionContext& context, auto padding = params.padding; auto num_padding = params.num_padding; - long padding_l = padding[0]; - long padding_t = padding[0]; + long padding_l = padding[0]; + long padding_t = padding[0]; if(num_padding == 4) { padding_l = padding[0]; From afb2464d5af72a3cd3d7f492acf5e2a495972df4 Mon Sep 17 00:00:00 2001 From: Duong Le Date: Wed, 7 Aug 2024 10:38:27 +0000 Subject: [PATCH 27/29] fix tidy --- src/include/miopen/pad_reflection.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/miopen/pad_reflection.hpp b/src/include/miopen/pad_reflection.hpp index ce65b16ec3..017d020340 100644 --- a/src/include/miopen/pad_reflection.hpp +++ b/src/include/miopen/pad_reflection.hpp @@ -39,6 +39,6 @@ miopenStatus_t PadReflection(Handle& handle, const TensorDescriptor& yDesc, Data_t y, const size_t* padding, - const size_t num_padding); + size_t num_padding); } // namespace miopen #endif // MIOPEN_PAD_REFLECTION_HPP_ From 07c7fe9a098ceb983509690cd68a12cd21728e5b Mon Sep 17 00:00:00 2001 From: Duong Le Date: Fri, 9 Aug 2024 04:21:16 +0000 Subject: [PATCH 28/29] update gtest --- test/gtest/pad_reflection.cpp | 40 ++++++++++++++++++++++------------- test/gtest/pad_reflection.hpp | 30 +++++++++++++++++++------- 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/test/gtest/pad_reflection.cpp b/test/gtest/pad_reflection.cpp index 38c57464a8..0f5b97840c 100644 --- a/test/gtest/pad_reflection.cpp +++ b/test/gtest/pad_reflection.cpp @@ -42,23 +42,25 @@ std::string GetFloatArg() return tmp; } -struct PadReflectionTestFloat : PadReflectionTest +struct GPU_PadReflection_FP32 : PadReflectionTest { }; -struct PadReflectionTestHalf : PadReflectionTest +struct GPU_PadReflection_FP16 : PadReflectionTest { }; -struct PadReflectionTestBF16 : PadReflectionTest +struct GPU_PadReflection_BFP16 : PadReflectionTest { }; } // namespace pad_reflection -using namespace pad_reflection; +using pad_reflection::GPU_PadReflection_FP32; +using 
pad_reflection::GPU_PadReflection_FP16; +using pad_reflection::GPU_PadReflection_BFP16; -TEST_P(PadReflectionTestFloat, PadReflectionFw) +TEST_P(GPU_PadReflection_FP32, Test) { if(!MIOPEN_TEST_ALL || (env::enabled(MIOPEN_TEST_ALL) && env::value(MIOPEN_TEST_FLOAT_ARG) == "--float")) @@ -72,7 +74,7 @@ TEST_P(PadReflectionTestFloat, PadReflectionFw) } }; -TEST_P(PadReflectionTestHalf, PadReflectionFw) +TEST_P(GPU_PadReflection_FP16, Test) { if(!MIOPEN_TEST_ALL || (env::enabled(MIOPEN_TEST_ALL) && env::value(MIOPEN_TEST_FLOAT_ARG) == "--half")) @@ -86,7 +88,7 @@ TEST_P(PadReflectionTestHalf, PadReflectionFw) } }; -TEST_P(PadReflectionTestBF16, PadReflectionFw) +TEST_P(GPU_PadReflection_BFP16, Test) { if(!MIOPEN_TEST_ALL || (env::enabled(MIOPEN_TEST_ALL) && env::value(MIOPEN_TEST_FLOAT_ARG) == "--bfloat16")) @@ -100,14 +102,22 @@ TEST_P(PadReflectionTestBF16, PadReflectionFw) } }; -INSTANTIATE_TEST_SUITE_P(PadReflectionTestSet, - PadReflectionTestFloat, - testing::ValuesIn(PadReflectionTestFloatConfigs())); +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_PadReflection_FP32, + testing::ValuesIn(PadReflectionSmokeTestFloatConfigs())); +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_PadReflection_FP16, + testing::ValuesIn(PadReflectionSmokeTestFloatConfigs())); +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_PadReflection_BFP16, + testing::ValuesIn(PadReflectionSmokeTestFloatConfigs())); -INSTANTIATE_TEST_SUITE_P(PadReflectionTestSet, - PadReflectionTestHalf, +INSTANTIATE_TEST_SUITE_P(Full, + GPU_PadReflection_FP32, testing::ValuesIn(PadReflectionTestFloatConfigs())); - -INSTANTIATE_TEST_SUITE_P(PadReflectionTestSet, - PadReflectionTestBF16, +INSTANTIATE_TEST_SUITE_P(Full, + GPU_PadReflection_FP16, + testing::ValuesIn(PadReflectionTestFloatConfigs())); +INSTANTIATE_TEST_SUITE_P(Full, + GPU_PadReflection_BFP16, testing::ValuesIn(PadReflectionTestFloatConfigs())); diff --git a/test/gtest/pad_reflection.hpp b/test/gtest/pad_reflection.hpp index 482bc89a71..e1abac149d 100644 --- a/test/gtest/pad_reflection.hpp +++ b/test/gtest/pad_reflection.hpp @@ -93,17 +93,25 @@ std::vector PadReflectionTestFloatConfigs() { // n c d h w padding // clang-format off return { - { 1, 1, 0, 3, 3, {2}}, - { 48, 8, 0, 512, 512, {1}}, + { 48, 8, 0, 512, 512, {1, 1, 1, 1}}, { 48, 8, 0, 512, 512, {1, 1, 3, 3}}, { 48, 8, 0, 512, 512, {0, 0, 2, 2}}, - { 16, 311, 0, 98, 512, {1}}, + { 16, 311, 0, 98, 512, {1, 1, 1, 1}}, { 16, 311, 0, 98, 512, {1, 1, 3, 3}}, { 16, 311, 0, 98, 512, {0, 0, 2, 2}}, }; // clang-format on } +std::vector PadReflectionSmokeTestFloatConfigs() +{ // n c d h w padding + // clang-format off + return { + { 1, 1, 0, 3, 3, {2}}, + }; + // clang-format on +} + template struct PadReflectionTest : public ::testing::TestWithParam { @@ -160,17 +168,23 @@ struct PadReflectionTest : public ::testing::TestWithParam padding.data(), padding.size()); - EXPECT_EQ(status, miopenStatusSuccess); + ASSERT_EQ(status, miopenStatusSuccess); output.data = handle.Read(output_dev, output.data.size()); } void Verify() { - for(int i = 0; i < output.data.size() - 1; ++i) - { - EXPECT_EQ(output.data[i], ref_output.data[i]); - } + // Computation error of fp16 is ~2^13 (=8192) bigger than + // the one of fp32 because mantissa is shorter by 13 bits. + auto tolerance = std::is_same::value ? 1.5e-6 : 8.2e-3; + // bf16 mantissa has 7 bits, by 3 bits shorter than fp16. 
+        if(std::is_same<T, bfloat16>::value)
+            tolerance *= 8.0;
+
+        auto error = miopen::rms_range(ref_output, output);
+        ASSERT_EQ(miopen::range_distance(ref_output), miopen::range_distance(output));
+        EXPECT_LT(error, tolerance);
     }
 
     PadReflectionCase pad_reflection_config;

From 186974c5a34ecd90ad181dcfde46f9fcf70232c9 Mon Sep 17 00:00:00 2001
From: Duong Le
Date: Fri, 9 Aug 2024 04:23:03 +0000
Subject: [PATCH 29/29] githook format

---
 test/gtest/pad_reflection.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/gtest/pad_reflection.cpp b/test/gtest/pad_reflection.cpp
index 0f5b97840c..ad4563152c 100644
--- a/test/gtest/pad_reflection.cpp
+++ b/test/gtest/pad_reflection.cpp
@@ -56,9 +56,9 @@ struct GPU_PadReflection_BFP16 : PadReflectionTest<bfloat16>
 
 } // namespace pad_reflection
 
-using pad_reflection::GPU_PadReflection_FP32;
-using pad_reflection::GPU_PadReflection_FP16;
 using pad_reflection::GPU_PadReflection_BFP16;
+using pad_reflection::GPU_PadReflection_FP16;
+using pad_reflection::GPU_PadReflection_FP32;
 
 TEST_P(GPU_PadReflection_FP32, Test)
 {
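
A usage sketch of the miopenPadReflection entry point added by this series. Only the function signature and the "1 or 4 padding elements" rule come from the patches themselves; the descriptor setup, the NCHW shapes, the {left, right, top, bottom} ordering, and the output-shape rule (H grows by top+bottom, W by left+right) are assumptions inferred from the solver's use of padding[0] as left and padding[2] as top and from the test configurations.

// Hypothetical host-side caller; only miopenPadReflection's signature is taken
// from the patch series, everything else here is an illustrative assumption.
#include <miopen/miopen.h>
#include <cstddef>
#include <vector>

int RunPadReflectionExample(miopenHandle_t handle, void* x_dev, void* y_dev)
{
    // Input shape N=1, C=1, H=3, W=3 (the first smoke-test configuration).
    const int N = 1, C = 1, H = 3, W = 3;
    // Reflection padding, assumed {left, right, top, bottom}; a single element
    // would apply the same amount on every side (the num_padding == 1 path).
    std::vector<std::size_t> padding{2, 2, 2, 2};
    // Assumed output shape: H + top + bottom, W + left + right.
    const int outH = H + 2 + 2, outW = W + 2 + 2;

    miopenTensorDescriptor_t xDesc, yDesc;
    miopenCreateTensorDescriptor(&xDesc);
    miopenCreateTensorDescriptor(&yDesc);
    miopenSet4dTensorDescriptor(xDesc, miopenFloat, N, C, H, W);
    miopenSet4dTensorDescriptor(yDesc, miopenFloat, N, C, outH, outW);

    // x_dev and y_dev are device buffers of matching sizes, allocated by the caller.
    const miopenStatus_t status =
        miopenPadReflection(handle, xDesc, x_dev, yDesc, y_dev, padding.data(), padding.size());

    miopenDestroyTensorDescriptor(xDesc);
    miopenDestroyTensorDescriptor(yDesc);
    return status == miopenStatusSuccess ? 0 : 1;
}

Passing a raw pointer plus an element count, instead of the std::vector used in the first revision of the API, keeps the extern "C" entry point callable from C, which appears to be the motivation for the signature change in the later patches of the series.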