diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 90e29ffaa9..23a5f9986c 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -35,3 +35,4 @@ The MIOpen API library is structured as follows: * :doc:`ReduceExtreme <../doxygen/html/group__ReduceExtreme>` (experimental) * :doc:`Getitem <../doxygen/html/group__getitem>` (experimental) * :doc:`ReduceCalculation <../doxygen/html/group__ReduceCalculation>` (experimental) + * :doc:`Pad <../doxygen/html/group__pad>` (experimental) diff --git a/driver/CMakeLists.txt b/driver/CMakeLists.txt index cd663eb8b4..8d1433ff14 100644 --- a/driver/CMakeLists.txt +++ b/driver/CMakeLists.txt @@ -47,6 +47,7 @@ add_executable(MIOpenDriver dm_groupnorm.cpp dm_layernorm.cpp dm_lrn.cpp + dm_pad_reflection.cpp dm_pool.cpp dm_reduce.cpp dm_reduceextreme.cpp diff --git a/driver/dm_pad_reflection.cpp b/driver/dm_pad_reflection.cpp new file mode 100644 index 0000000000..b036d5ccb6 --- /dev/null +++ b/driver/dm_pad_reflection.cpp @@ -0,0 +1,39 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#include "registry_driver_maker.hpp" +#include "pad_reflection_driver.hpp" +static Driver* makeDriver(const std::string& base_arg) +{ + if(base_arg == "padreflection") + return new PadReflectionDriver(); + if(base_arg == "padreflectionfp16") + return new PadReflectionDriver(); + if(base_arg == "padreflectionbfp16") + return new PadReflectionDriver(); + return nullptr; +} + +REGISTER_DRIVER_MAKER(makeDriver); diff --git a/driver/driver.hpp b/driver/driver.hpp index b23df690d1..fbf4cd4e86 100644 --- a/driver/driver.hpp +++ b/driver/driver.hpp @@ -175,7 +175,7 @@ inline void PadBufferSize(size_t& sz, int datatype_sz) "groupnorm[bfp16|fp16], cat[bfp16|fp16], addlayernorm[bfp16|fp16], " "t5layernorm[bfp16|fp16], adam[fp16], ampadam, reduceextreme[bfp16|fp16], " "adamw[fp16], ampadamw, transformersadamw[fp16], transformersampadamw, " - "getitem[bfp16|fp16], reducecalculation[bfp16|fp16]\n"); + "getitem[bfp16|fp16], reducecalculation[bfp16|fp16], padreflection[bfp16|fp16]\n"); exit(0); // NOLINT (concurrency-mt-unsafe) } @@ -206,7 +206,9 @@ inline std::string ParseBaseArg(int argc, char* argv[]) arg != "adamwfp16" && arg != "ampadamw" && arg != "transformersadamw" && arg != "transformersadamwfp16" && arg != "transformersampadamw" && arg != "getitem" && arg != "getitemfp16" && arg != "getitembfp16" && arg != "reducecalculation" && - arg != "reducecalculationfp16" && arg != "reducecalculationbfp16" && arg != "--version") + arg != "reducecalculationfp16" && arg != "reducecalculationbfp16" && + arg != "padreflection" && arg != "padreflectionfp16" && arg != "padreflectionbfp16" && + arg != "--version") { printf("FAILED: Invalid Base Input Argument\n"); Usage(); diff --git a/driver/pad_reflection_driver.hpp b/driver/pad_reflection_driver.hpp new file mode 100644 index 0000000000..881cb45c46 --- /dev/null +++ b/driver/pad_reflection_driver.hpp @@ -0,0 +1,363 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#ifndef GUARD_MIOPEN_PAD_REFLECTION_DRIVER_HPP +#define GUARD_MIOPEN_PAD_REFLECTION_DRIVER_HPP + +#include "InputFlags.hpp" +#include "driver.hpp" +#include "tensor_driver.hpp" +#include "timer.hpp" +#include "random.hpp" +#include +#include +#include +#include +#include +#include +#include <../test/tensor_holder.hpp> +#include <../test/verify.hpp> + +#ifndef MLO_PADREFLECTIONHOST_H_ +#define MLO_PADREFLECTIONHOST_H_ + +template +void mloPadReflectionRunHost(miopenTensorDescriptor_t inputDesc, + miopenTensorDescriptor_t outputDesc, + Tgpu* input, + Tcheck* outputhost, + std::vector padding) +{ + auto input_dims = miopen::deref(inputDesc).GetLengths(); + auto output_dims = miopen::deref(outputDesc).GetLengths(); + auto output_numel = + std::accumulate(output_dims.begin(), output_dims.end(), 1L, std::multiplies()); + + long padding_l = padding[0]; + long padding_t = padding[2]; + auto input_strides = miopen::deref(inputDesc).GetStrides(); + size_t in_H = input_dims[2]; + size_t in_W = input_dims[3]; + + for(size_t gid = 0; gid < output_numel; gid++) + { + long n, c, h, w; + // GET_NCHW(n, c, h, w, gid, output); + ulong nch = (gid) / output_dims[3]; + w = (gid) % output_dims[3]; + ulong nc = nch / output_dims[2]; + h = nch % output_dims[2]; + n = nc / output_dims[1]; + c = nc % output_dims[1]; + + long in_start_x = max(0L, -padding_l); + long in_start_y = max(0L, -padding_t); + long out_start_x = max(0L, padding_l); + long out_start_y = max(0L, padding_t); + + if(w < padding_l) + { + w = padding_l * 2 - w; + } + else if(padding_l <= w && w < in_W + padding_l) + { + } + else + { + w = (in_W + padding_l - 1) * 2 - w; + } + w = w - out_start_x + in_start_x; + + if(h < padding_t) + { + h = padding_t * 2 - h; + } + else if(padding_t <= h && h < in_H + padding_t) + { + } + else + { + h = (in_H + padding_t - 1) * 2 - h; + } + h = h - out_start_y + in_start_y; + + outputhost[gid] = input[(input_strides[3] * (w)) + (input_strides[2] * (h)) + + (input_strides[1] * (c)) + (input_strides[0] * (n)) + 0]; + } +} +#endif + +template +class PadReflectionDriver : public Driver +{ +public: + PadReflectionDriver() : Driver() + { + miopenCreateTensorDescriptor(&inputDesc); + miopenCreateTensorDescriptor(&outputDesc); + + data_type = miopen_type{}; + } + + int AddCmdLineArgs() override; + int ParseCmdLineArgs(int argc, char* argv[]) override; + InputFlags& GetInputFlags() override { return inflags; } + + int GetandSetData() override; + + int AllocateBuffersAndCopy() override; + + int RunForwardGPU() override; + int RunForwardCPU(); + + int RunBackwardGPU() override; + + Tref GetTolerance(); + int VerifyBackward() override; + int VerifyForward() override; + ~PadReflectionDriver() override + { + miopenDestroyTensorDescriptor(inputDesc); + miopenDestroyTensorDescriptor(outputDesc); + } + +private: + InputFlags inflags; + + int forw; + + miopenTensorDescriptor_t inputDesc; + miopenTensorDescriptor_t outputDesc; + + std::unique_ptr in_dev; + std::unique_ptr out_dev; + + std::vector in; + std::vector out; + std::vector outhost; + + std::vector padding; +}; + +template +int PadReflectionDriver::ParseCmdLineArgs(int argc, char* argv[]) +{ + inflags.Parse(argc, argv); + + if(inflags.GetValueInt("time") == 1) + { + miopenEnableProfiling(GetHandle(), true); + } + return miopenStatusSuccess; +} + +template +int PadReflectionDriver::GetandSetData() +{ + std::string padding_str = inflags.GetValueStr("padding"); + std::stringstream ss(padding_str); + std::string token; + while(std::getline(ss, token, ',')) + { + padding.push_back(std::stoul(token)); + } + + if(!(padding.size() == 1 or padding.size() == 4)) + { + std::cerr << "Error Padding Lengths\n" << std::endl; + } + + std::vector in_len = inflags.GetValueTensor("DimLengths").lengths; + + SetTensorNd(inputDesc, in_len, data_type); + + std::vector out_len; + for(int i = 0; i < in_len.size(); i++) + { + // If H + if(i == 2) + { + out_len.push_back(in_len[i] + 2 * padding[2]); + } + // If W + else if(i == 3) + { + out_len.push_back(in_len[i] + 2 * padding[0]); + } + else + { + out_len.push_back(in_len[i]); + } + } + + if(out_len.empty()) + out_len.push_back(1); + + SetTensorNd(outputDesc, out_len, data_type); + + return 0; +} + +template +int PadReflectionDriver::AddCmdLineArgs() +{ + inflags.AddInputFlag("forw", 'F', "1", "Run only Forward Pad Reflection (Default=1)", "int"); + inflags.AddTensorFlag( + "DimLengths", 'D', "256x4x32x32", "The dimensional lengths of the input tensor"); + + inflags.AddInputFlag("padding", 'P', "1,1,1,1", "Padding array (Default=1 or 1,1,1,1)", "str"); + + inflags.AddInputFlag("iter", 'i', "10", "Number of Iterations (Default=10)", "int"); + inflags.AddInputFlag("verify", 'V', "1", "Verify Each Layer (Default=1)", "int"); + inflags.AddInputFlag("time", 't', "0", "Time Each Layer (Default=0)", "int"); + inflags.AddInputFlag( + "wall", 'w', "0", "Wall-clock Time Each Layer, Requires time == 1 (Default=0)", "int"); + + return miopenStatusSuccess; +} + +template +int PadReflectionDriver::AllocateBuffersAndCopy() +{ + size_t in_sz = GetTensorSize(inputDesc); + size_t out_sz = GetTensorSize(outputDesc); + + uint32_t ctx = 0; + + in_dev = std::unique_ptr(new GPUMem(ctx, in_sz, sizeof(Tgpu))); + out_dev = std::unique_ptr(new GPUMem(ctx, out_sz, sizeof(Tgpu))); + + in = std::vector(in_sz, static_cast(0)); + out = std::vector(out_sz, static_cast(0)); + outhost = std::vector(out_sz, static_cast(0)); + + for(int i = 0; i < in_sz; i++) + { + in[i] = prng::gen_A_to_B(static_cast(0.0), static_cast(1.0)); + } + + if(in_dev->ToGPU(GetStream(), in.data()) != 0) + std::cerr << "Error copying (in) to GPU, size: " << in_dev->GetSize() << std::endl; + + if(out_dev->ToGPU(GetStream(), out.data()) != 0) + std::cerr << "Error copying (out) to GPU, size: " << out_dev->GetSize() << std::endl; + + return miopenStatusSuccess; +} + +template +int PadReflectionDriver::RunForwardGPU() +{ + float kernel_total_time = 0; + float kernel_first_time = 0; + + Timer t; + START_TIME + + for(int i = 0; i < inflags.GetValueInt("iter"); i++) + { + miopenPadReflection(GetHandle(), + inputDesc, + in_dev->GetMem(), + outputDesc, + out_dev->GetMem(), + padding.data(), + padding.size()); + + float time = 0.0; + miopenGetKernelTime(GetHandle(), &time); + kernel_total_time += time; + if(i == 0) + kernel_first_time = time; + } + + if(inflags.GetValueInt("time") == 1) + { + STOP_TIME + int iter = inflags.GetValueInt("iter"); + if(WALL_CLOCK) + std::cout << "Wall-clock Time Forward Pad Reflection Elapsed: " << t.gettime_ms() / iter + << " ms\n"; + + float kernel_average_time = + iter > 1 ? (kernel_total_time - kernel_first_time) / (iter - 1) : kernel_first_time; + std::cout << "GPU Kernel Time Forward Pad Reflection Elapsed: " << kernel_average_time + << " ms\n"; + } + + if(out_dev->FromGPU(GetStream(), out.data()) != 0) + std::cerr << "Error copying (out_dev) from GPU, size: " << out_dev->GetSize() << std::endl; + + return miopenStatusSuccess; +} + +template +int PadReflectionDriver::RunForwardCPU() +{ + mloPadReflectionRunHost(inputDesc, outputDesc, in.data(), outhost.data(), padding); + + return miopenStatusSuccess; +} + +template +int PadReflectionDriver::RunBackwardGPU() +{ + return miopenStatusSuccess; +} + +template +Tref PadReflectionDriver::GetTolerance() +{ + return 0; +} + +template +int PadReflectionDriver::VerifyForward() +{ + RunForwardCPU(); + const Tref tolerance = GetTolerance(); + auto error = miopen::rms_range(outhost, out); + + if(std::abs(static_cast(error)) != 0.0f) + { + std::cout << "Pad Reflection FAILED: " << error << " > " << tolerance << std::endl; + return EC_VerifyFwd; + } + else + { + std::cout << "Pad Reflection Verifies OK on CPU reference (" << error << " == " << tolerance + << ')' << std::endl; + } + + return miopenStatusSuccess; +} + +template +int PadReflectionDriver::VerifyBackward() +{ + return miopenStatusSuccess; +} + +#endif // GUARD_MIOPEN_PAD_REFLECTION_DRIVER_HPP diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h index 3b9bbeccc1..7d5b420687 100644 --- a/include/miopen/miopen.h +++ b/include/miopen/miopen.h @@ -2,7 +2,7 @@ * * MIT License * - * Copyright (c) 2023 Advanced Micro Devices, Inc. + * Copyright (c) 2024 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -70,6 +70,7 @@ * @defgroup SGD * @defgroup getitem * @defgroup ReduceCalculation + * @defgroup pad * */ @@ -5921,6 +5922,35 @@ miopenReduceCalculationForward(miopenHandle_t handle, /** @} */ // CLOSEOUT REDUCE CALCULATION DOXYGEN GROUP +#endif + +#ifdef MIOPEN_BETA_API +// Padreflection APIs +/** @addtogroup pad + * + * @{ + */ +/*! @brief Add padding by the reflection of the tensor + * + * @param handle MIOpen handle (input) + * @param xDesc Tensor descriptor for data input tensor x (input) + * @param x Data tensor x (input) + * @param yDesc Tensor descriptor for output data tensor y (input) + * @param y Data tensor y (output) + * @param padding Padding array contain 1 or 4 elements (input) + * @param num_padding Number of elements in padding, equals 1 or 4 (input) + * @return miopenStatus_t + */ +MIOPEN_EXPORT miopenStatus_t miopenPadReflection(miopenHandle_t handle, + const miopenTensorDescriptor_t xDesc, + const void* x, + const miopenTensorDescriptor_t yDesc, + void* y, + const size_t* padding, + const size_t num_padding); + +/** @} */ +// CLOSEOUT pad DOXYGEN GROUP #endif // MIOPEN_BETA_API #ifdef MIOPEN_BETA_API diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 77acf3f7d3..70237ab1f2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -159,6 +159,8 @@ set( MIOpen_Source mha/problem_description.cpp op_args.cpp operator.cpp + pad_reflection/problem_description.cpp + pad_reflection_api.cpp performance_config.cpp pooling/problem_description.cpp pooling_api.cpp @@ -294,6 +296,7 @@ set( MIOpen_Source solver/layernorm/forward_t5layernorm.cpp solver/mha/mha_solver_backward.cpp solver/mha/mha_solver_forward.cpp + solver/pad_reflection/pad_reflection.cpp solver/pooling/forward2d.cpp solver/pooling/forwardNaive.cpp solver/pooling/forwardNd.cpp @@ -503,6 +506,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN kernels/MIOpenLRNBwd.cl kernels/MIOpenLRNFwd.cl kernels/MIOpenNeuron.cl + kernels/MIOpenPadReflection.cpp kernels/MIOpenPooling.cl kernels/MIOpenPoolingBwd.cl kernels/MIOpenPoolingBwdND.cl @@ -650,6 +654,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN hip/hip_build_utils.cpp hip/batched_transpose_sol.cpp hip/general_tensor_reorder_sol.cpp + pad_reflection.cpp pooling.cpp t5layernorm.cpp ocl/fusionopconvocl.cpp diff --git a/src/include/miopen/pad_reflection.hpp b/src/include/miopen/pad_reflection.hpp new file mode 100644 index 0000000000..017d020340 --- /dev/null +++ b/src/include/miopen/pad_reflection.hpp @@ -0,0 +1,44 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#ifndef MIOPEN_PAD_REFLECTION_HPP_ +#define MIOPEN_PAD_REFLECTION_HPP_ + +#include + +namespace miopen { + +struct Handle; +struct TensorDescriptor; + +miopenStatus_t PadReflection(Handle& handle, + const TensorDescriptor& xDesc, + ConstData_t x, + const TensorDescriptor& yDesc, + Data_t y, + const size_t* padding, + size_t num_padding); +} // namespace miopen +#endif // MIOPEN_PAD_REFLECTION_HPP_ diff --git a/src/include/miopen/pad_reflection/invoke_params.hpp b/src/include/miopen/pad_reflection/invoke_params.hpp new file mode 100644 index 0000000000..b64438d83a --- /dev/null +++ b/src/include/miopen/pad_reflection/invoke_params.hpp @@ -0,0 +1,54 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#pragma once + +#include +#include + +namespace miopen { +namespace pad_reflection { + +struct InvokeParams : public miopen::InvokeParams +{ + InvokeParams() = default; + + const TensorDescriptor* xDesc = nullptr; + const TensorDescriptor* yDesc = nullptr; + + ConstData_t x = nullptr; + Data_t y = nullptr; + + const size_t* padding = nullptr; + size_t num_padding = 0; + + std::size_t GetWorkspaceSize() const { return 0; } + Data_t GetWorkspace() const { return nullptr; } +}; + +} // namespace pad_reflection + +} // namespace miopen diff --git a/src/include/miopen/pad_reflection/problem_description.hpp b/src/include/miopen/pad_reflection/problem_description.hpp new file mode 100644 index 0000000000..3172512757 --- /dev/null +++ b/src/include/miopen/pad_reflection/problem_description.hpp @@ -0,0 +1,102 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#include +#include +#include +#include +#include + +namespace miopen { + +struct NetworkConfig; + +namespace pad_reflection { + +struct ProblemDescription : ProblemDescriptionBase +{ + ProblemDescription(const TensorDescriptor& xDesc_, + const TensorDescriptor& yDesc_, + const size_t num_padding_) + : xDesc(xDesc_), yDesc(yDesc_), num_padding(num_padding_) + { + } + const TensorDescriptor& GetXDesc() const { return xDesc; } + const TensorDescriptor& GetYDesc() const { return yDesc; } + size_t GetNumPadding() const { return num_padding; } + + bool IsSameType() const + { + if(xDesc.GetType() != yDesc.GetType()) + { +#if MIOPEN_BUILD_DEV || !MIOPEN_NDEBUG + MIOPEN_THROW(miopenStatusBadParm, "Pad Reflection: Tensor types do not match."); +#else + return false; +#endif + } + return true; + } + + bool IsAllPacked() const + { + if(!(xDesc.IsPacked() && yDesc.IsPacked())) + { +#if MIOPEN_BUILD_DEV || !MIOPEN_NDEBUG + MIOPEN_THROW(miopenStatusBadParm, "Pad Reflection: Unpacked tensors not supported."); +#else + return false; +#endif + } + return true; + } + + bool IsRightNumPadding() const + { + if(!(num_padding == 1 || num_padding == 4)) + { +#if MIOPEN_BUILD_DEV || !MIOPEN_NDEBUG + MIOPEN_THROW(miopenStatusBadParm, + "Pad Reflection: Padding input needs to have 1 or 4 elements only."); +#else + return false; +#endif + } + return true; + } + + NetworkConfig MakeNetworkConfig() const override; + +private: + TensorDescriptor xDesc; + TensorDescriptor yDesc; + size_t num_padding; +}; + +} // namespace pad_reflection + +} // namespace miopen diff --git a/src/include/miopen/pad_reflection/solvers.hpp b/src/include/miopen/pad_reflection/solvers.hpp new file mode 100644 index 0000000000..e2da94bc4b --- /dev/null +++ b/src/include/miopen/pad_reflection/solvers.hpp @@ -0,0 +1,57 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#pragma once + +#include +#include +#include + +namespace miopen { + +namespace solver { + +namespace pad_reflection { + +using PadReflectionSolver = + NonTunableSolverBase; + +struct PadReflection final : PadReflectionSolver +{ + const std::string& SolverDbId() const override { return GetSolverDbId(); } + + bool IsApplicable(const ExecutionContext& context, + const miopen::pad_reflection::ProblemDescription& problem) const override; + + ConvSolution + GetSolution(const ExecutionContext& context, + const miopen::pad_reflection::ProblemDescription& problem) const override; +}; + +} // namespace pad_reflection + +} // namespace solver + +} // namespace miopen diff --git a/src/include/miopen/solver_id.hpp b/src/include/miopen/solver_id.hpp index 81c15f6bea..8721a67a14 100644 --- a/src/include/miopen/solver_id.hpp +++ b/src/include/miopen/solver_id.hpp @@ -59,7 +59,8 @@ enum class Primitive Mha, Softmax, Adam, - Item + Item, + PadReflection }; struct MIOPEN_INTERNALS_EXPORT Id diff --git a/src/kernels/MIOpenPadReflection.cpp b/src/kernels/MIOpenPadReflection.cpp new file mode 100644 index 0000000000..9e2379ddfc --- /dev/null +++ b/src/kernels/MIOpenPadReflection.cpp @@ -0,0 +1,120 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS +#include +#include +#endif + +#include "float_types.h" + +template +__device__ void padReflection2dFwdContiguous(const TI* __restrict__ input, + TO* __restrict__ output, + uint64_t output_size, + long padding_left, + long padding_top, + const size_t in_H, + const size_t in_W, + const size_t output_size_1, + const size_t output_size_2, + const size_t output_size_3, + const size_t input_stride_0, + const size_t input_stride_1, + const size_t input_stride_2, + const size_t input_stride_3) +{ + const size_t gid = threadIdx.x + blockIdx.x * blockDim.x; + if(gid >= output_size) + return; + + long n, c, h, w; + ulong nch = (gid) / output_size_3; + w = (gid) % output_size_3; + ulong nc = nch / output_size_2; + h = nch % output_size_2; + n = nc / output_size_1; + c = nc % output_size_1; + + long in_start_x = max(0L, -padding_left); + long in_start_y = max(0L, -padding_top); + long out_start_x = max(0L, padding_left); + long out_start_y = max(0L, padding_top); + + if(w < padding_left) + { + w = padding_left * 2 - w; + } + else if(!(padding_left <= w && w < in_W + padding_left)) + { + w = (in_W + padding_left - 1) * 2 - w; + } + w = w - out_start_x + in_start_x; + + if(h < padding_top) + { + h = padding_top * 2 - h; + } + else if(!(padding_top <= h && h < in_H + padding_top)) + { + h = (in_H + padding_top - 1) * 2 - h; + } + h = h - out_start_y + in_start_y; + + output[gid] = input[(input_stride_3 * (w)) + (input_stride_2 * (h)) + (input_stride_1 * (c)) + + (input_stride_0 * (n)) + 0]; +} + +extern "C" __global__ void PadReflection2dFwdContiguous(const INPUT_TYPE* __restrict__ input, + OUTPUT_TYPE* __restrict__ output, + uint64_t output_size, + long padding_left, + long padding_top, + const size_t in_H, + const size_t in_W, + const size_t output_size_1, + const size_t output_size_2, + const size_t output_size_3, + const size_t input_stride_0, + const size_t input_stride_1, + const size_t input_stride_2, + const size_t input_stride_3) +{ + padReflection2dFwdContiguous(input, + output, + output_size, + padding_left, + padding_top, + in_H, + in_W, + output_size_1, + output_size_2, + output_size_3, + input_stride_0, + input_stride_1, + input_stride_2, + input_stride_3); +} \ No newline at end of file diff --git a/src/pad_reflection.cpp b/src/pad_reflection.cpp new file mode 100644 index 0000000000..4605984f90 --- /dev/null +++ b/src/pad_reflection.cpp @@ -0,0 +1,67 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace miopen { + +miopenStatus_t PadReflection(Handle& handle, + const TensorDescriptor& xDesc, + ConstData_t x, + const TensorDescriptor& yDesc, + Data_t y, + const size_t* padding, + const size_t num_padding) +{ + const auto problem = pad_reflection::ProblemDescription{xDesc, yDesc, num_padding}; + + const auto invoke_params = [&]() { + auto tmp = pad_reflection::InvokeParams{}; + tmp.type = InvokeType::Run; + tmp.xDesc = &xDesc; + tmp.yDesc = &yDesc; + tmp.x = x; + tmp.y = y; + tmp.padding = padding; + tmp.num_padding = num_padding; + return tmp; + }(); + + const auto algo = AlgorithmName{"PadReflection"}; + const auto solvers = solver::SolverContainer{}; + solvers.ExecutePrimitive(handle, problem, algo, invoke_params); + + return miopenStatusSuccess; +} + +} // namespace miopen diff --git a/src/pad_reflection/problem_description.cpp b/src/pad_reflection/problem_description.cpp new file mode 100644 index 0000000000..403b9e913b --- /dev/null +++ b/src/pad_reflection/problem_description.cpp @@ -0,0 +1,54 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include +#include + +#include + +namespace miopen { + +namespace pad_reflection { + +NetworkConfig ProblemDescription::MakeNetworkConfig() const +{ + auto xlength = xDesc.GetLengths(); + auto ylength = yDesc.GetLengths(); + + auto output_size = std::accumulate( + ylength.begin(), ylength.end(), static_cast(1), std::multiplies()); + auto dtype = xDesc.GetType(); + + std::ostringstream ss; + + ss << "dtype" << dtype; + ss << "output_size" << output_size; + return NetworkConfig{ss.str()}; +} + +} // namespace pad_reflection + +} // namespace miopen diff --git a/src/pad_reflection_api.cpp b/src/pad_reflection_api.cpp new file mode 100644 index 0000000000..bbe94d3ec6 --- /dev/null +++ b/src/pad_reflection_api.cpp @@ -0,0 +1,52 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include +#include +#include +#include +#include + +extern "C" miopenStatus_t miopenPadReflection(miopenHandle_t handle, + const miopenTensorDescriptor_t xDesc, + const void* x, + const miopenTensorDescriptor_t yDesc, + void* y, + const size_t* padding, + const size_t num_padding) +{ + MIOPEN_LOG_FUNCTION(handle, xDesc, x, yDesc, y, padding); + + return miopen::try_([&] { + miopen::PadReflection(miopen::deref(handle), + miopen::deref(xDesc), + DataCast(x), + miopen::deref(yDesc), + DataCast(y), + padding, + num_padding); + }); +} diff --git a/src/solver.cpp b/src/solver.cpp index 6b451ca498..47d1ab33bd 100644 --- a/src/solver.cpp +++ b/src/solver.cpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -673,6 +674,9 @@ inline SolverRegistrar::SolverRegistrar(IdRegistryData& registry) fusion::ConvWinoFuryRxSFused<2, 3>{}.SolverDbId(), miopenConvolutionAlgoWinograd); + Register( + registry, ++id, Primitive::PadReflection, pad_reflection::PadReflection{}.SolverDbId()); + // IMPORTANT: New solvers should be added to the end of the function! } diff --git a/src/solver/pad_reflection/pad_reflection.cpp b/src/solver/pad_reflection/pad_reflection.cpp new file mode 100644 index 0000000000..b5e85b15bb --- /dev/null +++ b/src/solver/pad_reflection/pad_reflection.cpp @@ -0,0 +1,152 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#define LOCAL_SIZE 256 + +namespace miopen { + +namespace solver { + +namespace pad_reflection { + +bool PadReflection::IsApplicable([[maybe_unused]] const ExecutionContext& context, + const miopen::pad_reflection::ProblemDescription& problem) const +{ + if(!problem.IsSameType()) + return false; + if(!problem.IsAllPacked()) + return false; + if(!problem.IsRightNumPadding()) + return false; + return true; +} + +ConvSolution +PadReflection::GetSolution([[maybe_unused]] const ExecutionContext& context, + const miopen::pad_reflection::ProblemDescription& problem) const +{ + auto result = ConvSolution{miopenStatusSuccess}; + + auto input_dtype = miopen::GetDataType(problem.GetXDesc().GetType()); + auto output_dtype = miopen::GetDataType(problem.GetYDesc().GetType()); + auto xdims = problem.GetXDesc().GetLengths(); + auto ydims = problem.GetYDesc().GetLengths(); + + { + auto kernel = KernelInfo{}; + + kernel.kernel_file = "MIOpenPadReflection.cpp"; + kernel.kernel_name = "PadReflection2dFwdContiguous"; + auto output_numel = + std::accumulate(ydims.begin(), ydims.end(), 1ULL, std::multiplies()); + + const auto build_params = KernelBuildParameters{ + {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype}, + {"OUTPUT_TYPE", output_dtype == "bfloat16" ? "ushort" : output_dtype}, + }; + + kernel.comp_options = build_params.GenerateFor(kbp::HIP{}); + + size_t xlocalsize = LOCAL_SIZE; + size_t xgridsize = AlignUp(output_numel, xlocalsize); + size_t ylocalsize = 1; + size_t ygridsize = 1; + size_t zlocalsize = 1; + size_t zgridsize = 1; + kernel.l_wk.push_back(xlocalsize); + kernel.l_wk.push_back(ylocalsize); + kernel.l_wk.push_back(zlocalsize); + + kernel.g_wk.push_back(xgridsize); + kernel.g_wk.push_back(ygridsize); + kernel.g_wk.push_back(zgridsize); + + result.construction_params.push_back(kernel); + } + result.invoker_factory = [](const std::vector& kernels) { + return [=](const Handle& handle_, const AnyInvokeParams& raw_params) { + decltype(auto) kernel = handle_.Run(kernels[0]); + decltype(auto) params = raw_params.CastTo(); + + auto xdims = params.xDesc->GetLengths(); + auto ydims = params.yDesc->GetLengths(); + + auto xstrides = params.xDesc->GetStrides(); + + auto output_size = + std::accumulate(ydims.begin(), ydims.end(), 1ULL, std::multiplies()); + + auto padding = params.padding; + auto num_padding = params.num_padding; + long padding_l = padding[0]; + long padding_t = padding[0]; + if(num_padding == 4) + { + padding_l = padding[0]; + padding_t = padding[2]; + } + size_t in_H = xdims[2]; + size_t in_W = xdims[3]; + size_t output_size_1 = ydims[1]; + size_t output_size_2 = ydims[2]; + size_t output_size_3 = ydims[3]; + size_t input_stride_0 = xstrides[0]; + size_t input_stride_1 = xstrides[1]; + size_t input_stride_2 = xstrides[2]; + size_t input_stride_3 = xstrides[3]; + kernel(params.x, + params.y, + output_size, + padding_l, + padding_t, + in_H, + in_W, + output_size_1, + output_size_2, + output_size_3, + input_stride_0, + input_stride_1, + input_stride_2, + input_stride_3); + }; + }; + + return result; +} + +} // namespace pad_reflection + +} // namespace solver + +} // namespace miopen diff --git a/test/cpu_pad_reflection.hpp b/test/cpu_pad_reflection.hpp new file mode 100644 index 0000000000..55e6935d8b --- /dev/null +++ b/test/cpu_pad_reflection.hpp @@ -0,0 +1,96 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ +#ifndef GUARD_CPU_PAD_REFLECTION_HPP +#define GUARD_CPU_PAD_REFLECTION_HPP + +#include "tensor_holder.hpp" + +template +void cpu_pad_reflection(tensor input_tensor, + tensor& ref_output_tensor, + const std::vector padding) +{ + long padding_l = padding[0]; + long padding_t = padding[2]; + auto input_dims = input_tensor.desc.GetLengths(); + auto output_dims = ref_output_tensor.desc.GetLengths(); + auto input = input_tensor.data.data(); + auto output = ref_output_tensor.data.data(); + auto input_strides = input_tensor.desc.GetStrides(); + auto output_size = + std::accumulate(output_dims.begin(), output_dims.end(), 1L, std::multiplies()); + + size_t in_H = input_dims[2]; + size_t in_W = input_dims[3]; + + for(size_t gid = 0; gid < output_size; ++gid) + { + + long n, c, h, w; + // GET_NCHW(n, c, h, w, gid, output); + ulong nch = (gid) / output_dims[3]; + w = (gid) % output_dims[3]; + ulong nc = nch / output_dims[2]; + h = nch % output_dims[2]; + n = nc / output_dims[1]; + c = nc % output_dims[1]; + + long in_start_x = max(0L, -padding_l); + long in_start_y = max(0L, -padding_t); + long out_start_x = max(0L, padding_l); + long out_start_y = max(0L, padding_t); + + if(w < padding_l) + { + w = padding_l * 2 - w; + } + else if(padding_l <= w && w < in_W + padding_l) + { + } + else + { + w = (in_W + padding_l - 1) * 2 - w; + } + w = w - out_start_x + in_start_x; + + if(h < padding_t) + { + h = padding_t * 2 - h; + } + else if(padding_t <= h && h < in_H + padding_t) + { + } + else + { + h = (in_H + padding_t - 1) * 2 - h; + } + h = h - out_start_y + in_start_y; + + output[gid] = input[(input_strides[3] * (w)) + (input_strides[2] * (h)) + + (input_strides[1] * (c)) + (input_strides[0] * (n)) + 0]; + } +} +#endif diff --git a/test/gtest/pad_reflection.cpp b/test/gtest/pad_reflection.cpp new file mode 100644 index 0000000000..ad4563152c --- /dev/null +++ b/test/gtest/pad_reflection.cpp @@ -0,0 +1,123 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include "pad_reflection.hpp" +#include + +MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_TEST_FLOAT_ARG) +MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL) + +namespace pad_reflection { + +std::string GetFloatArg() +{ + const auto& tmp = env::value(MIOPEN_TEST_FLOAT_ARG); + if(tmp.empty()) + { + return ""; + } + return tmp; +} + +struct GPU_PadReflection_FP32 : PadReflectionTest +{ +}; + +struct GPU_PadReflection_FP16 : PadReflectionTest +{ +}; + +struct GPU_PadReflection_BFP16 : PadReflectionTest +{ +}; + +} // namespace pad_reflection + +using pad_reflection::GPU_PadReflection_BFP16; +using pad_reflection::GPU_PadReflection_FP16; +using pad_reflection::GPU_PadReflection_FP32; + +TEST_P(GPU_PadReflection_FP32, Test) +{ + if(!MIOPEN_TEST_ALL || + (env::enabled(MIOPEN_TEST_ALL) && env::value(MIOPEN_TEST_FLOAT_ARG) == "--float")) + { + RunTest(); + Verify(); + } + else + { + GTEST_SKIP(); + } +}; + +TEST_P(GPU_PadReflection_FP16, Test) +{ + if(!MIOPEN_TEST_ALL || + (env::enabled(MIOPEN_TEST_ALL) && env::value(MIOPEN_TEST_FLOAT_ARG) == "--half")) + { + RunTest(); + Verify(); + } + else + { + GTEST_SKIP(); + } +}; + +TEST_P(GPU_PadReflection_BFP16, Test) +{ + if(!MIOPEN_TEST_ALL || + (env::enabled(MIOPEN_TEST_ALL) && env::value(MIOPEN_TEST_FLOAT_ARG) == "--bfloat16")) + { + RunTest(); + Verify(); + } + else + { + GTEST_SKIP(); + } +}; + +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_PadReflection_FP32, + testing::ValuesIn(PadReflectionSmokeTestFloatConfigs())); +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_PadReflection_FP16, + testing::ValuesIn(PadReflectionSmokeTestFloatConfigs())); +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_PadReflection_BFP16, + testing::ValuesIn(PadReflectionSmokeTestFloatConfigs())); + +INSTANTIATE_TEST_SUITE_P(Full, + GPU_PadReflection_FP32, + testing::ValuesIn(PadReflectionTestFloatConfigs())); +INSTANTIATE_TEST_SUITE_P(Full, + GPU_PadReflection_FP16, + testing::ValuesIn(PadReflectionTestFloatConfigs())); +INSTANTIATE_TEST_SUITE_P(Full, + GPU_PadReflection_BFP16, + testing::ValuesIn(PadReflectionTestFloatConfigs())); diff --git a/test/gtest/pad_reflection.hpp b/test/gtest/pad_reflection.hpp new file mode 100644 index 0000000000..e1abac149d --- /dev/null +++ b/test/gtest/pad_reflection.hpp @@ -0,0 +1,198 @@ +/******************************************************************************* + * + * MIT License + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + *******************************************************************************/ + +#include "../driver/tensor_driver.hpp" +#include "cpu_pad_reflection.hpp" +#include "get_handle.hpp" +#include "random.hpp" +#include "tensor_holder.hpp" +#include "verify.hpp" +#include +#include +#include + +struct PadReflectionCase +{ + size_t N; + size_t C; + size_t D; + size_t H; + size_t W; + size_t padding[4]; + friend std::ostream& operator<<(std::ostream& os, const PadReflectionCase& tc) + { + return os << " N:" << tc.N << " C:" << tc.C << " D:" << tc.D << " H:" << tc.H + << " W:" << tc.W << " Padding:" << tc.padding[0] << " " << tc.padding[1] << " " + << tc.padding[2] << " " << tc.padding[3]; + } + + std::vector GetInput() + { + if((N != 0) && (C != 0) && (D != 0) && (H != 0) && (W != 0)) + { + return std::vector({N, C, D, H, W}); + } + else if((N != 0) && (C != 0) && (H != 0) && (W != 0)) + { + return std::vector({N, C, H, W}); + } + else if((N != 0) && (C != 0) && (W != 0)) + { + return std::vector({N, C, W}); + } + else if((N != 0) && (W != 0)) + { + return std::vector({N, W}); + } + else if((N != 0)) + { + return std::vector({N}); + } + else + { + std::cout << "Error Input Tensor Lengths\n" << std::endl; + return std::vector({0}); + } + } + + std::vector GetPadding() + { + std::vector paddingVector; + for(int i = 0; i < 4; ++i) + { + paddingVector.push_back(padding[i]); + } + return paddingVector; + } +}; + +std::vector PadReflectionTestFloatConfigs() +{ // n c d h w padding + // clang-format off + return { + { 48, 8, 0, 512, 512, {1, 1, 1, 1}}, + { 48, 8, 0, 512, 512, {1, 1, 3, 3}}, + { 48, 8, 0, 512, 512, {0, 0, 2, 2}}, + { 16, 311, 0, 98, 512, {1, 1, 1, 1}}, + { 16, 311, 0, 98, 512, {1, 1, 3, 3}}, + { 16, 311, 0, 98, 512, {0, 0, 2, 2}}, + }; + // clang-format on +} + +std::vector PadReflectionSmokeTestFloatConfigs() +{ // n c d h w padding + // clang-format off + return { + { 1, 1, 0, 3, 3, {2}}, + }; + // clang-format on +} + +template +struct PadReflectionTest : public ::testing::TestWithParam +{ +protected: + void SetUp() override + { + auto&& handle = get_handle(); + pad_reflection_config = GetParam(); + auto gen_value = [](auto...) { return prng::gen_descreet_uniform_sign(1e-2, 100); }; + + auto in_dims = pad_reflection_config.GetInput(); + auto padding = pad_reflection_config.GetPadding(); + input = tensor{in_dims}.generate(gen_value); + std::vector out_dims; + + for(int i = 0; i < in_dims.size(); i++) + { + if(i == 2) + { + out_dims.push_back(in_dims[i] + 2 * padding[2]); + } + else if(i == 3) + { + out_dims.push_back(in_dims[i] + 2 * padding[0]); + } + else + { + out_dims.push_back(in_dims[i]); + } + } + + output = tensor{out_dims}; + std::fill(output.begin(), output.end(), std::numeric_limits::quiet_NaN()); + + ref_output = tensor{out_dims}; + std::fill(ref_output.begin(), ref_output.end(), std::numeric_limits::quiet_NaN()); + + input_dev = handle.Write(input.data); + output_dev = handle.Write(output.data); + } + void RunTest() + { + auto&& handle = get_handle(); + auto padding = pad_reflection_config.GetPadding(); + + cpu_pad_reflection(input, ref_output, padding); + miopenStatus_t status; + + status = miopen::PadReflection(handle, + input.desc, + input_dev.get(), + output.desc, + output_dev.get(), + padding.data(), + padding.size()); + + ASSERT_EQ(status, miopenStatusSuccess); + + output.data = handle.Read(output_dev, output.data.size()); + } + + void Verify() + { + // Computation error of fp16 is ~2^13 (=8192) bigger than + // the one of fp32 because mantissa is shorter by 13 bits. + auto tolerance = std::is_same::value ? 1.5e-6 : 8.2e-3; + // bf16 mantissa has 7 bits, by 3 bits shorter than fp16. + if(std::is_same::value) + tolerance *= 8.0; + + auto error = miopen::rms_range(ref_output, output); + ASSERT_EQ(miopen::range_distance(ref_output), miopen::range_distance(output)); + EXPECT_LT(error, tolerance); + } + PadReflectionCase pad_reflection_config; + + tensor input; + tensor output; + + tensor ref_output; + + miopen::Allocator::ManageDataPtr input_dev; + miopen::Allocator::ManageDataPtr output_dev; +};