diff --git a/docs/reference/index.rst b/docs/reference/index.rst
index 90e29ffaa9..23a5f9986c 100644
--- a/docs/reference/index.rst
+++ b/docs/reference/index.rst
@@ -35,3 +35,4 @@ The MIOpen API library is structured as follows:
   * :doc:`ReduceExtreme <../doxygen/html/group__ReduceExtreme>` (experimental)
   * :doc:`Getitem <../doxygen/html/group__getitem>` (experimental)
   * :doc:`ReduceCalculation <../doxygen/html/group__ReduceCalculation>` (experimental)
+  * :doc:`Pad <../doxygen/html/group__pad>` (experimental)
diff --git a/driver/CMakeLists.txt b/driver/CMakeLists.txt
index cd663eb8b4..8d1433ff14 100644
--- a/driver/CMakeLists.txt
+++ b/driver/CMakeLists.txt
@@ -47,6 +47,7 @@ add_executable(MIOpenDriver
     dm_groupnorm.cpp
     dm_layernorm.cpp
     dm_lrn.cpp
+    dm_pad_reflection.cpp
     dm_pool.cpp
     dm_reduce.cpp
     dm_reduceextreme.cpp
diff --git a/driver/dm_pad_reflection.cpp b/driver/dm_pad_reflection.cpp
new file mode 100644
index 0000000000..b036d5ccb6
--- /dev/null
+++ b/driver/dm_pad_reflection.cpp
@@ -0,0 +1,39 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#include "registry_driver_maker.hpp"
+#include "pad_reflection_driver.hpp"
+static Driver* makeDriver(const std::string& base_arg)
+{
+    if(base_arg == "padreflection")
+        return new PadReflectionDriver<float, float>();
+    if(base_arg == "padreflectionfp16")
+        return new PadReflectionDriver<float16, float>();
+    if(base_arg == "padreflectionbfp16")
+        return new PadReflectionDriver<bfloat16, float>();
+    return nullptr;
+}
+
+REGISTER_DRIVER_MAKER(makeDriver);
diff --git a/driver/driver.hpp b/driver/driver.hpp
index b23df690d1..fbf4cd4e86 100644
--- a/driver/driver.hpp
+++ b/driver/driver.hpp
@@ -175,7 +175,7 @@ inline void PadBufferSize(size_t& sz, int datatype_sz)
            "groupnorm[bfp16|fp16], cat[bfp16|fp16], addlayernorm[bfp16|fp16], "
            "t5layernorm[bfp16|fp16], adam[fp16], ampadam, reduceextreme[bfp16|fp16], "
            "adamw[fp16], ampadamw, transformersadamw[fp16], transformersampadamw, "
-           "getitem[bfp16|fp16], reducecalculation[bfp16|fp16]\n");
+           "getitem[bfp16|fp16], reducecalculation[bfp16|fp16], padreflection[bfp16|fp16]\n");
     exit(0); // NOLINT (concurrency-mt-unsafe)
 }
 
@@ -206,7 +206,9 @@ inline std::string ParseBaseArg(int argc, char* argv[])
        arg != "adamwfp16" && arg != "ampadamw" && arg != "transformersadamw" &&
        arg != "transformersadamwfp16" && arg != "transformersampadamw" && arg != "getitem" &&
        arg != "getitemfp16" && arg != "getitembfp16" && arg != "reducecalculation" &&
-       arg != "reducecalculationfp16" && arg != "reducecalculationbfp16" && arg != "--version")
+       arg != "reducecalculationfp16" && arg != "reducecalculationbfp16" &&
+       arg != "padreflection" && arg != "padreflectionfp16" && arg != "padreflectionbfp16" &&
+       arg != "--version")
     {
         printf("FAILED: Invalid Base Input Argument\n");
         Usage();
diff --git a/driver/pad_reflection_driver.hpp b/driver/pad_reflection_driver.hpp
new file mode 100644
index 0000000000..881cb45c46
--- /dev/null
+++ b/driver/pad_reflection_driver.hpp
@@ -0,0 +1,363 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#ifndef GUARD_MIOPEN_PAD_REFLECTION_DRIVER_HPP
+#define GUARD_MIOPEN_PAD_REFLECTION_DRIVER_HPP
+
+#include "InputFlags.hpp"
+#include "driver.hpp"
+#include "tensor_driver.hpp"
+#include "timer.hpp"
+#include "random.hpp"
+#include <cstdlib>
+#include <memory>
+#include <miopen/miopen.h>
+#include <miopen/tensor.hpp>
+#include <numeric>
+#include <vector>
+#include <../test/tensor_holder.hpp>
+#include <../test/verify.hpp>
+
+#ifndef MLO_PADREFLECTIONHOST_H_
+#define MLO_PADREFLECTIONHOST_H_
+
+template <typename Tgpu, typename Tcheck>
+void mloPadReflectionRunHost(miopenTensorDescriptor_t inputDesc,
+                             miopenTensorDescriptor_t outputDesc,
+                             Tgpu* input,
+                             Tcheck* outputhost,
+                             std::vector<size_t> padding)
+{
+    auto input_dims  = miopen::deref(inputDesc).GetLengths();
+    auto output_dims = miopen::deref(outputDesc).GetLengths();
+    auto output_numel =
+        std::accumulate(output_dims.begin(), output_dims.end(), 1L, std::multiplies<int64_t>());
+
+    long padding_l     = padding[0];
+    long padding_t     = padding[2];
+    auto input_strides = miopen::deref(inputDesc).GetStrides();
+    size_t in_H        = input_dims[2];
+    size_t in_W        = input_dims[3];
+
+    for(size_t gid = 0; gid < output_numel; gid++)
+    {
+        long n, c, h, w;
+        // GET_NCHW(n, c, h, w, gid, output);
+        ulong nch = (gid) / output_dims[3];
+        w         = (gid) % output_dims[3];
+        ulong nc  = nch / output_dims[2];
+        h         = nch % output_dims[2];
+        n         = nc / output_dims[1];
+        c         = nc % output_dims[1];
+
+        long in_start_x  = max(0L, -padding_l);
+        long in_start_y  = max(0L, -padding_t);
+        long out_start_x = max(0L, padding_l);
+        long out_start_y = max(0L, padding_t);
+
+        if(w < padding_l)
+        {
+            w = padding_l * 2 - w;
+        }
+        else if(padding_l <= w && w < in_W + padding_l)
+        {
+        }
+        else
+        {
+            w = (in_W + padding_l - 1) * 2 - w;
+        }
+        w = w - out_start_x + in_start_x;
+
+        if(h < padding_t)
+        {
+            h = padding_t * 2 - h;
+        }
+        else if(padding_t <= h && h < in_H + padding_t)
+        {
+        }
+        else
+        {
+            h = (in_H + padding_t - 1) * 2 - h;
+        }
+        h = h - out_start_y + in_start_y;
+
+        outputhost[gid] = input[(input_strides[3] * (w)) + (input_strides[2] * (h)) +
+                                (input_strides[1] * (c)) + (input_strides[0] * (n)) + 0];
+    }
+}
+#endif
+
+template <typename Tgpu, typename Tref>
+class PadReflectionDriver : public Driver
+{
+public:
+    PadReflectionDriver() : Driver()
+    {
+        miopenCreateTensorDescriptor(&inputDesc);
+        miopenCreateTensorDescriptor(&outputDesc);
+
+        data_type = miopen_type<Tgpu>{};
+    }
+
+    int AddCmdLineArgs() override;
+    int ParseCmdLineArgs(int argc, char* argv[]) override;
+    InputFlags& GetInputFlags() override { return inflags; }
+
+    int GetandSetData() override;
+
+    int AllocateBuffersAndCopy() override;
+
+    int RunForwardGPU() override;
+    int RunForwardCPU();
+
+    int RunBackwardGPU() override;
+
+    Tref GetTolerance();
+    int VerifyBackward() override;
+    int VerifyForward() override;
+    ~PadReflectionDriver() override
+    {
+        miopenDestroyTensorDescriptor(inputDesc);
+        miopenDestroyTensorDescriptor(outputDesc);
+    }
+
+private:
+    InputFlags inflags;
+
+    int forw;
+
+    miopenTensorDescriptor_t inputDesc;
+    miopenTensorDescriptor_t outputDesc;
+
+    std::unique_ptr<GPUMem> in_dev;
+    std::unique_ptr<GPUMem> out_dev;
+
+    std::vector<Tgpu> in;
+    std::vector<Tgpu> out;
+    std::vector<Tref> outhost;
+
+    std::vector<size_t> padding;
+};
+
+template <typename Tgpu, typename Tref>
+int PadReflectionDriver<Tgpu, Tref>::ParseCmdLineArgs(int argc, char* argv[])
+{
+    inflags.Parse(argc, argv);
+
+    if(inflags.GetValueInt("time") == 1)
+    {
+        miopenEnableProfiling(GetHandle(), true);
+    }
+    return miopenStatusSuccess;
+}
+
+template <typename Tgpu, typename Tref>
+int PadReflectionDriver<Tgpu, Tref>::GetandSetData()
+{
+    std::string padding_str = inflags.GetValueStr("padding");
+    std::stringstream ss(padding_str);
+    std::string token;
+    while(std::getline(ss, token, ','))
+    {
+        padding.push_back(std::stoul(token));
+    }
+
+    if(!(padding.size() == 1 or padding.size() == 4))
+    {
+        std::cerr << "Error Padding Lengths\n" << std::endl;
+    }
+
+    std::vector<int> in_len = inflags.GetValueTensor("DimLengths").lengths;
+
+    SetTensorNd(inputDesc, in_len, data_type);
+
+    std::vector<int> out_len;
+    for(int i = 0; i < in_len.size(); i++)
+    {
+        // If H
+        if(i == 2)
+        {
+            out_len.push_back(in_len[i] + 2 * padding[2]);
+        }
+        // If W
+        else if(i == 3)
+        {
+            out_len.push_back(in_len[i] + 2 * padding[0]);
+        }
+        else
+        {
+            out_len.push_back(in_len[i]);
+        }
+    }
+
+    if(out_len.empty())
+        out_len.push_back(1);
+
+    SetTensorNd(outputDesc, out_len, data_type);
+
+    return 0;
+}
+
+template <typename Tgpu, typename Tref>
+int PadReflectionDriver<Tgpu, Tref>::AddCmdLineArgs()
+{
+    inflags.AddInputFlag("forw", 'F', "1", "Run only Forward Pad Reflection (Default=1)", "int");
+    inflags.AddTensorFlag(
+        "DimLengths", 'D', "256x4x32x32", "The dimensional lengths of the input tensor");
+
+    inflags.AddInputFlag("padding", 'P', "1,1,1,1", "Padding array (Default=1 or 1,1,1,1)", "str");
+
+    inflags.AddInputFlag("iter", 'i', "10", "Number of Iterations (Default=10)", "int");
+    inflags.AddInputFlag("verify", 'V', "1", "Verify Each Layer (Default=1)", "int");
+    inflags.AddInputFlag("time", 't', "0", "Time Each Layer (Default=0)", "int");
+    inflags.AddInputFlag(
+        "wall", 'w', "0", "Wall-clock Time Each Layer, Requires time == 1 (Default=0)", "int");
+
+    return miopenStatusSuccess;
+}
+
+template <typename Tgpu, typename Tref>
+int PadReflectionDriver<Tgpu, Tref>::AllocateBuffersAndCopy()
+{
+    size_t in_sz  = GetTensorSize(inputDesc);
+    size_t out_sz = GetTensorSize(outputDesc);
+
+    uint32_t ctx = 0;
+
+    in_dev  = std::unique_ptr<GPUMem>(new GPUMem(ctx, in_sz, sizeof(Tgpu)));
+    out_dev = std::unique_ptr<GPUMem>(new GPUMem(ctx, out_sz, sizeof(Tgpu)));
+
+    in      = std::vector<Tgpu>(in_sz, static_cast<Tgpu>(0));
+    out     = std::vector<Tgpu>(out_sz, static_cast<Tgpu>(0));
+    outhost = std::vector<Tref>(out_sz, static_cast<Tref>(0));
+
+    for(int i = 0; i < in_sz; i++)
+    {
+        in[i] = prng::gen_A_to_B<Tgpu>(static_cast<Tgpu>(0.0), static_cast<Tgpu>(1.0));
+    }
+
+    if(in_dev->ToGPU(GetStream(), in.data()) != 0)
+        std::cerr << "Error copying (in) to GPU, size: " << in_dev->GetSize() << std::endl;
+
+    if(out_dev->ToGPU(GetStream(), out.data()) != 0)
+        std::cerr << "Error copying (out) to GPU, size: " << out_dev->GetSize() << std::endl;
+
+    return miopenStatusSuccess;
+}
+
+template <typename Tgpu, typename Tref>
+int PadReflectionDriver<Tgpu, Tref>::RunForwardGPU()
+{
+    float kernel_total_time = 0;
+    float kernel_first_time = 0;
+
+    Timer t;
+    START_TIME
+
+    for(int i = 0; i < inflags.GetValueInt("iter"); i++)
+    {
+        miopenPadReflection(GetHandle(),
+                            inputDesc,
+                            in_dev->GetMem(),
+                            outputDesc,
+                            out_dev->GetMem(),
+                            padding.data(),
+                            padding.size());
+
+        float time = 0.0;
+        miopenGetKernelTime(GetHandle(), &time);
+        kernel_total_time += time;
+        if(i == 0)
+            kernel_first_time = time;
+    }
+
+    if(inflags.GetValueInt("time") == 1)
+    {
+        STOP_TIME
+        int iter = inflags.GetValueInt("iter");
+        if(WALL_CLOCK)
+            std::cout << "Wall-clock Time Forward Pad Reflection Elapsed: " << t.gettime_ms() / iter
+                      << " ms\n";
+
+        float kernel_average_time =
+            iter > 1 ? (kernel_total_time - kernel_first_time) / (iter - 1) : kernel_first_time;
+        std::cout << "GPU Kernel Time Forward Pad Reflection Elapsed: " << kernel_average_time
+                  << " ms\n";
+    }
+
+    if(out_dev->FromGPU(GetStream(), out.data()) != 0)
+        std::cerr << "Error copying (out_dev) from GPU, size: " << out_dev->GetSize() << std::endl;
+
+    return miopenStatusSuccess;
+}
+
+template <typename Tgpu, typename Tref>
+int PadReflectionDriver<Tgpu, Tref>::RunForwardCPU()
+{
+    mloPadReflectionRunHost<Tgpu, Tref>(inputDesc, outputDesc, in.data(), outhost.data(), padding);
+
+    return miopenStatusSuccess;
+}
+
+template <typename Tgpu, typename Tref>
+int PadReflectionDriver<Tgpu, Tref>::RunBackwardGPU()
+{
+    return miopenStatusSuccess;
+}
+
+template <typename Tgpu, typename Tref>
+Tref PadReflectionDriver<Tgpu, Tref>::GetTolerance()
+{
+    return 0;
+}
+
+template <typename Tgpu, typename Tref>
+int PadReflectionDriver<Tgpu, Tref>::VerifyForward()
+{
+    RunForwardCPU();
+    const Tref tolerance = GetTolerance();
+    auto error           = miopen::rms_range(outhost, out);
+
+    if(std::abs(static_cast<float>(error)) != 0.0f)
+    {
+        std::cout << "Pad Reflection FAILED: " << error << " > " << tolerance << std::endl;
+        return EC_VerifyFwd;
+    }
+    else
+    {
+        std::cout << "Pad Reflection Verifies OK on CPU reference (" << error << " == " << tolerance
+                  << ')' << std::endl;
+    }
+
+    return miopenStatusSuccess;
+}
+
+template <typename Tgpu, typename Tref>
+int PadReflectionDriver<Tgpu, Tref>::VerifyBackward()
+{
+    return miopenStatusSuccess;
+}
+
+#endif // GUARD_MIOPEN_PAD_REFLECTION_DRIVER_HPP
diff --git a/include/miopen/miopen.h b/include/miopen/miopen.h
index 3b9bbeccc1..7d5b420687 100644
--- a/include/miopen/miopen.h
+++ b/include/miopen/miopen.h
@@ -2,7 +2,7 @@
  *
  * MIT License
  *
- * Copyright (c) 2023 Advanced Micro Devices, Inc.
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -70,6 +70,7 @@
  * @defgroup SGD
  * @defgroup getitem
  * @defgroup ReduceCalculation
+ * @defgroup pad
  *
  */
 
@@ -5921,6 +5922,35 @@ miopenReduceCalculationForward(miopenHandle_t handle,
 
 /** @} */
 // CLOSEOUT REDUCE CALCULATION DOXYGEN GROUP
+#endif
+
+#ifdef MIOPEN_BETA_API
+// Padreflection APIs
+/** @addtogroup pad
+ *
+ *  @{
+ */
+/*! @brief Add padding by the reflection of the tensor
+ *
+ * @param handle                   MIOpen handle (input)
+ * @param xDesc                    Tensor descriptor for data input tensor x (input)
+ * @param x                        Data tensor x (input)
+ * @param yDesc                    Tensor descriptor for output data tensor y (input)
+ * @param y                        Data tensor y (output)
+ * @param padding                  Padding array contain 1 or 4 elements (input)
+ * @param num_padding              Number of elements in padding, equals 1 or 4 (input)
+ * @return                         miopenStatus_t
+ */
+MIOPEN_EXPORT miopenStatus_t miopenPadReflection(miopenHandle_t handle,
+                                                 const miopenTensorDescriptor_t xDesc,
+                                                 const void* x,
+                                                 const miopenTensorDescriptor_t yDesc,
+                                                 void* y,
+                                                 const size_t* padding,
+                                                 const size_t num_padding);
+
+/** @} */
+// CLOSEOUT pad DOXYGEN GROUP
 #endif // MIOPEN_BETA_API
 
 #ifdef MIOPEN_BETA_API
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 77acf3f7d3..70237ab1f2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -159,6 +159,8 @@ set( MIOpen_Source
     mha/problem_description.cpp
     op_args.cpp
     operator.cpp
+    pad_reflection/problem_description.cpp
+    pad_reflection_api.cpp
     performance_config.cpp
     pooling/problem_description.cpp
     pooling_api.cpp
@@ -294,6 +296,7 @@ set( MIOpen_Source
     solver/layernorm/forward_t5layernorm.cpp
     solver/mha/mha_solver_backward.cpp
     solver/mha/mha_solver_forward.cpp
+    solver/pad_reflection/pad_reflection.cpp
     solver/pooling/forward2d.cpp
     solver/pooling/forwardNaive.cpp
     solver/pooling/forwardNd.cpp
@@ -503,6 +506,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN
         kernels/MIOpenLRNBwd.cl
         kernels/MIOpenLRNFwd.cl
         kernels/MIOpenNeuron.cl
+        kernels/MIOpenPadReflection.cpp
         kernels/MIOpenPooling.cl
         kernels/MIOpenPoolingBwd.cl
         kernels/MIOpenPoolingBwdND.cl
@@ -650,6 +654,7 @@ if( MIOPEN_BACKEND MATCHES "OpenCL" OR MIOPEN_BACKEND STREQUAL "HIPOC" OR MIOPEN
         hip/hip_build_utils.cpp
         hip/batched_transpose_sol.cpp
         hip/general_tensor_reorder_sol.cpp
+        pad_reflection.cpp
         pooling.cpp
         t5layernorm.cpp
         ocl/fusionopconvocl.cpp
diff --git a/src/include/miopen/pad_reflection.hpp b/src/include/miopen/pad_reflection.hpp
new file mode 100644
index 0000000000..017d020340
--- /dev/null
+++ b/src/include/miopen/pad_reflection.hpp
@@ -0,0 +1,44 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#ifndef MIOPEN_PAD_REFLECTION_HPP_
+#define MIOPEN_PAD_REFLECTION_HPP_
+
+#include <miopen/common.hpp>
+
+namespace miopen {
+
+struct Handle;
+struct TensorDescriptor;
+
+miopenStatus_t PadReflection(Handle& handle,
+                             const TensorDescriptor& xDesc,
+                             ConstData_t x,
+                             const TensorDescriptor& yDesc,
+                             Data_t y,
+                             const size_t* padding,
+                             size_t num_padding);
+} // namespace miopen
+#endif // MIOPEN_PAD_REFLECTION_HPP_
diff --git a/src/include/miopen/pad_reflection/invoke_params.hpp b/src/include/miopen/pad_reflection/invoke_params.hpp
new file mode 100644
index 0000000000..b64438d83a
--- /dev/null
+++ b/src/include/miopen/pad_reflection/invoke_params.hpp
@@ -0,0 +1,54 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#pragma once
+
+#include <miopen/invoke_params.hpp>
+#include <miopen/tensor.hpp>
+
+namespace miopen {
+namespace pad_reflection {
+
+struct InvokeParams : public miopen::InvokeParams
+{
+    InvokeParams() = default;
+
+    const TensorDescriptor* xDesc = nullptr;
+    const TensorDescriptor* yDesc = nullptr;
+
+    ConstData_t x = nullptr;
+    Data_t y      = nullptr;
+
+    const size_t* padding = nullptr;
+    size_t num_padding    = 0;
+
+    std::size_t GetWorkspaceSize() const { return 0; }
+    Data_t GetWorkspace() const { return nullptr; }
+};
+
+} // namespace pad_reflection
+
+} // namespace miopen
diff --git a/src/include/miopen/pad_reflection/problem_description.hpp b/src/include/miopen/pad_reflection/problem_description.hpp
new file mode 100644
index 0000000000..3172512757
--- /dev/null
+++ b/src/include/miopen/pad_reflection/problem_description.hpp
@@ -0,0 +1,102 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#pragma once
+
+#include <miopen/activ.hpp>
+#include <miopen/problem_description_base.hpp>
+#include <miopen/tensor.hpp>
+#include <cassert>
+#include <string>
+
+namespace miopen {
+
+struct NetworkConfig;
+
+namespace pad_reflection {
+
+struct ProblemDescription : ProblemDescriptionBase
+{
+    ProblemDescription(const TensorDescriptor& xDesc_,
+                       const TensorDescriptor& yDesc_,
+                       const size_t num_padding_)
+        : xDesc(xDesc_), yDesc(yDesc_), num_padding(num_padding_)
+    {
+    }
+    const TensorDescriptor& GetXDesc() const { return xDesc; }
+    const TensorDescriptor& GetYDesc() const { return yDesc; }
+    size_t GetNumPadding() const { return num_padding; }
+
+    bool IsSameType() const
+    {
+        if(xDesc.GetType() != yDesc.GetType())
+        {
+#if MIOPEN_BUILD_DEV || !MIOPEN_NDEBUG
+            MIOPEN_THROW(miopenStatusBadParm, "Pad Reflection: Tensor types do not match.");
+#else
+            return false;
+#endif
+        }
+        return true;
+    }
+
+    bool IsAllPacked() const
+    {
+        if(!(xDesc.IsPacked() && yDesc.IsPacked()))
+        {
+#if MIOPEN_BUILD_DEV || !MIOPEN_NDEBUG
+            MIOPEN_THROW(miopenStatusBadParm, "Pad Reflection: Unpacked tensors not supported.");
+#else
+            return false;
+#endif
+        }
+        return true;
+    }
+
+    bool IsRightNumPadding() const
+    {
+        if(!(num_padding == 1 || num_padding == 4))
+        {
+#if MIOPEN_BUILD_DEV || !MIOPEN_NDEBUG
+            MIOPEN_THROW(miopenStatusBadParm,
+                         "Pad Reflection: Padding input needs to have 1 or 4 elements only.");
+#else
+            return false;
+#endif
+        }
+        return true;
+    }
+
+    NetworkConfig MakeNetworkConfig() const override;
+
+private:
+    TensorDescriptor xDesc;
+    TensorDescriptor yDesc;
+    size_t num_padding;
+};
+
+} // namespace pad_reflection
+
+} // namespace miopen
diff --git a/src/include/miopen/pad_reflection/solvers.hpp b/src/include/miopen/pad_reflection/solvers.hpp
new file mode 100644
index 0000000000..e2da94bc4b
--- /dev/null
+++ b/src/include/miopen/pad_reflection/solvers.hpp
@@ -0,0 +1,57 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#pragma once
+
+#include <miopen/pad_reflection/problem_description.hpp>
+#include <miopen/solver.hpp>
+#include <utility>
+
+namespace miopen {
+
+namespace solver {
+
+namespace pad_reflection {
+
+using PadReflectionSolver =
+    NonTunableSolverBase<ExecutionContext, miopen::pad_reflection::ProblemDescription>;
+
+struct PadReflection final : PadReflectionSolver
+{
+    const std::string& SolverDbId() const override { return GetSolverDbId<PadReflection>(); }
+
+    bool IsApplicable(const ExecutionContext& context,
+                      const miopen::pad_reflection::ProblemDescription& problem) const override;
+
+    ConvSolution
+    GetSolution(const ExecutionContext& context,
+                const miopen::pad_reflection::ProblemDescription& problem) const override;
+};
+
+} // namespace pad_reflection
+
+} // namespace solver
+
+} // namespace miopen
diff --git a/src/include/miopen/solver_id.hpp b/src/include/miopen/solver_id.hpp
index 81c15f6bea..8721a67a14 100644
--- a/src/include/miopen/solver_id.hpp
+++ b/src/include/miopen/solver_id.hpp
@@ -59,7 +59,8 @@ enum class Primitive
     Mha,
     Softmax,
     Adam,
-    Item
+    Item,
+    PadReflection
 };
 
 struct MIOPEN_INTERNALS_EXPORT Id
diff --git a/src/kernels/MIOpenPadReflection.cpp b/src/kernels/MIOpenPadReflection.cpp
new file mode 100644
index 0000000000..9e2379ddfc
--- /dev/null
+++ b/src/kernels/MIOpenPadReflection.cpp
@@ -0,0 +1,120 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#ifndef MIOPEN_DONT_USE_HIP_RUNTIME_HEADERS
+#include <hip/hip_fp16.h>
+#include <hip/hip_runtime.h>
+#endif
+
+#include "float_types.h"
+
+template <typename TI, typename TO>
+__device__ void padReflection2dFwdContiguous(const TI* __restrict__ input,
+                                             TO* __restrict__ output,
+                                             uint64_t output_size,
+                                             long padding_left,
+                                             long padding_top,
+                                             const size_t in_H,
+                                             const size_t in_W,
+                                             const size_t output_size_1,
+                                             const size_t output_size_2,
+                                             const size_t output_size_3,
+                                             const size_t input_stride_0,
+                                             const size_t input_stride_1,
+                                             const size_t input_stride_2,
+                                             const size_t input_stride_3)
+{
+    const size_t gid = threadIdx.x + blockIdx.x * blockDim.x;
+    if(gid >= output_size)
+        return;
+
+    long n, c, h, w;
+    ulong nch = (gid) / output_size_3;
+    w         = (gid) % output_size_3;
+    ulong nc  = nch / output_size_2;
+    h         = nch % output_size_2;
+    n         = nc / output_size_1;
+    c         = nc % output_size_1;
+
+    long in_start_x  = max(0L, -padding_left);
+    long in_start_y  = max(0L, -padding_top);
+    long out_start_x = max(0L, padding_left);
+    long out_start_y = max(0L, padding_top);
+
+    if(w < padding_left)
+    {
+        w = padding_left * 2 - w;
+    }
+    else if(!(padding_left <= w && w < in_W + padding_left))
+    {
+        w = (in_W + padding_left - 1) * 2 - w;
+    }
+    w = w - out_start_x + in_start_x;
+
+    if(h < padding_top)
+    {
+        h = padding_top * 2 - h;
+    }
+    else if(!(padding_top <= h && h < in_H + padding_top))
+    {
+        h = (in_H + padding_top - 1) * 2 - h;
+    }
+    h = h - out_start_y + in_start_y;
+
+    output[gid] = input[(input_stride_3 * (w)) + (input_stride_2 * (h)) + (input_stride_1 * (c)) +
+                        (input_stride_0 * (n)) + 0];
+}
+
+extern "C" __global__ void PadReflection2dFwdContiguous(const INPUT_TYPE* __restrict__ input,
+                                                        OUTPUT_TYPE* __restrict__ output,
+                                                        uint64_t output_size,
+                                                        long padding_left,
+                                                        long padding_top,
+                                                        const size_t in_H,
+                                                        const size_t in_W,
+                                                        const size_t output_size_1,
+                                                        const size_t output_size_2,
+                                                        const size_t output_size_3,
+                                                        const size_t input_stride_0,
+                                                        const size_t input_stride_1,
+                                                        const size_t input_stride_2,
+                                                        const size_t input_stride_3)
+{
+    padReflection2dFwdContiguous<INPUT_TYPE, OUTPUT_TYPE>(input,
+                                                          output,
+                                                          output_size,
+                                                          padding_left,
+                                                          padding_top,
+                                                          in_H,
+                                                          in_W,
+                                                          output_size_1,
+                                                          output_size_2,
+                                                          output_size_3,
+                                                          input_stride_0,
+                                                          input_stride_1,
+                                                          input_stride_2,
+                                                          input_stride_3);
+}
\ No newline at end of file
diff --git a/src/pad_reflection.cpp b/src/pad_reflection.cpp
new file mode 100644
index 0000000000..4605984f90
--- /dev/null
+++ b/src/pad_reflection.cpp
@@ -0,0 +1,67 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#include <miopen/datatype.hpp>
+#include <miopen/find_solution.hpp>
+#include <miopen/float_equal.hpp>
+#include <miopen/kernel_cache.hpp>
+#include <miopen/pad_reflection/invoke_params.hpp>
+#include <miopen/pad_reflection/solvers.hpp>
+#include <miopen/pad_reflection.hpp>
+#include <miopen/tensor.hpp>
+
+namespace miopen {
+
+miopenStatus_t PadReflection(Handle& handle,
+                             const TensorDescriptor& xDesc,
+                             ConstData_t x,
+                             const TensorDescriptor& yDesc,
+                             Data_t y,
+                             const size_t* padding,
+                             const size_t num_padding)
+{
+    const auto problem = pad_reflection::ProblemDescription{xDesc, yDesc, num_padding};
+
+    const auto invoke_params = [&]() {
+        auto tmp        = pad_reflection::InvokeParams{};
+        tmp.type        = InvokeType::Run;
+        tmp.xDesc       = &xDesc;
+        tmp.yDesc       = &yDesc;
+        tmp.x           = x;
+        tmp.y           = y;
+        tmp.padding     = padding;
+        tmp.num_padding = num_padding;
+        return tmp;
+    }();
+
+    const auto algo    = AlgorithmName{"PadReflection"};
+    const auto solvers = solver::SolverContainer<solver::pad_reflection::PadReflection>{};
+    solvers.ExecutePrimitive(handle, problem, algo, invoke_params);
+
+    return miopenStatusSuccess;
+}
+
+} // namespace miopen
diff --git a/src/pad_reflection/problem_description.cpp b/src/pad_reflection/problem_description.cpp
new file mode 100644
index 0000000000..403b9e913b
--- /dev/null
+++ b/src/pad_reflection/problem_description.cpp
@@ -0,0 +1,54 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#include <miopen/pad_reflection/problem_description.hpp>
+#include <miopen/names.hpp>
+
+#include <sstream>
+
+namespace miopen {
+
+namespace pad_reflection {
+
+NetworkConfig ProblemDescription::MakeNetworkConfig() const
+{
+    auto xlength = xDesc.GetLengths();
+    auto ylength = yDesc.GetLengths();
+
+    auto output_size = std::accumulate(
+        ylength.begin(), ylength.end(), static_cast<size_t>(1), std::multiplies<size_t>());
+    auto dtype = xDesc.GetType();
+
+    std::ostringstream ss;
+
+    ss << "dtype" << dtype;
+    ss << "output_size" << output_size;
+    return NetworkConfig{ss.str()};
+}
+
+} // namespace pad_reflection
+
+} // namespace miopen
diff --git a/src/pad_reflection_api.cpp b/src/pad_reflection_api.cpp
new file mode 100644
index 0000000000..bbe94d3ec6
--- /dev/null
+++ b/src/pad_reflection_api.cpp
@@ -0,0 +1,52 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#include <miopen/pad_reflection.hpp>
+#include <miopen/errors.hpp>
+#include <miopen/handle.hpp>
+#include <miopen/logger.hpp>
+#include <miopen/tensor_ops.hpp>
+
+extern "C" miopenStatus_t miopenPadReflection(miopenHandle_t handle,
+                                              const miopenTensorDescriptor_t xDesc,
+                                              const void* x,
+                                              const miopenTensorDescriptor_t yDesc,
+                                              void* y,
+                                              const size_t* padding,
+                                              const size_t num_padding)
+{
+    MIOPEN_LOG_FUNCTION(handle, xDesc, x, yDesc, y, padding);
+
+    return miopen::try_([&] {
+        miopen::PadReflection(miopen::deref(handle),
+                              miopen::deref(xDesc),
+                              DataCast(x),
+                              miopen::deref(yDesc),
+                              DataCast(y),
+                              padding,
+                              num_padding);
+    });
+}
diff --git a/src/solver.cpp b/src/solver.cpp
index 6b451ca498..47d1ab33bd 100644
--- a/src/solver.cpp
+++ b/src/solver.cpp
@@ -34,6 +34,7 @@
 #include <miopen/groupnorm/solvers.hpp>
 #include <miopen/getitem/solvers.hpp>
 #include <miopen/layernorm/solvers.hpp>
+#include <miopen/pad_reflection/solvers.hpp>
 #include <miopen/pooling/solvers.hpp>
 #include <miopen/reduce/solvers.hpp>
 #include <miopen/mha/solvers.hpp>
@@ -673,6 +674,9 @@ inline SolverRegistrar::SolverRegistrar(IdRegistryData& registry)
              fusion::ConvWinoFuryRxSFused<2, 3>{}.SolverDbId(),
              miopenConvolutionAlgoWinograd);
 
+    Register(
+        registry, ++id, Primitive::PadReflection, pad_reflection::PadReflection{}.SolverDbId());
+
     // IMPORTANT: New solvers should be added to the end of the function!
 }
 
diff --git a/src/solver/pad_reflection/pad_reflection.cpp b/src/solver/pad_reflection/pad_reflection.cpp
new file mode 100644
index 0000000000..b5e85b15bb
--- /dev/null
+++ b/src/solver/pad_reflection/pad_reflection.cpp
@@ -0,0 +1,152 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#include <cstddef>
+#include <miopen/datatype.hpp>
+#include <miopen/kernel_build_params.hpp>
+#include <miopen/pad_reflection/invoke_params.hpp>
+#include <miopen/pad_reflection/solvers.hpp>
+#include <miopen/pad_reflection.hpp>
+#include <miopen/target_properties.hpp>
+
+#define LOCAL_SIZE 256
+
+namespace miopen {
+
+namespace solver {
+
+namespace pad_reflection {
+
+bool PadReflection::IsApplicable([[maybe_unused]] const ExecutionContext& context,
+                                 const miopen::pad_reflection::ProblemDescription& problem) const
+{
+    if(!problem.IsSameType())
+        return false;
+    if(!problem.IsAllPacked())
+        return false;
+    if(!problem.IsRightNumPadding())
+        return false;
+    return true;
+}
+
+ConvSolution
+PadReflection::GetSolution([[maybe_unused]] const ExecutionContext& context,
+                           const miopen::pad_reflection::ProblemDescription& problem) const
+{
+    auto result = ConvSolution{miopenStatusSuccess};
+
+    auto input_dtype  = miopen::GetDataType(problem.GetXDesc().GetType());
+    auto output_dtype = miopen::GetDataType(problem.GetYDesc().GetType());
+    auto xdims        = problem.GetXDesc().GetLengths();
+    auto ydims        = problem.GetYDesc().GetLengths();
+
+    {
+        auto kernel = KernelInfo{};
+
+        kernel.kernel_file = "MIOpenPadReflection.cpp";
+        kernel.kernel_name = "PadReflection2dFwdContiguous";
+        auto output_numel =
+            std::accumulate(ydims.begin(), ydims.end(), 1ULL, std::multiplies<size_t>());
+
+        const auto build_params = KernelBuildParameters{
+            {"INPUT_TYPE", input_dtype == "bfloat16" ? "ushort" : input_dtype},
+            {"OUTPUT_TYPE", output_dtype == "bfloat16" ? "ushort" : output_dtype},
+        };
+
+        kernel.comp_options = build_params.GenerateFor(kbp::HIP{});
+
+        size_t xlocalsize = LOCAL_SIZE;
+        size_t xgridsize  = AlignUp(output_numel, xlocalsize);
+        size_t ylocalsize = 1;
+        size_t ygridsize  = 1;
+        size_t zlocalsize = 1;
+        size_t zgridsize  = 1;
+        kernel.l_wk.push_back(xlocalsize);
+        kernel.l_wk.push_back(ylocalsize);
+        kernel.l_wk.push_back(zlocalsize);
+
+        kernel.g_wk.push_back(xgridsize);
+        kernel.g_wk.push_back(ygridsize);
+        kernel.g_wk.push_back(zgridsize);
+
+        result.construction_params.push_back(kernel);
+    }
+    result.invoker_factory = [](const std::vector<Kernel>& kernels) {
+        return [=](const Handle& handle_, const AnyInvokeParams& raw_params) {
+            decltype(auto) kernel = handle_.Run(kernels[0]);
+            decltype(auto) params = raw_params.CastTo<miopen::pad_reflection::InvokeParams>();
+
+            auto xdims = params.xDesc->GetLengths();
+            auto ydims = params.yDesc->GetLengths();
+
+            auto xstrides = params.xDesc->GetStrides();
+
+            auto output_size =
+                std::accumulate(ydims.begin(), ydims.end(), 1ULL, std::multiplies<size_t>());
+
+            auto padding     = params.padding;
+            auto num_padding = params.num_padding;
+            long padding_l   = padding[0];
+            long padding_t   = padding[0];
+            if(num_padding == 4)
+            {
+                padding_l = padding[0];
+                padding_t = padding[2];
+            }
+            size_t in_H           = xdims[2];
+            size_t in_W           = xdims[3];
+            size_t output_size_1  = ydims[1];
+            size_t output_size_2  = ydims[2];
+            size_t output_size_3  = ydims[3];
+            size_t input_stride_0 = xstrides[0];
+            size_t input_stride_1 = xstrides[1];
+            size_t input_stride_2 = xstrides[2];
+            size_t input_stride_3 = xstrides[3];
+            kernel(params.x,
+                   params.y,
+                   output_size,
+                   padding_l,
+                   padding_t,
+                   in_H,
+                   in_W,
+                   output_size_1,
+                   output_size_2,
+                   output_size_3,
+                   input_stride_0,
+                   input_stride_1,
+                   input_stride_2,
+                   input_stride_3);
+        };
+    };
+
+    return result;
+}
+
+} // namespace pad_reflection
+
+} // namespace solver
+
+} // namespace miopen
diff --git a/test/cpu_pad_reflection.hpp b/test/cpu_pad_reflection.hpp
new file mode 100644
index 0000000000..55e6935d8b
--- /dev/null
+++ b/test/cpu_pad_reflection.hpp
@@ -0,0 +1,96 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+#ifndef GUARD_CPU_PAD_REFLECTION_HPP
+#define GUARD_CPU_PAD_REFLECTION_HPP
+
+#include "tensor_holder.hpp"
+
+template <class T>
+void cpu_pad_reflection(tensor<T> input_tensor,
+                        tensor<T>& ref_output_tensor,
+                        const std::vector<size_t> padding)
+{
+    long padding_l     = padding[0];
+    long padding_t     = padding[2];
+    auto input_dims    = input_tensor.desc.GetLengths();
+    auto output_dims   = ref_output_tensor.desc.GetLengths();
+    auto input         = input_tensor.data.data();
+    auto output        = ref_output_tensor.data.data();
+    auto input_strides = input_tensor.desc.GetStrides();
+    auto output_size =
+        std::accumulate(output_dims.begin(), output_dims.end(), 1L, std::multiplies<int64_t>());
+
+    size_t in_H = input_dims[2];
+    size_t in_W = input_dims[3];
+
+    for(size_t gid = 0; gid < output_size; ++gid)
+    {
+
+        long n, c, h, w;
+        // GET_NCHW(n, c, h, w, gid, output);
+        ulong nch = (gid) / output_dims[3];
+        w         = (gid) % output_dims[3];
+        ulong nc  = nch / output_dims[2];
+        h         = nch % output_dims[2];
+        n         = nc / output_dims[1];
+        c         = nc % output_dims[1];
+
+        long in_start_x  = max(0L, -padding_l);
+        long in_start_y  = max(0L, -padding_t);
+        long out_start_x = max(0L, padding_l);
+        long out_start_y = max(0L, padding_t);
+
+        if(w < padding_l)
+        {
+            w = padding_l * 2 - w;
+        }
+        else if(padding_l <= w && w < in_W + padding_l)
+        {
+        }
+        else
+        {
+            w = (in_W + padding_l - 1) * 2 - w;
+        }
+        w = w - out_start_x + in_start_x;
+
+        if(h < padding_t)
+        {
+            h = padding_t * 2 - h;
+        }
+        else if(padding_t <= h && h < in_H + padding_t)
+        {
+        }
+        else
+        {
+            h = (in_H + padding_t - 1) * 2 - h;
+        }
+        h = h - out_start_y + in_start_y;
+
+        output[gid] = input[(input_strides[3] * (w)) + (input_strides[2] * (h)) +
+                            (input_strides[1] * (c)) + (input_strides[0] * (n)) + 0];
+    }
+}
+#endif
diff --git a/test/gtest/pad_reflection.cpp b/test/gtest/pad_reflection.cpp
new file mode 100644
index 0000000000..ad4563152c
--- /dev/null
+++ b/test/gtest/pad_reflection.cpp
@@ -0,0 +1,123 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#include "pad_reflection.hpp"
+#include <miopen/env.hpp>
+
+MIOPEN_DECLARE_ENV_VAR_STR(MIOPEN_TEST_FLOAT_ARG)
+MIOPEN_DECLARE_ENV_VAR_BOOL(MIOPEN_TEST_ALL)
+
+namespace pad_reflection {
+
+std::string GetFloatArg()
+{
+    const auto& tmp = env::value(MIOPEN_TEST_FLOAT_ARG);
+    if(tmp.empty())
+    {
+        return "";
+    }
+    return tmp;
+}
+
+struct GPU_PadReflection_FP32 : PadReflectionTest<float>
+{
+};
+
+struct GPU_PadReflection_FP16 : PadReflectionTest<half>
+{
+};
+
+struct GPU_PadReflection_BFP16 : PadReflectionTest<bfloat16>
+{
+};
+
+} // namespace pad_reflection
+
+using pad_reflection::GPU_PadReflection_BFP16;
+using pad_reflection::GPU_PadReflection_FP16;
+using pad_reflection::GPU_PadReflection_FP32;
+
+TEST_P(GPU_PadReflection_FP32, Test)
+{
+    if(!MIOPEN_TEST_ALL ||
+       (env::enabled(MIOPEN_TEST_ALL) && env::value(MIOPEN_TEST_FLOAT_ARG) == "--float"))
+    {
+        RunTest();
+        Verify();
+    }
+    else
+    {
+        GTEST_SKIP();
+    }
+};
+
+TEST_P(GPU_PadReflection_FP16, Test)
+{
+    if(!MIOPEN_TEST_ALL ||
+       (env::enabled(MIOPEN_TEST_ALL) && env::value(MIOPEN_TEST_FLOAT_ARG) == "--half"))
+    {
+        RunTest();
+        Verify();
+    }
+    else
+    {
+        GTEST_SKIP();
+    }
+};
+
+TEST_P(GPU_PadReflection_BFP16, Test)
+{
+    if(!MIOPEN_TEST_ALL ||
+       (env::enabled(MIOPEN_TEST_ALL) && env::value(MIOPEN_TEST_FLOAT_ARG) == "--bfloat16"))
+    {
+        RunTest();
+        Verify();
+    }
+    else
+    {
+        GTEST_SKIP();
+    }
+};
+
+INSTANTIATE_TEST_SUITE_P(Smoke,
+                         GPU_PadReflection_FP32,
+                         testing::ValuesIn(PadReflectionSmokeTestFloatConfigs()));
+INSTANTIATE_TEST_SUITE_P(Smoke,
+                         GPU_PadReflection_FP16,
+                         testing::ValuesIn(PadReflectionSmokeTestFloatConfigs()));
+INSTANTIATE_TEST_SUITE_P(Smoke,
+                         GPU_PadReflection_BFP16,
+                         testing::ValuesIn(PadReflectionSmokeTestFloatConfigs()));
+
+INSTANTIATE_TEST_SUITE_P(Full,
+                         GPU_PadReflection_FP32,
+                         testing::ValuesIn(PadReflectionTestFloatConfigs()));
+INSTANTIATE_TEST_SUITE_P(Full,
+                         GPU_PadReflection_FP16,
+                         testing::ValuesIn(PadReflectionTestFloatConfigs()));
+INSTANTIATE_TEST_SUITE_P(Full,
+                         GPU_PadReflection_BFP16,
+                         testing::ValuesIn(PadReflectionTestFloatConfigs()));
diff --git a/test/gtest/pad_reflection.hpp b/test/gtest/pad_reflection.hpp
new file mode 100644
index 0000000000..e1abac149d
--- /dev/null
+++ b/test/gtest/pad_reflection.hpp
@@ -0,0 +1,198 @@
+/*******************************************************************************
+ *
+ * MIT License
+ *
+ * Copyright (c) 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ *******************************************************************************/
+
+#include "../driver/tensor_driver.hpp"
+#include "cpu_pad_reflection.hpp"
+#include "get_handle.hpp"
+#include "random.hpp"
+#include "tensor_holder.hpp"
+#include "verify.hpp"
+#include <gtest/gtest.h>
+#include <miopen/miopen.h>
+#include <miopen/pad_reflection.hpp>
+
+struct PadReflectionCase
+{
+    size_t N;
+    size_t C;
+    size_t D;
+    size_t H;
+    size_t W;
+    size_t padding[4];
+    friend std::ostream& operator<<(std::ostream& os, const PadReflectionCase& tc)
+    {
+        return os << " N:" << tc.N << " C:" << tc.C << " D:" << tc.D << " H:" << tc.H
+                  << " W:" << tc.W << " Padding:" << tc.padding[0] << " " << tc.padding[1] << " "
+                  << tc.padding[2] << " " << tc.padding[3];
+    }
+
+    std::vector<size_t> GetInput()
+    {
+        if((N != 0) && (C != 0) && (D != 0) && (H != 0) && (W != 0))
+        {
+            return std::vector<size_t>({N, C, D, H, W});
+        }
+        else if((N != 0) && (C != 0) && (H != 0) && (W != 0))
+        {
+            return std::vector<size_t>({N, C, H, W});
+        }
+        else if((N != 0) && (C != 0) && (W != 0))
+        {
+            return std::vector<size_t>({N, C, W});
+        }
+        else if((N != 0) && (W != 0))
+        {
+            return std::vector<size_t>({N, W});
+        }
+        else if((N != 0))
+        {
+            return std::vector<size_t>({N});
+        }
+        else
+        {
+            std::cout << "Error Input Tensor Lengths\n" << std::endl;
+            return std::vector<size_t>({0});
+        }
+    }
+
+    std::vector<size_t> GetPadding()
+    {
+        std::vector<size_t> paddingVector;
+        for(int i = 0; i < 4; ++i)
+        {
+            paddingVector.push_back(padding[i]);
+        }
+        return paddingVector;
+    }
+};
+
+std::vector<PadReflectionCase> PadReflectionTestFloatConfigs()
+{ // n c d h w padding
+    // clang-format off
+    return {
+        { 48,   8,    0,  512,  512, {1, 1, 1, 1}},
+        { 48,   8,    0,  512,  512, {1, 1, 3, 3}},
+        { 48,   8,    0,  512,  512, {0, 0, 2, 2}},
+        { 16, 311,    0,   98,  512, {1, 1, 1, 1}},
+        { 16, 311,    0,   98,  512, {1, 1, 3, 3}},
+        { 16, 311,    0,   98,  512, {0, 0, 2, 2}},
+      };
+    // clang-format on
+}
+
+std::vector<PadReflectionCase> PadReflectionSmokeTestFloatConfigs()
+{ // n c d h w padding
+    // clang-format off
+    return {
+        {  1,   1,    0,    3,    3, {2}},
+      };
+    // clang-format on
+}
+
+template <typename T = float>
+struct PadReflectionTest : public ::testing::TestWithParam<PadReflectionCase>
+{
+protected:
+    void SetUp() override
+    {
+        auto&& handle         = get_handle();
+        pad_reflection_config = GetParam();
+        auto gen_value = [](auto...) { return prng::gen_descreet_uniform_sign<T>(1e-2, 100); };
+
+        auto in_dims = pad_reflection_config.GetInput();
+        auto padding = pad_reflection_config.GetPadding();
+        input        = tensor<T>{in_dims}.generate(gen_value);
+        std::vector<size_t> out_dims;
+
+        for(int i = 0; i < in_dims.size(); i++)
+        {
+            if(i == 2)
+            {
+                out_dims.push_back(in_dims[i] + 2 * padding[2]);
+            }
+            else if(i == 3)
+            {
+                out_dims.push_back(in_dims[i] + 2 * padding[0]);
+            }
+            else
+            {
+                out_dims.push_back(in_dims[i]);
+            }
+        }
+
+        output = tensor<T>{out_dims};
+        std::fill(output.begin(), output.end(), std::numeric_limits<T>::quiet_NaN());
+
+        ref_output = tensor<T>{out_dims};
+        std::fill(ref_output.begin(), ref_output.end(), std::numeric_limits<T>::quiet_NaN());
+
+        input_dev  = handle.Write(input.data);
+        output_dev = handle.Write(output.data);
+    }
+    void RunTest()
+    {
+        auto&& handle = get_handle();
+        auto padding  = pad_reflection_config.GetPadding();
+
+        cpu_pad_reflection<T>(input, ref_output, padding);
+        miopenStatus_t status;
+
+        status = miopen::PadReflection(handle,
+                                       input.desc,
+                                       input_dev.get(),
+                                       output.desc,
+                                       output_dev.get(),
+                                       padding.data(),
+                                       padding.size());
+
+        ASSERT_EQ(status, miopenStatusSuccess);
+
+        output.data = handle.Read<T>(output_dev, output.data.size());
+    }
+
+    void Verify()
+    {
+        // Computation error of fp16 is ~2^13 (=8192) bigger than
+        // the one of fp32 because mantissa is shorter by 13 bits.
+        auto tolerance = std::is_same<T, float>::value ? 1.5e-6 : 8.2e-3;
+        // bf16 mantissa has 7 bits, by 3 bits shorter than fp16.
+        if(std::is_same<T, bfloat16>::value)
+            tolerance *= 8.0;
+
+        auto error = miopen::rms_range(ref_output, output);
+        ASSERT_EQ(miopen::range_distance(ref_output), miopen::range_distance(output));
+        EXPECT_LT(error, tolerance);
+    }
+    PadReflectionCase pad_reflection_config;
+
+    tensor<T> input;
+    tensor<T> output;
+
+    tensor<T> ref_output;
+
+    miopen::Allocator::ManageDataPtr input_dev;
+    miopen::Allocator::ManageDataPtr output_dev;
+};