Skip to content

Commit

Permalink
[Fix][TransformTensor] Ignore output buffer when BETA=0 (#3184)
Browse files Browse the repository at this point in the history
  • Loading branch information
atamazov authored Aug 8, 2024
1 parent ccfc34f commit a89a850
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 17 deletions.
49 changes: 39 additions & 10 deletions src/kernels/MIOpenSubTensorOpWithTransformKernel.cl
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,42 @@
#define WORK_STRIDE_1 (WORK_LENGTH_2 * WORK_STRIDE_2)
#define WORK_STRIDE_0 (WORK_LENGTH_1 * WORK_STRIDE_1)

#ifndef SUBTENSOR_OP_WITH_SCALAR
#define SUBTENSOR_OP_WITH_SCALAR BREAK_COMPILE_INTENTIONALLY
// Both specialization flags must be injected by the host compile line
// (built in tensorocl.cpp via -DMIOPEN_BETA_IS_ZERO=... -DMIOPEN_ALPHA_IS_ONE=...);
// fail the kernel build loudly if either is missing.
#ifndef MIOPEN_BETA_IS_ZERO
#error "MIOPEN_BETA_IS_ZERO must be defined"
#endif
#ifndef MIOPEN_ALPHA_IS_ONE
#error "MIOPEN_ALPHA_IS_ONE must be defined"
#endif

#define SUBTENSOR_OP_WITH_SCALAR_SET(t, a) (t = a)
#define SUBTENSOR_OP_WITH_SCALAR_MULTIPLY(t, a) (t *= a)
#define SUBTENSOR_OP_WITH_SCALAR_MAD(tb, b, ta, a) (tb = mad(ta, a, tb * b))
#if MIOPEN_BETA_IS_ZERO && MIOPEN_ALPHA_IS_ONE
// alpha == 1 && beta == 0: plain copy. dst is never read, so an
// uninitialized output buffer is safe (the point of this commit).
// The (void) casts only silence unused-variable warnings for the
// kernel's alpha/beta arguments.
#define SUBTENSOR_OP_WITH_ALPHA_BETA(dst, src) \
do \
{ \
(dst) = (src); \
(void)beta; \
(void)alpha; \
} while(0)
#elif MIOPEN_BETA_IS_ZERO
// beta == 0: scale-only, dst = src * alpha. dst is still never read.
#define SUBTENSOR_OP_WITH_ALPHA_BETA(dst, src) \
do \
{ \
(dst) = (src)*alpha; \
(void)beta; \
} while(0)
#elif MIOPEN_ALPHA_IS_ONE
// alpha == 1: dst = dst * beta + src, via OpenCL mad(a, b, c) = a*b + c.
#define SUBTENSOR_OP_WITH_ALPHA_BETA(dst, src) \
do \
{ \
(dst) = mad((dst), beta, (src)); \
(void)alpha; \
} while(0)
#else
// General case: dst = src * alpha + dst * beta.
#define SUBTENSOR_OP_WITH_ALPHA_BETA(dst, src) \
do \
{ \
(dst) = mad((src), alpha, (dst)*beta); \
} while(0)
#endif

__kernel void SubTensorOpWithTransform1d(global _FLOAT* __restrict src,
const _FLOAT alpha,
Expand All @@ -127,7 +156,7 @@ __kernel void SubTensorOpWithTransform1d(global _FLOAT* __restrict src,
uint si = src_stride0 * did0 + src_offset;
uint di = dst_stride0 * did0 + dst_offset;

SUBTENSOR_OP_WITH_SCALAR(dst[di], beta, src[si], alpha);
SUBTENSOR_OP_WITH_ALPHA_BETA(dst[di], src[si]);
}
}

Expand Down Expand Up @@ -159,7 +188,7 @@ __kernel void SubTensorOpWithTransform2d(global _FLOAT* __restrict src,
uint si = src_stride0 * did0 + src_stride1 * did1 + src_offset;
uint di = dst_stride0 * did0 + dst_stride1 * did1 + dst_offset;

SUBTENSOR_OP_WITH_SCALAR(dst[di], beta, src[si], alpha);
SUBTENSOR_OP_WITH_ALPHA_BETA(dst[di], src[si]);
}
}
}
Expand Down Expand Up @@ -201,7 +230,7 @@ __kernel void SubTensorOpWithTransform3d(global _FLOAT* __restrict src,
uint si = src_stride0 * did0 + src_stride1 * did1 + src_stride2 * did2 + src_offset;
uint di = dst_stride0 * did0 + dst_stride1 * did1 + dst_stride2 * did2 + dst_offset;

SUBTENSOR_OP_WITH_SCALAR(dst[di], beta, src[si], alpha);
SUBTENSOR_OP_WITH_ALPHA_BETA(dst[di], src[si]);
}
}
}
Expand Down Expand Up @@ -255,7 +284,7 @@ __kernel void SubTensorOpWithTransform4d(global _FLOAT* __restrict src,
uint di = dst_stride0 * did0 + dst_stride1 * did1 + dst_stride2 * did2 +
dst_stride3 * did3 + dst_offset;

SUBTENSOR_OP_WITH_SCALAR(dst[di], beta, src[si], alpha);
SUBTENSOR_OP_WITH_ALPHA_BETA(dst[di], src[si]);
}
}
}
Expand Down Expand Up @@ -319,7 +348,7 @@ __kernel void SubTensorOpWithTransform5d(global _FLOAT* __restrict src,
uint di = dst_stride0 * did0 + dst_stride1 * did1 + dst_stride2 * did2 +
dst_stride3 * did3 + dst_stride4 * did4 + dst_offset;

SUBTENSOR_OP_WITH_SCALAR(dst[di], beta, src[si], alpha);
SUBTENSOR_OP_WITH_ALPHA_BETA(dst[di], src[si]);
}
}
}
Expand Down
17 changes: 10 additions & 7 deletions src/ocl/tensorocl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2187,6 +2187,9 @@ void TransformTensor(const Handle& handle,
MIOPEN_THROW("Tensor x and y batch sizes do not match");
}

const auto is_alpha_one = float_equal(*(static_cast<const float*>(alpha)), 1);
const auto is_beta_zero = float_equal(*(static_cast<const float*>(beta)), 0);

if(xDesc.GetType() == miopenInt8 && yDesc.GetType() == miopenInt8 && x_len.size() >= 3)
{
if(x_len[1] <= y_len[1])
Expand Down Expand Up @@ -2221,8 +2224,7 @@ void TransformTensor(const Handle& handle,
size_t x_offset = i * x_batch_sz;
size_t y_offset = i * y_batch_sz;

if(float_equal(*(static_cast<const float*>(alpha)), 1) &&
float_equal(*(static_cast<const float*>(beta)), 0))
if(is_alpha_one && is_beta_zero)
{
CopyTensor(handle,
((x_len[1] <= y_len[1]) ? x_batch_desc : y_batch_desc),
Expand All @@ -2234,7 +2236,8 @@ void TransformTensor(const Handle& handle,
}
else
{
// TODO: support y=alpha*x+beta*y
MIOPEN_THROW(miopenStatusNotImplemented,
"y=alpha*x+beta*y is not supported for int8 yet");
}
}
}
Expand All @@ -2254,7 +2257,6 @@ void TransformTensor(const Handle& handle,
const TensorDescriptor& xDesc_flat = std::get<0>(flat_descriptors);
const TensorDescriptor& yDesc_flat = std::get<1>(flat_descriptors);

#ifndef NDEBUG
if(xDesc.GetNumDims() != xDesc_flat.GetNumDims())
{
MIOPEN_LOG_I2("x real descriptor: " << xDesc);
Expand All @@ -2266,7 +2268,6 @@ void TransformTensor(const Handle& handle,
MIOPEN_LOG_I2("y real descriptor: " << yDesc);
MIOPEN_LOG_I2("y flat descriptor: " << yDesc_flat);
}
#endif

const std::size_t yDim_flat = yDesc_flat.GetNumDims();

Expand Down Expand Up @@ -2329,8 +2330,10 @@ void TransformTensor(const Handle& handle,

std::size_t wld = 256 < wgd ? 256 : wgd;

std::string parms = "-DSUBTENSOR_OP_WITH_SCALAR=SUBTENSOR_OP_WITH_SCALAR_MAD" +
GetDataTypeKernelParams(dataTypey);
std::string parms =
GetDataTypeKernelParams(dataTypey) //
+ " -DMIOPEN_BETA_IS_ZERO=" + std::to_string(static_cast<int>(is_beta_zero)) //
+ " -DMIOPEN_ALPHA_IS_ONE=" + std::to_string(static_cast<int>(is_alpha_one));

for(int i = 0; i < yDim_flat; ++i)
{
Expand Down

0 comments on commit a89a850

Please sign in to comment.