Skip to content

Commit

Permalink
fix: prevent buffer overflow (BOF) in the last channel handling
Browse files: browse the repository at this point in the history
Signed-off-by: Junwha Hong <[email protected]>
  • Loading branch information
junwha committed Oct 14, 2024
1 parent 1c7af00 commit 190f85b
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 6 deletions.
48 changes: 45 additions & 3 deletions src/layer/arm/shufflechannel_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ int ShuffleChannel_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 2;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
float32x4_t _p0 = vld1q_f32(ptr0);
float32x4_t _p1 = vld1q_f32(ptr1);
Expand All @@ -130,6 +130,20 @@ int ShuffleChannel_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 4;
outptr0 += 4;
}

for (int i = 0; i < 4; i++) {
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
}

return 0;
Expand Down Expand Up @@ -364,7 +378,7 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo

ptr1 += 4;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
uint16x4_t _p0 = vld1_u16(ptr0);
uint16x4_t _p1 = vld1_u16(ptr1);
Expand All @@ -378,6 +392,20 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo
ptr1 += 8;
outptr0 += 8;
}

for (int i = 0; i < 8; i++) {
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
}

return 0;
Expand Down Expand Up @@ -598,7 +626,7 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo

ptr1 += 2;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
uint16x4_t _p0 = vld1_u16(ptr0);
uint16x4_t _p1 = vld1_u16(ptr1);
Expand All @@ -611,6 +639,20 @@ int ShuffleChannel_arm::forward_bf16s_fp16s(const Mat& bottom_blob, Mat& top_blo
ptr1 += 4;
outptr0 += 4;
}

for (int i = 0; i < 4; i++) {
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
}

return 0;
Expand Down
48 changes: 45 additions & 3 deletions src/layer/x86/shufflechannel_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 8;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
__m256 _p0 = _mm256_loadu_ps(ptr0);
__m256 _p1 = _mm256_loadu_ps(ptr1);
Expand All @@ -134,6 +134,20 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 16;
outptr += 16;
}

for (int i = 0; i < 16; i++) {
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
}

return 0;
Expand Down Expand Up @@ -372,7 +386,7 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 4;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
__m128 _p0 = _mm_loadu_ps(ptr0);
__m128 _p1 = _mm_loadu_ps(ptr1);
Expand All @@ -387,6 +401,20 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 8;
outptr += 8;
}

for (int i = 0; i < 8; i++) {
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
}

return 0;
Expand Down Expand Up @@ -607,7 +635,7 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt

ptr1 += 2;

for (int i = 0; i < size; i++)
for (int i = 0; i < size - 1; i++)
{
__m128 _p0 = _mm_loadu_ps(ptr0);
__m128 _p1 = _mm_loadu_ps(ptr1);
Expand All @@ -620,6 +648,20 @@ int ShuffleChannel_x86::forward(const Mat& bottom_blob, Mat& top_blob, const Opt
ptr1 += 4;
outptr += 4;
}

for (int i = 0; i < 4; i++) {
if (i % 2)
{
*outptr = *ptr1;
ptr1 += 1;
}
else
{
*outptr = *ptr0;
ptr0 += 1;
}
outptr += 1;
}
}

return 0;
Expand Down

0 comments on commit 190f85b

Please sign in to comment.