Skip to content

Commit

Permalink
*fix bug in AMX-BF16 optimizations of class SynetConvolution32fBf16NhwcGemm.
Browse files Browse the repository at this point in the history
  • Loading branch information
ermig1979 committed Mar 13, 2024
1 parent 5ca9f05 commit 636728d
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/Simd/SimdAmxBf16SynetConvolution32fBf16NhwcGemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ namespace Simd
else
{
//SIMD_PERF_BEG("solid");
- dst += b * p.dstH * p.dstW * p.dstC;
+ dst += b * p.dstH * p.dstW * a.bufK;
for (size_t n = (yEnd - yBeg) * p.srcW * p.srcC, i = 0; i < n; i += 32)
Float32ToBFloat16<false, false>(src + i, dst + i, srcMask, dstMask);
}
Expand Down
4 changes: 3 additions & 1 deletion src/Test/TestSynetConvolution32f.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -651,12 +651,14 @@ namespace Test
//result = result && SynetConvolution32fForwardAutoTest(eps, Param(1, 48, 160, 160, 32, _1, _1, _1, _0, _0, 1, a, t), c, f1, f2);
//result = result && SynetConvolution32fForwardAutoTest(eps, Param(1, 32, 80, 80, 32, _3, _1, _1, _1, _1, 1, a, t), c, f1, f2);
//result = result && SynetConvolution32fForwardAutoTest(eps, Param(1, 96, 80, 80, 64, _1, _1, _1, _0, _0, 1, a, t), c, f1, f2);
+ result = result && SynetConvolution32fForwardAutoTest(eps, Param(2, 256, 1, 1, 512, _1, _1, _1, _0, _0, 1, a, t), c, f1, f2);
result = result && SynetConvolution32fForwardAutoTest(eps, Param(1, 1664, 40, 40, 512, _1, _1, _1, _0, _0, 1, a, t), c, f1, f2);
result = result && SynetConvolution32fForwardAutoTest(eps, Param(2, 192, 5, 5, 256, _3, _1, _1, _0, _0, 1, a, t), c, f1, f2);
#endif
#else
//result = result && SynetConvolution32fForwardAutoTest(eps, Param(1, 16, 160, 160, 16, _3, _1, _1, _1, _1, 1, a, t), c, f1, f2);
- result = result && SynetConvolution32fForwardAutoTest(eps, Param(2, 192, 5, 5, 256, _3, _1, _1, _0, _0, 1, a, t), c, f1, f2);
+ //result = result && SynetConvolution32fForwardAutoTest(eps, Param(2, 192, 5, 5, 256, _3, _1, _1, _0, _0, 1, a, t), c, f1, f2);
+ result = result && SynetConvolution32fForwardAutoTest(eps, Param(2, 256, 1, 1, 512, _1, _1, _1, _0, _0, 1, a, t), c, f1, f2);
//result = result && SynetConvolution32fForwardAutoTest(eps, Param(1, 3, 1, 8, 8, _1, _1, _1, _0, _0, 1, aId, tT), bf16, f1, f2);


Expand Down

0 comments on commit 636728d

Please sign in to comment.