From e5aed463a28732ebec0bd57b4a6af87e6054ba2f Mon Sep 17 00:00:00 2001 From: Frank Barchard Date: Mon, 9 Dec 2024 10:58:22 -0800 Subject: [PATCH] QC4W GEMM test/benchmark use packw microkernels PiperOrigin-RevId: 704346968 --- bench/qd8-f16-qc4w-gemm.cc | 96 +++++++++--------- bench/qd8-f32-qc4w-gemm.cc | 158 ++++++++++++++--------------- test/qd8-f16-qc4w-gemm-minmax-2.cc | 24 ++--- test/qd8-f16-qc4w-gemm-minmax-3.cc | 26 ++--- test/qd8-f16-qc4w-gemm-minmax-4.cc | 22 ++-- test/qd8-f16-qc4w-gemm-minmax.cc | 24 ++--- test/qd8-f16-qc4w-gemm-minmax.yaml | 96 +++++++++--------- test/qd8-f32-qc4w-gemm-minmax-2.cc | 32 +++--- test/qd8-f32-qc4w-gemm-minmax-3.cc | 42 ++++---- test/qd8-f32-qc4w-gemm-minmax-4.cc | 36 +++---- test/qd8-f32-qc4w-gemm-minmax.cc | 48 ++++----- test/qd8-f32-qc4w-gemm-minmax.yaml | 158 ++++++++++++++--------------- 12 files changed, 381 insertions(+), 381 deletions(-) diff --git a/bench/qd8-f16-qc4w-gemm.cc b/bench/qd8-f16-qc4w-gemm.cc index 916d129ee52..4abce334a28 100644 --- a/bench/qd8-f16-qc4w-gemm.cc +++ b/bench/qd8-f16-qc4w-gemm.cc @@ -291,7 +291,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/1, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -302,7 +302,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/5, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -313,7 +313,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/7, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -324,7 +324,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/8, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -335,7 +335,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/9, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -346,7 +346,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/10, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -357,7 +357,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/12, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -368,7 +368,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/14, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -379,7 +379,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/1, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -390,7 +390,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/5, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -401,7 +401,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/7, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -412,7 +412,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/8, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -423,7 +423,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/9, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -434,7 +434,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/10, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -445,7 +445,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/12, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -456,7 +456,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/14, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -470,7 +470,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/1, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -481,7 +481,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/5, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -492,7 +492,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/7, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -503,7 +503,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/8, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -514,7 +514,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/9, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -525,7 +525,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/10, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -536,7 +536,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/12, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -547,7 +547,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/14, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -558,7 +558,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/1, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -569,7 +569,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/5, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -580,7 +580,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/7, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -591,7 +591,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/8, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -602,7 +602,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/9, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -613,7 +613,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/10, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -624,7 +624,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/12, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -635,7 +635,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/14, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -916,7 +916,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni, /*mr=*/1, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -927,7 +927,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_2x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni, /*mr=*/2, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -938,7 +938,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni, /*mr=*/3, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -949,7 +949,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_4x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni, /*mr=*/4, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -960,7 +960,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni, /*mr=*/5, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -971,7 +971,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_6x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni, /*mr=*/6, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -982,7 +982,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni, /*mr=*/7, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -993,7 +993,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni, /*mr=*/8, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -1004,7 +1004,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm, /*mr=*/1, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -1015,7 +1015,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_2x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm, /*mr=*/2, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -1026,7 +1026,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm, /*mr=*/3, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -1037,7 +1037,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_4x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm, /*mr=*/4, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -1048,7 +1048,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm, /*mr=*/5, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -1059,7 +1059,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_6x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm, /*mr=*/6, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -1070,7 +1070,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm, /*mr=*/7, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -1081,7 +1081,7 @@ GEMMBenchmark(state, xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm, /*mr=*/8, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } diff --git a/bench/qd8-f32-qc4w-gemm.cc b/bench/qd8-f32-qc4w-gemm.cc index 9066e099c1c..561f4c0344d 100644 --- a/bench/qd8-f32-qc4w-gemm.cc +++ b/bench/qd8-f32-qc4w-gemm.cc @@ -1057,7 +1057,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/1, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1068,7 +1068,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/5, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1079,7 +1079,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/7, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1090,7 +1090,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/8, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1101,7 +1101,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/9, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1112,7 +1112,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/10, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1123,7 +1123,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/12, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1134,7 +1134,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/14, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1145,7 +1145,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/1, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1156,7 +1156,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/5, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1167,7 +1167,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/7, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1178,7 +1178,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/8, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1189,7 +1189,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/9, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1200,7 +1200,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/10, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1211,7 +1211,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/12, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1222,7 +1222,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/14, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNIGFNI); } @@ -1445,7 +1445,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x16c8__avx512vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/5, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -1456,7 +1456,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/7, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -1467,7 +1467,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x16c8__avx512vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/8, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -1478,7 +1478,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x16c8__avx512vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/9, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -1489,7 +1489,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x16c8__avx512vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/10, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -1500,7 +1500,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x16c8__avx512vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/12, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -1511,7 +1511,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x16c8__avx512vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni, /*mr=*/14, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -1522,7 +1522,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/1, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -1533,7 +1533,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/5, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -1544,7 +1544,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/7, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -1555,7 +1555,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/8, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -1566,7 +1566,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/9, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -1577,7 +1577,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/10, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -1588,7 +1588,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/12, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -1599,7 +1599,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm, /*mr=*/14, /*nr=*/16, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX512VNNI); } @@ -2078,7 +2078,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/1, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2089,7 +2089,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/5, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2100,7 +2100,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/7, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2111,7 +2111,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/8, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2122,7 +2122,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/9, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2133,7 +2133,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/10, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2144,7 +2144,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/12, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2155,7 +2155,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/14, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2166,7 +2166,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/1, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2177,7 +2177,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/5, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2188,7 +2188,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/7, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2199,7 +2199,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/8, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2210,7 +2210,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/9, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2221,7 +2221,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/10, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2232,7 +2232,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/12, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2243,7 +2243,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/14, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNIGFNI); } @@ -2257,7 +2257,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/1, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2268,7 +2268,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/5, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2279,7 +2279,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/7, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2290,7 +2290,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/8, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2301,7 +2301,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/9, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2312,7 +2312,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/10, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2323,7 +2323,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/12, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2334,7 +2334,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni, /*mr=*/14, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2345,7 +2345,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/1, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2356,7 +2356,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/5, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2367,7 +2367,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/7, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2378,7 +2378,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/8, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2389,7 +2389,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/9, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2400,7 +2400,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/10, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2411,7 +2411,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/12, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2422,7 +2422,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm, /*mr=*/14, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVX256VNNI); } @@ -2703,7 +2703,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/1, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2714,7 +2714,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_2x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/2, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2725,7 +2725,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_3x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/3, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2736,7 +2736,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_4x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/4, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2747,7 +2747,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/5, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2758,7 +2758,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_6x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/6, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2769,7 +2769,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/7, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2780,7 +2780,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/8, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2791,7 +2791,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/1, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2802,7 +2802,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_2x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/2, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2813,7 +2813,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_3x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/3, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2824,7 +2824,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_4x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/4, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2835,7 +2835,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/5, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2846,7 +2846,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_6x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/6, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2857,7 +2857,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/7, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } @@ -2868,7 +2868,7 @@ GEMMBenchmark(state, xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w, + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar, /*mr=*/8, /*nr=*/8, /*kr=*/8, /*sr=*/1, benchmark::utils::CheckAVXVNNI); } diff --git a/test/qd8-f16-qc4w-gemm-minmax-2.cc b/test/qd8-f16-qc4w-gemm-minmax-2.cc index 0e05f9320e0..6b9f7a4f084 100644 --- a/test/qd8-f16-qc4w-gemm-minmax-2.cc +++ b/test/qd8-f16-qc4w-gemm-minmax-2.cc @@ -801,7 +801,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -821,7 +821,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -841,7 +841,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -861,7 +861,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -884,7 +884,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -904,7 +904,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -924,7 +924,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -944,7 +944,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -967,7 +967,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_2x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -987,7 +987,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -1007,7 +1007,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_6x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -1027,7 +1027,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm); }, []() { TEST_REQUIRES_X86_AVXVNNI; diff --git a/test/qd8-f16-qc4w-gemm-minmax-3.cc b/test/qd8-f16-qc4w-gemm-minmax-3.cc index 6ae023f7d33..4768b17cf94 100644 --- a/test/qd8-f16-qc4w-gemm-minmax-3.cc +++ b/test/qd8-f16-qc4w-gemm-minmax-3.cc @@ -678,7 +678,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -698,7 +698,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -718,7 +718,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -738,7 +738,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -758,7 +758,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -781,7 +781,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -801,7 +801,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -821,7 +821,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -841,7 +841,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -864,7 +864,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -884,7 +884,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_6x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -904,7 +904,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -924,7 +924,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm); }, []() { TEST_REQUIRES_X86_AVXVNNI; diff --git a/test/qd8-f16-qc4w-gemm-minmax-4.cc b/test/qd8-f16-qc4w-gemm-minmax-4.cc index a9d4a0676f4..009da624667 100644 --- a/test/qd8-f16-qc4w-gemm-minmax-4.cc +++ b/test/qd8-f16-qc4w-gemm-minmax-4.cc @@ -681,7 +681,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -704,7 +704,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -724,7 +724,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -744,7 +744,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -764,7 +764,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -784,7 +784,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -807,7 +807,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -827,7 +827,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_4x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -847,7 +847,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_2x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -867,7 +867,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -887,7 +887,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm); }, []() { TEST_REQUIRES_X86_AVXVNNI; diff --git a/test/qd8-f16-qc4w-gemm-minmax.cc b/test/qd8-f16-qc4w-gemm-minmax.cc index 096b6834dda..fa3f4fc189e 100644 --- a/test/qd8-f16-qc4w-gemm-minmax.cc +++ b/test/qd8-f16-qc4w-gemm-minmax.cc @@ -761,7 +761,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -781,7 +781,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -801,7 +801,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -821,7 +821,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -841,7 +841,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -861,7 +861,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -884,7 +884,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnnigfni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -904,7 +904,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -924,7 +924,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnnigfni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -947,7 +947,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avxvnni, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -967,7 +967,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_4x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -987,7 +987,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avxvnni_prfm, xnn_init_f16_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm); }, []() { TEST_REQUIRES_X86_AVXVNNI; diff --git a/test/qd8-f16-qc4w-gemm-minmax.yaml b/test/qd8-f16-qc4w-gemm-minmax.yaml index 74095e68f27..6c7fef78583 100644 --- a/test/qd8-f16-qc4w-gemm-minmax.yaml +++ b/test/qd8-f16-qc4w-gemm-minmax.yaml @@ -363,249 +363,249 @@ # AVX256 VNNI - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True # AVX256 VNNI GFNI - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnnigfni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnnigfni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnnigfni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnnigfni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnnigfni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnnigfni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnnigfni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnnigfni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnnigfni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnnigfni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnnigfni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnnigfni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnnigfni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnnigfni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnnigfni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnnigfni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True # AVXVNNI - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avxvnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_2x8c8__avxvnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avxvnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_4x8c8__avxvnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avxvnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_6x8c8__avxvnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avxvnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avxvnni init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avxvnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_2x8c8__avxvnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avxvnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_4x8c8__avxvnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avxvnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_6x8c8__avxvnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avxvnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f16_qc4w_gemm_minmax_ukernel_8x8c8__avxvnni_prfm init: xnn_init_f16_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avxvnni_prfm k-block: 16 unsigned-inputs: True diff --git a/test/qd8-f32-qc4w-gemm-minmax-2.cc b/test/qd8-f32-qc4w-gemm-minmax-2.cc index 66bc1f6f935..3a211880190 100644 --- a/test/qd8-f32-qc4w-gemm-minmax-2.cc +++ b/test/qd8-f32-qc4w-gemm-minmax-2.cc @@ -1436,7 +1436,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1456,7 +1456,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x16c8__avx512vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1476,7 +1476,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1539,7 +1539,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1559,7 +1559,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1579,7 +1579,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1599,7 +1599,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1619,7 +1619,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1642,7 +1642,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1665,7 +1665,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -1685,7 +1685,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -1705,7 +1705,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -1728,7 +1728,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -1748,7 +1748,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_6x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -1768,7 +1768,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -1788,7 +1788,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; diff --git a/test/qd8-f32-qc4w-gemm-minmax-3.cc b/test/qd8-f32-qc4w-gemm-minmax-3.cc index e712d97e868..0bb8b0f58b9 100644 --- a/test/qd8-f32-qc4w-gemm-minmax-3.cc +++ b/test/qd8-f32-qc4w-gemm-minmax-3.cc @@ -1313,7 +1313,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x16c8__avx512vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1333,7 +1333,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x16c8__avx512vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1353,7 +1353,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1373,7 +1373,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1393,7 +1393,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1576,7 +1576,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1596,7 +1596,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1619,7 +1619,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1639,7 +1639,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1659,7 +1659,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1679,7 +1679,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1702,7 +1702,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -1722,7 +1722,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -1742,7 +1742,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -1762,7 +1762,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -1782,7 +1782,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -1802,7 +1802,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -1825,7 +1825,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_2x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -1845,7 +1845,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -1865,7 +1865,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_6x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -1885,7 +1885,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; diff --git a/test/qd8-f32-qc4w-gemm-minmax-4.cc b/test/qd8-f32-qc4w-gemm-minmax-4.cc index 3c54e40a16b..e800590d552 100644 --- a/test/qd8-f32-qc4w-gemm-minmax-4.cc +++ b/test/qd8-f32-qc4w-gemm-minmax-4.cc @@ -1516,7 +1516,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x16c8__avx512vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1536,7 +1536,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1556,7 +1556,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1639,7 +1639,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1659,7 +1659,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1679,7 +1679,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1699,7 +1699,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1719,7 +1719,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x16c8__avx512vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1739,7 +1739,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1759,7 +1759,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1779,7 +1779,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1802,7 +1802,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1822,7 +1822,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1845,7 +1845,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -1865,7 +1865,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -1888,7 +1888,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -1908,7 +1908,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_4x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -1928,7 +1928,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; diff --git a/test/qd8-f32-qc4w-gemm-minmax.cc b/test/qd8-f32-qc4w-gemm-minmax.cc index 20de62f9810..866461aa8a6 100644 --- a/test/qd8-f32-qc4w-gemm-minmax.cc +++ b/test/qd8-f32-qc4w-gemm-minmax.cc @@ -1553,7 +1553,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x16c8__avx512vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1573,7 +1573,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x16c8__avx512vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1593,7 +1593,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1613,7 +1613,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x16c8__avx512vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNI; @@ -1736,7 +1736,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX512VNNIGFNI; @@ -1759,7 +1759,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1779,7 +1779,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1799,7 +1799,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1819,7 +1819,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1839,7 +1839,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1859,7 +1859,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1879,7 +1879,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1899,7 +1899,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1919,7 +1919,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNI; @@ -1942,7 +1942,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -1962,7 +1962,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -1982,7 +1982,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnnigfni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -2002,7 +2002,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -2022,7 +2022,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnnigfni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm); }, []() { TEST_REQUIRES_X86_AVX256VNNIGFNI; @@ -2045,7 +2045,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_3x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -2065,7 +2065,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_4x8c8__avxvnni, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -2085,7 +2085,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_2x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -2105,7 +2105,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_3x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; @@ -2125,7 +2125,7 @@ std::vector CreateTests1( [](GemmMicrokernelTester& tester) { tester.Test(xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avxvnni_prfm, xnn_init_f32_qc4w_minmax_scalar_params, - xnn_pack_qs8_qc4w_gemm_goi_w); + xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar); }, []() { TEST_REQUIRES_X86_AVXVNNI; diff --git a/test/qd8-f32-qc4w-gemm-minmax.yaml b/test/qd8-f32-qc4w-gemm-minmax.yaml index a3bdd7d7586..681ce849228 100644 --- a/test/qd8-f32-qc4w-gemm-minmax.yaml +++ b/test/qd8-f32-qc4w-gemm-minmax.yaml @@ -794,78 +794,78 @@ unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x16c8__avx512vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x16c8__avx512vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x16c8__avx512vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x16c8__avx512vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x16c8__avx512vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x16c8__avx512vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x16c8__avx512vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x16c8__avx512vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x16c8__avx512vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x16c8__avx512vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x16c8__avx512vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x16c8__avx512vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True @@ -964,332 +964,332 @@ - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x16c8__avx512vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x16c8__avx512vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x16c8__avx512vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x16c8__avx512vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x16c8__avx512vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x16c8__avx512vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x16c8__avx512vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x16c8__avx512vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x16c8__avx512vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x16c8__avx512vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x16c8__avx512vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x16c8__avx512vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x16c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True # AVX256 VNNI - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True # AVX256 VNNI GFNI - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnnigfni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx256vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avx256vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avx256vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avx256vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_9x8c8__avx256vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_10x8c8__avx256vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_12x8c8__avx256vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_14x8c8__avx256vnnigfni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__avx256vnni_prfm k-block: 16 unsigned-inputs: True # AVXVNNI - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avxvnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_2x8c8__avxvnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_3x8c8__avxvnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_4x8c8__avxvnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avxvnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_6x8c8__avxvnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avxvnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avxvnni init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avxvnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_2x8c8__avxvnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_3x8c8__avxvnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_4x8c8__avxvnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avxvnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_6x8c8__avxvnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avxvnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True - name: xnn_qd8_f32_qc4w_gemm_minmax_ukernel_8x8c8__avxvnni_prfm init: xnn_init_f32_qc4w_minmax_scalar_params - pack: xnn_pack_qs8_qc4w_gemm_goi_w + pack: xnn_qs8_qc4w_packw_gemm_goi_ukernel_x8c8__scalar k-block: 16 unsigned-inputs: True