diff --git a/BUILD.bazel b/BUILD.bazel index 39c2847d453..d801e12d84b 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -62,6 +62,7 @@ MICROKERNEL_DEFS = [ "src/f16-dwconv/f16-dwconv-minmax-multipass.h", "src/f16-dwconv/f16-dwconv-minmax-unipass.h", "src/f16-f32-vcvt/f16-f32-vcvt.h", + "src/f16-ibilinear/f16-ibilinear.h", "src/f16-maxpool/f16-maxpool-minmax.h", "src/f16-pavgpool/f16-pavgpool-minmax.h", "src/f16-qu8-vcvt/f16-qu8-vcvt.h", @@ -108,6 +109,7 @@ MICROKERNEL_DEFS = [ "src/f32-dwconv/f32-dwconv-multipass.h", "src/f32-dwconv/f32-dwconv-unipass.h", "src/f32-f16-vcvt/f32-f16-vcvt.h", + "src/f32-ibilinear/f32-ibilinear.h", "src/f32-maxpool/f32-maxpool-minmax.h", "src/f32-pavgpool/f32-pavgpool-minmax.h", "src/f32-qs8-vcvt/f32-qs8-vcvt.h", @@ -190,8 +192,10 @@ MICROKERNEL_DEFS = [ "src/qu8-vmul/qu8-vmul-minmax-rndnu.h", "src/qu8-vmulc/qu8-vmulc-minmax-fp32.h", "src/qu8-vmulc/qu8-vmulc-minmax-rndnu.h", + "src/s8-ibilinear/s8-ibilinear.h", "src/s8-maxpool/s8-maxpool-minmax.h", "src/s8-vclamp/s8-vclamp.h", + "src/u8-ibilinear/u8-ibilinear.h", "src/u8-maxpool/u8-maxpool-minmax.h", "src/u8-vclamp/u8-vclamp.h", "src/xx-fill/xx-fill.h", diff --git a/scripts/generate-tests.sh b/scripts/generate-tests.sh index 58e6de3133f..d84c2a3580b 100755 --- a/scripts/generate-tests.sh +++ b/scripts/generate-tests.sh @@ -236,10 +236,10 @@ tools/generate-vunary-test.py --ukernel f16-vhswish --output test/f16-vhswish.cc tools/generate-vunary-test.py --ukernel f32-vhswish --output test/f32-vhswish.cc & ### Tests for IBilinear micro-kernels -tools/generate-ibilinear-test.py --spec test/f16-ibilinear.yaml --output test/f16-ibilinear.cc & -tools/generate-ibilinear-test.py --spec test/f32-ibilinear.yaml --output test/f32-ibilinear.cc & -tools/generate-ibilinear-test.py --spec test/s8-ibilinear.yaml --output test/s8-ibilinear.cc & -tools/generate-ibilinear-test.py --spec test/u8-ibilinear.yaml --output test/u8-ibilinear.cc & +tools/generate-ibilinear-test.py --tester IBilinearMicrokernelTester 
--ukernel f16-ibilinear --output test/f16-ibilinear.cc & +tools/generate-ibilinear-test.py --tester IBilinearMicrokernelTester --ukernel f32-ibilinear --output test/f32-ibilinear.cc & +tools/generate-ibilinear-test.py --tester IBilinearMicrokernelTester --ukernel s8-ibilinear --output test/s8-ibilinear.cc & +tools/generate-ibilinear-test.py --tester IBilinearMicrokernelTester --ukernel u8-ibilinear --output test/u8-ibilinear.cc & ### Tests for IBilinear CHW layout micro-kernels tools/generate-ibilinear-chw-test.py --spec test/f16-ibilinear-chw.yaml --output test/f16-ibilinear-chw.cc & diff --git a/src/f16-ibilinear/f16-ibilinear.h b/src/f16-ibilinear/f16-ibilinear.h new file mode 100644 index 00000000000..73a8101cc22 --- /dev/null +++ b/src/f16-ibilinear/f16-ibilinear.h @@ -0,0 +1,35 @@ +// Copyright 2023 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#ifndef XNN_UKERNEL_WITH_PARAMS +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \ + XNN_UKERNEL(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) +#define XNN_DEFINED_UKERNEL_WITH_PARAMS +#endif + +#ifndef XNN_UKERNEL +#define XNN_UKERNEL(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \ + XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, void, /*init_params=*/nullptr) +#define XNN_DEFINED_UKERNEL +#endif + +#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_fp16_arith, xnn_f16_ibilinear_ukernel__neonfp16arith_c8, 8, 1, xnn_float16, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_fp16_arith, xnn_f16_ibilinear_ukernel__neonfp16arith_c16, 16, 1, xnn_float16, xnn_float16, struct xnn_f16_default_params, NULL) 
+#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) + +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_fma3, xnn_f16_ibilinear_ukernel__fma3_c8, 8, 1, xnn_float16, xnn_float16, struct xnn_f16_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_fma3, xnn_f16_ibilinear_ukernel__fma3_c16, 16, 1, xnn_float16, xnn_float16, struct xnn_f16_default_params, NULL) +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 + +#ifdef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_UKERNEL_WITH_PARAMS +#endif +#ifdef XNN_DEFINED_UKERNEL +#undef XNN_DEFINED_UKERNEL +#undef XNN_UKERNEL +#endif diff --git a/src/f32-ibilinear/f32-ibilinear.h b/src/f32-ibilinear/f32-ibilinear.h new file mode 100644 index 00000000000..fa0a7db6583 --- /dev/null +++ b/src/f32-ibilinear/f32-ibilinear.h @@ -0,0 +1,49 @@ +// Copyright 2023 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
+ +#ifndef XNN_UKERNEL_WITH_PARAMS +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \ + XNN_UKERNEL(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) +#define XNN_DEFINED_UKERNEL_WITH_PARAMS +#endif + +#ifndef XNN_UKERNEL +#define XNN_UKERNEL(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \ + XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, void, /*init_params=*/nullptr) +#define XNN_DEFINED_UKERNEL +#endif + +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_ibilinear_ukernel__scalar_c1, 1, 1, float, float, void, nullptr) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_ibilinear_ukernel__scalar_c2, 2, 1, float, float, void, nullptr) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_ibilinear_ukernel__scalar_c4, 4, 1, float, float, void, nullptr) + +#if (XNN_ARCH_ARM || XNN_ARCH_ARM64) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_ibilinear_ukernel__neon_c4, 4, 1, float, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_ibilinear_ukernel__neon_c8, 8, 1, float, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fma, xnn_f32_ibilinear_ukernel__neonfma_c4, 4, 1, float, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon_fma, xnn_f32_ibilinear_ukernel__neonfma_c8, 8, 1, float, float, struct xnn_f32_default_params, NULL) +#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 + +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_ibilinear_ukernel__sse_c4, 4, 1, float, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_ibilinear_ukernel__sse_c8, 8, 1, float, float, struct xnn_f32_default_params, NULL) +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 + +#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD +XNN_UKERNEL_WITH_PARAMS(0, 
xnn_f32_ibilinear_ukernel__wasmsimd_c4, 4, 1, float, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_ibilinear_ukernel__wasmsimd_c8, 8, 1, float, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c4, 4, 1, float, float, struct xnn_f32_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c8, 8, 1, float, float, struct xnn_f32_default_params, NULL) +#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD + +#ifdef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_UKERNEL_WITH_PARAMS +#endif + +#ifdef XNN_DEFINED_UKERNEL +#undef XNN_DEFINED_UKERNEL +#undef XNN_UKERNEL +#endif diff --git a/src/s8-ibilinear/s8-ibilinear.h b/src/s8-ibilinear/s8-ibilinear.h new file mode 100644 index 00000000000..6fa8e470210 --- /dev/null +++ b/src/s8-ibilinear/s8-ibilinear.h @@ -0,0 +1,49 @@ +// Copyright 2023 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
+ +#ifndef XNN_UKERNEL_WITH_PARAMS +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \ + XNN_UKERNEL(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) +#define XNN_DEFINED_UKERNEL_WITH_PARAMS +#endif + +#ifndef XNN_UKERNEL +#define XNN_UKERNEL(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \ + XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, void, /*init_params=*/nullptr) +#define XNN_DEFINED_UKERNEL +#endif + +XNN_UKERNEL_WITH_PARAMS(0, xnn_s8_ibilinear_ukernel__scalar_c1, 1, 1, int8_t, int16_t, void, nullptr) +XNN_UKERNEL_WITH_PARAMS(0, xnn_s8_ibilinear_ukernel__scalar_c2, 2, 1, int8_t, int16_t, void, nullptr) +XNN_UKERNEL_WITH_PARAMS(0, xnn_s8_ibilinear_ukernel__scalar_c4, 4, 1, int8_t, int16_t, void, nullptr) + +#if XNN_ARCH_ARM || XNN_ARCH_ARM64 +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_s8_ibilinear_ukernel__neon_c8, 8, 1, int8_t, int16_t, struct xnn_s8_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_s8_ibilinear_ukernel__neon_c16, 16, 1, int8_t, int16_t, struct xnn_s8_default_params, NULL) +#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 + +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 +XNN_UKERNEL_WITH_PARAMS(0, xnn_s8_ibilinear_ukernel__sse2_c8, 8, 1, int8_t, int16_t, struct xnn_s8_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_s8_ibilinear_ukernel__sse2_c16, 16, 1, int8_t, int16_t, struct xnn_s8_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_sse4_1, xnn_s8_ibilinear_ukernel__sse41_c8, 8, 1, int8_t, int16_t, struct xnn_s8_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_sse4_1, xnn_s8_ibilinear_ukernel__sse41_c16, 16, 1, int8_t, int16_t, struct xnn_s8_default_params, NULL) +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 + +#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD +XNN_UKERNEL_WITH_PARAMS(0, 
xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8, 8, 1, int8_t, int16_t, struct xnn_s8_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16, 16, 1, int8_t, int16_t, struct xnn_s8_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8, 8, 1, int8_t, int16_t, struct xnn_s8_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16, 16, 1, int8_t, int16_t, struct xnn_s8_default_params, NULL) +#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD + +#ifdef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_UKERNEL_WITH_PARAMS +#endif + +#ifdef XNN_DEFINED_UKERNEL +#undef XNN_DEFINED_UKERNEL +#undef XNN_UKERNEL +#endif diff --git a/src/u8-ibilinear/u8-ibilinear.h b/src/u8-ibilinear/u8-ibilinear.h new file mode 100644 index 00000000000..e0ce346598b --- /dev/null +++ b/src/u8-ibilinear/u8-ibilinear.h @@ -0,0 +1,49 @@ +// Copyright 2023 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
+ +#ifndef XNN_UKERNEL_WITH_PARAMS +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \ + XNN_UKERNEL(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) +#define XNN_DEFINED_UKERNEL_WITH_PARAMS +#endif + +#ifndef XNN_UKERNEL +#define XNN_UKERNEL(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \ + XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, void, /*init_params=*/nullptr) +#define XNN_DEFINED_UKERNEL +#endif + +XNN_UKERNEL_WITH_PARAMS(0, xnn_u8_ibilinear_ukernel__scalar_c1, 1, 1, uint8_t, int16_t, void, nullptr) +XNN_UKERNEL_WITH_PARAMS(0, xnn_u8_ibilinear_ukernel__scalar_c2, 2, 1, uint8_t, int16_t, void, nullptr) +XNN_UKERNEL_WITH_PARAMS(0, xnn_u8_ibilinear_ukernel__scalar_c4, 4, 1, uint8_t, int16_t, void, nullptr) + +#if XNN_ARCH_ARM || XNN_ARCH_ARM64 +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_u8_ibilinear_ukernel__neon_c8, 8, 1, uint8_t, int16_t, struct xnn_u8_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_u8_ibilinear_ukernel__neon_c16, 16, 1, uint8_t, int16_t, struct xnn_u8_default_params, NULL) +#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 + +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 +XNN_UKERNEL_WITH_PARAMS(0, xnn_u8_ibilinear_ukernel__sse2_c8, 8, 1, uint8_t, int16_t, struct xnn_u8_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_u8_ibilinear_ukernel__sse2_c16, 16, 1, uint8_t, int16_t, struct xnn_u8_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_sse4_1, xnn_u8_ibilinear_ukernel__sse41_c8, 8, 1, uint8_t, int16_t, struct xnn_u8_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86_sse4_1, xnn_u8_ibilinear_ukernel__sse41_c16, 16, 1, uint8_t, int16_t, struct xnn_u8_default_params, NULL) +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 + +#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD 
+XNN_UKERNEL_WITH_PARAMS(0, xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8, 8, 1, uint8_t, int16_t, struct xnn_u8_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16, 16, 1, uint8_t, int16_t, struct xnn_u8_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8, 8, 1, uint8_t, int16_t, struct xnn_u8_default_params, NULL) +XNN_UKERNEL_WITH_PARAMS(0, xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16, 16, 1, uint8_t, int16_t, struct xnn_u8_default_params, NULL) +#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD + +#ifdef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_UKERNEL_WITH_PARAMS +#endif + +#ifdef XNN_DEFINED_UKERNEL +#undef XNN_DEFINED_UKERNEL +#undef XNN_UKERNEL +#endif diff --git a/src/xnnpack/ibilinear.h b/src/xnnpack/ibilinear.h index 5034917e28f..9dc5ba21aa4 100644 --- a/src/xnnpack/ibilinear.h +++ b/src/xnnpack/ibilinear.h @@ -16,111 +16,20 @@ extern "C" { #endif -#define DECLARE_F16_IBILINEAR_UKERNEL_FUNCTION(fn_name) \ - XNN_INTERNAL void fn_name( \ - size_t output_pixels, \ - size_t channels, \ - const xnn_float16** input, \ - size_t input_offset, \ - const xnn_float16* weights, \ - xnn_float16* output, \ - size_t output_increment); - -DECLARE_F16_IBILINEAR_UKERNEL_FUNCTION(xnn_f16_ibilinear_ukernel__fma3_c8) -DECLARE_F16_IBILINEAR_UKERNEL_FUNCTION(xnn_f16_ibilinear_ukernel__fma3_c16) - -DECLARE_F16_IBILINEAR_UKERNEL_FUNCTION(xnn_f16_ibilinear_ukernel__neonfp16arith_c8) -DECLARE_F16_IBILINEAR_UKERNEL_FUNCTION(xnn_f16_ibilinear_ukernel__neonfp16arith_c16) - - -#define DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(fn_name) \ - XNN_INTERNAL void fn_name( \ - size_t output_pixels, \ - size_t channels, \ - const float** input, \ - size_t input_offset, \ - const float* weights, \ - float* output, \ - size_t output_increment); - -DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__scalar_c1) 
-DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__scalar_c2) -DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__scalar_c4) - -DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__neon_c4) -DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__neon_c8) - -DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__neonfma_c4) -DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__neonfma_c8) - -DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__sse_c4) -DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__sse_c8) - -DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__wasmsimd_c4) -DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__wasmsimd_c8) - -DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c4) -DECLARE_F32_IBILINEAR_UKERNEL_FUNCTION(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c8) - - -#define DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(fn_name) \ +#define XNN_UKERNEL(arch_flags, fn_name, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \ XNN_INTERNAL void fn_name( \ size_t output_pixels, \ size_t channels, \ - const int8_t** input, \ + const datatype** input, \ size_t input_offset, \ - const int16_t* weights, \ - int8_t* output, \ + const weight_type* weights, \ + datatype* output, \ size_t output_increment); - -DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(xnn_s8_ibilinear_ukernel__scalar_c1) -DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(xnn_s8_ibilinear_ukernel__scalar_c2) -DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(xnn_s8_ibilinear_ukernel__scalar_c4) - -DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(xnn_s8_ibilinear_ukernel__neon_c8) -DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(xnn_s8_ibilinear_ukernel__neon_c16) - -DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(xnn_s8_ibilinear_ukernel__sse2_c8) -DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(xnn_s8_ibilinear_ukernel__sse2_c16) - 
-DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(xnn_s8_ibilinear_ukernel__sse41_c8) -DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(xnn_s8_ibilinear_ukernel__sse41_c16) - -DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8) -DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16) - -DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8) -DECLARE_S8_IBILINEAR_UKERNEL_FUNCTION(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16) - -#define DECLARE_U8_IBILINEAR_UKERNEL_FUNCTION(fn_name) \ - XNN_INTERNAL void fn_name( \ - size_t output_pixels, \ - size_t channels, \ - const uint8_t** input, \ - size_t input_offset, \ - const int16_t* weights, \ - uint8_t* output, \ - size_t output_increment); - - -DECLARE_U8_IBILINEAR_UKERNEL_FUNCTION(xnn_u8_ibilinear_ukernel__scalar_c1) -DECLARE_U8_IBILINEAR_UKERNEL_FUNCTION(xnn_u8_ibilinear_ukernel__scalar_c2) -DECLARE_U8_IBILINEAR_UKERNEL_FUNCTION(xnn_u8_ibilinear_ukernel__scalar_c4) - -DECLARE_U8_IBILINEAR_UKERNEL_FUNCTION(xnn_u8_ibilinear_ukernel__neon_c8) -DECLARE_U8_IBILINEAR_UKERNEL_FUNCTION(xnn_u8_ibilinear_ukernel__neon_c16) - -DECLARE_U8_IBILINEAR_UKERNEL_FUNCTION(xnn_u8_ibilinear_ukernel__sse2_c8) -DECLARE_U8_IBILINEAR_UKERNEL_FUNCTION(xnn_u8_ibilinear_ukernel__sse2_c16) - -DECLARE_U8_IBILINEAR_UKERNEL_FUNCTION(xnn_u8_ibilinear_ukernel__sse41_c8) -DECLARE_U8_IBILINEAR_UKERNEL_FUNCTION(xnn_u8_ibilinear_ukernel__sse41_c16) - -DECLARE_U8_IBILINEAR_UKERNEL_FUNCTION(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8) -DECLARE_U8_IBILINEAR_UKERNEL_FUNCTION(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16) - -DECLARE_U8_IBILINEAR_UKERNEL_FUNCTION(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8) -DECLARE_U8_IBILINEAR_UKERNEL_FUNCTION(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16) +#include "f16-ibilinear/f16-ibilinear.h" +#include "f32-ibilinear/f32-ibilinear.h" +#include "s8-ibilinear/s8-ibilinear.h" +#include "u8-ibilinear/u8-ibilinear.h" +#undef XNN_UKERNEL #define 
DECLARE_F32_IBILINEAR_CHW_UKERNEL_FUNCTION(fn_name) \ XNN_INTERNAL void fn_name( \ diff --git a/test/f16-ibilinear.cc b/test/f16-ibilinear.cc index b5039dbbcc1..9c9bc976d4f 100644 --- a/test/f16-ibilinear.cc +++ b/test/f16-ibilinear.cc @@ -4,7 +4,7 @@ // LICENSE file in the root directory of this source tree. // // Auto-generated file. Do not edit! -// Specification: test/f16-ibilinear.yaml +// Microkernel: f16-ibilinear // Generator: tools/generate-ibilinear-test.py @@ -13,319 +13,14 @@ #include "xnnpack/ibilinear.h" #include "xnnpack/isa-checks.h" #include "ibilinear-microkernel-tester.h" - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_IBILINEAR__NEONFP16ARITH_C8, channels_eq_8) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_f16_ibilinear_ukernel__neonfp16arith_c8); - } - - TEST(F16_IBILINEAR__NEONFP16ARITH_C8, channels_div_8) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f16_ibilinear_ukernel__neonfp16arith_c8); - } - } - - TEST(F16_IBILINEAR__NEONFP16ARITH_C8, channels_lt_8) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f16_ibilinear_ukernel__neonfp16arith_c8); - } - } - - TEST(F16_IBILINEAR__NEONFP16ARITH_C8, channels_gt_8) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t channels = 9; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f16_ibilinear_ukernel__neonfp16arith_c8); - } - } - - TEST(F16_IBILINEAR__NEONFP16ARITH_C8, pixels_gt_1) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - 
.channels(channels) - .Test(xnn_f16_ibilinear_ukernel__neonfp16arith_c8); - } - } - } - - TEST(F16_IBILINEAR__NEONFP16ARITH_C8, input_offset) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_f16_ibilinear_ukernel__neonfp16arith_c8); - } - } - } - - TEST(F16_IBILINEAR__NEONFP16ARITH_C8, output_stride) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_f16_ibilinear_ukernel__neonfp16arith_c8); - } - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_IBILINEAR__NEONFP16ARITH_C16, channels_eq_16) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - IBilinearMicrokernelTester() - .pixels(1) - .channels(16) - .Test(xnn_f16_ibilinear_ukernel__neonfp16arith_c16); - } - - TEST(F16_IBILINEAR__NEONFP16ARITH_C16, channels_div_16) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t channels = 32; channels < 160; channels += 16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f16_ibilinear_ukernel__neonfp16arith_c16); - } - } - - TEST(F16_IBILINEAR__NEONFP16ARITH_C16, channels_lt_16) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t channels = 1; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f16_ibilinear_ukernel__neonfp16arith_c16); - } - } - - TEST(F16_IBILINEAR__NEONFP16ARITH_C16, channels_gt_16) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t channels = 17; channels < 32; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - 
.Test(xnn_f16_ibilinear_ukernel__neonfp16arith_c16); - } - } - - TEST(F16_IBILINEAR__NEONFP16ARITH_C16, pixels_gt_1) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f16_ibilinear_ukernel__neonfp16arith_c16); - } - } - } - - TEST(F16_IBILINEAR__NEONFP16ARITH_C16, input_offset) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(83) - .Test(xnn_f16_ibilinear_ukernel__neonfp16arith_c16); - } - } - } - - TEST(F16_IBILINEAR__NEONFP16ARITH_C16, output_stride) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(83) - .Test(xnn_f16_ibilinear_ukernel__neonfp16arith_c16); - } - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_IBILINEAR__FMA3_C8, channels_eq_8) { - TEST_REQUIRES_X86_FMA3; - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_f16_ibilinear_ukernel__fma3_c8); - } - - TEST(F16_IBILINEAR__FMA3_C8, channels_div_8) { - TEST_REQUIRES_X86_FMA3; - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f16_ibilinear_ukernel__fma3_c8); - } - } - - TEST(F16_IBILINEAR__FMA3_C8, channels_lt_8) { - TEST_REQUIRES_X86_FMA3; - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f16_ibilinear_ukernel__fma3_c8); - } - } - - TEST(F16_IBILINEAR__FMA3_C8, channels_gt_8) 
{ - TEST_REQUIRES_X86_FMA3; - for (size_t channels = 9; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f16_ibilinear_ukernel__fma3_c8); - } - } - - TEST(F16_IBILINEAR__FMA3_C8, pixels_gt_1) { - TEST_REQUIRES_X86_FMA3; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f16_ibilinear_ukernel__fma3_c8); - } - } - } - - TEST(F16_IBILINEAR__FMA3_C8, input_offset) { - TEST_REQUIRES_X86_FMA3; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_f16_ibilinear_ukernel__fma3_c8); - } - } - } - - TEST(F16_IBILINEAR__FMA3_C8, output_stride) { - TEST_REQUIRES_X86_FMA3; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_f16_ibilinear_ukernel__fma3_c8); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_IBILINEAR__FMA3_C16, channels_eq_16) { - TEST_REQUIRES_X86_FMA3; - IBilinearMicrokernelTester() - .pixels(1) - .channels(16) - .Test(xnn_f16_ibilinear_ukernel__fma3_c16); - } - - TEST(F16_IBILINEAR__FMA3_C16, channels_div_16) { - TEST_REQUIRES_X86_FMA3; - for (size_t channels = 32; channels < 160; channels += 16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f16_ibilinear_ukernel__fma3_c16); - } - } - - TEST(F16_IBILINEAR__FMA3_C16, channels_lt_16) { - TEST_REQUIRES_X86_FMA3; - for (size_t channels = 1; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f16_ibilinear_ukernel__fma3_c16); - } - } - - 
TEST(F16_IBILINEAR__FMA3_C16, channels_gt_16) { - TEST_REQUIRES_X86_FMA3; - for (size_t channels = 17; channels < 32; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f16_ibilinear_ukernel__fma3_c16); - } - } - - TEST(F16_IBILINEAR__FMA3_C16, pixels_gt_1) { - TEST_REQUIRES_X86_FMA3; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f16_ibilinear_ukernel__fma3_c16); - } - } - } - - TEST(F16_IBILINEAR__FMA3_C16, input_offset) { - TEST_REQUIRES_X86_FMA3; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(83) - .Test(xnn_f16_ibilinear_ukernel__fma3_c16); - } - } - } - - TEST(F16_IBILINEAR__FMA3_C16, output_stride) { - TEST_REQUIRES_X86_FMA3; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(83) - .Test(xnn_f16_ibilinear_ukernel__fma3_c16); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) XNN_TEST_IBILINEAR_CHANNELS_EQ(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params);\ +XNN_TEST_IBILINEAR_CHANNELS_DIV(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_CHANNELS_LT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_CHANNELS_GT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_PIXELS_DIV(ukernel, arch_flags, 
channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_PIXELS_LT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_PIXELS_GT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_INPUT_OFFSET(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_OUTPUT_STRIDE(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); +#include "f16-ibilinear/f16-ibilinear.h" +#undef XNN_UKERNEL_WITH_PARAMS diff --git a/test/f16-ibilinear.yaml b/test/f16-ibilinear.yaml deleted file mode 100644 index 6ac94f7f3a1..00000000000 --- a/test/f16-ibilinear.yaml +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright 2022 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# ARM NEON+FP16ARITH -- name: xnn_f16_ibilinear_ukernel__neonfp16arith_c8 -- name: xnn_f16_ibilinear_ukernel__neonfp16arith_c16 - -# x86 FMA3 -- name: xnn_f16_ibilinear_ukernel__fma3_c8 -- name: xnn_f16_ibilinear_ukernel__fma3_c16 diff --git a/test/f32-ibilinear.cc b/test/f32-ibilinear.cc index 6fd7ee04286..d2ef2d240fc 100644 --- a/test/f32-ibilinear.cc +++ b/test/f32-ibilinear.cc @@ -4,7 +4,7 @@ // LICENSE file in the root directory of this source tree. // // Auto-generated file. Do not edit! 
-// Specification: test/f32-ibilinear.yaml +// Microkernel: f32-ibilinear // Generator: tools/generate-ibilinear-test.py @@ -13,954 +13,14 @@ #include "xnnpack/ibilinear.h" #include "xnnpack/isa-checks.h" #include "ibilinear-microkernel-tester.h" - - -TEST(F32_IBILINEAR__SCALAR_C1, channels_eq_1) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(1) - .Test(xnn_f32_ibilinear_ukernel__scalar_c1); -} - -TEST(F32_IBILINEAR__SCALAR_C1, channels_gt_1) { - for (size_t channels = 2; channels < 10; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__scalar_c1); - } -} - -TEST(F32_IBILINEAR__SCALAR_C1, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 5; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__scalar_c1); - } - } -} - -TEST(F32_IBILINEAR__SCALAR_C1, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(7) - .Test(xnn_f32_ibilinear_ukernel__scalar_c1); - } - } -} - -TEST(F32_IBILINEAR__SCALAR_C1, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(7) - .Test(xnn_f32_ibilinear_ukernel__scalar_c1); - } - } -} - -TEST(F32_IBILINEAR__SCALAR_C2, channels_eq_2) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(2) - .Test(xnn_f32_ibilinear_ukernel__scalar_c2); -} - -TEST(F32_IBILINEAR__SCALAR_C2, channels_div_2) { - for (size_t channels = 4; channels < 20; channels += 2) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__scalar_c2); - } -} - -TEST(F32_IBILINEAR__SCALAR_C2, channels_lt_2) { - for 
(size_t channels = 1; channels < 2; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__scalar_c2); - } -} - -TEST(F32_IBILINEAR__SCALAR_C2, channels_gt_2) { - for (size_t channels = 3; channels < 4; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__scalar_c2); - } -} - -TEST(F32_IBILINEAR__SCALAR_C2, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 10; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__scalar_c2); - } - } -} - -TEST(F32_IBILINEAR__SCALAR_C2, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(13) - .Test(xnn_f32_ibilinear_ukernel__scalar_c2); - } - } -} - -TEST(F32_IBILINEAR__SCALAR_C2, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(13) - .Test(xnn_f32_ibilinear_ukernel__scalar_c2); - } - } -} - -TEST(F32_IBILINEAR__SCALAR_C4, channels_eq_4) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(4) - .Test(xnn_f32_ibilinear_ukernel__scalar_c4); -} - -TEST(F32_IBILINEAR__SCALAR_C4, channels_div_4) { - for (size_t channels = 8; channels < 40; channels += 4) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__scalar_c4); - } -} - -TEST(F32_IBILINEAR__SCALAR_C4, channels_lt_4) { - for (size_t channels = 1; channels < 4; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__scalar_c4); - } -} - -TEST(F32_IBILINEAR__SCALAR_C4, channels_gt_4) { - for (size_t 
channels = 5; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__scalar_c4); - } -} - -TEST(F32_IBILINEAR__SCALAR_C4, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__scalar_c4); - } - } -} - -TEST(F32_IBILINEAR__SCALAR_C4, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(23) - .Test(xnn_f32_ibilinear_ukernel__scalar_c4); - } - } -} - -TEST(F32_IBILINEAR__SCALAR_C4, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(23) - .Test(xnn_f32_ibilinear_ukernel__scalar_c4); - } - } -} - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_IBILINEAR__NEON_C4, channels_eq_4) { - TEST_REQUIRES_ARM_NEON; - IBilinearMicrokernelTester() - .pixels(1) - .channels(4) - .Test(xnn_f32_ibilinear_ukernel__neon_c4); - } - - TEST(F32_IBILINEAR__NEON_C4, channels_div_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 8; channels < 40; channels += 4) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neon_c4); - } - } - - TEST(F32_IBILINEAR__NEON_C4, channels_lt_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 1; channels < 4; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neon_c4); - } - } - - TEST(F32_IBILINEAR__NEON_C4, channels_gt_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 5; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - 
.channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neon_c4); - } - } - - TEST(F32_IBILINEAR__NEON_C4, pixels_gt_1) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neon_c4); - } - } - } - - TEST(F32_IBILINEAR__NEON_C4, input_offset) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(23) - .Test(xnn_f32_ibilinear_ukernel__neon_c4); - } - } - } - - TEST(F32_IBILINEAR__NEON_C4, output_stride) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(23) - .Test(xnn_f32_ibilinear_ukernel__neon_c4); - } - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_IBILINEAR__NEON_C8, channels_eq_8) { - TEST_REQUIRES_ARM_NEON; - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_f32_ibilinear_ukernel__neon_c8); - } - - TEST(F32_IBILINEAR__NEON_C8, channels_div_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neon_c8); - } - } - - TEST(F32_IBILINEAR__NEON_C8, channels_lt_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neon_c8); - } - } - - TEST(F32_IBILINEAR__NEON_C8, channels_gt_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 9; channels < 16; channels++) { - IBilinearMicrokernelTester() - 
.pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neon_c8); - } - } - - TEST(F32_IBILINEAR__NEON_C8, pixels_gt_1) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neon_c8); - } - } - } - - TEST(F32_IBILINEAR__NEON_C8, input_offset) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_f32_ibilinear_ukernel__neon_c8); - } - } - } - - TEST(F32_IBILINEAR__NEON_C8, output_stride) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_f32_ibilinear_ukernel__neon_c8); - } - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_IBILINEAR__NEONFMA_C4, channels_eq_4) { - TEST_REQUIRES_ARM_NEON_FMA; - IBilinearMicrokernelTester() - .pixels(1) - .channels(4) - .Test(xnn_f32_ibilinear_ukernel__neonfma_c4); - } - - TEST(F32_IBILINEAR__NEONFMA_C4, channels_div_4) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t channels = 8; channels < 40; channels += 4) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neonfma_c4); - } - } - - TEST(F32_IBILINEAR__NEONFMA_C4, channels_lt_4) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t channels = 1; channels < 4; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neonfma_c4); - } - } - - TEST(F32_IBILINEAR__NEONFMA_C4, channels_gt_4) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t channels = 5; channels < 
8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neonfma_c4); - } - } - - TEST(F32_IBILINEAR__NEONFMA_C4, pixels_gt_1) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neonfma_c4); - } - } - } - - TEST(F32_IBILINEAR__NEONFMA_C4, input_offset) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(23) - .Test(xnn_f32_ibilinear_ukernel__neonfma_c4); - } - } - } - - TEST(F32_IBILINEAR__NEONFMA_C4, output_stride) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(23) - .Test(xnn_f32_ibilinear_ukernel__neonfma_c4); - } - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_IBILINEAR__NEONFMA_C8, channels_eq_8) { - TEST_REQUIRES_ARM_NEON_FMA; - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_f32_ibilinear_ukernel__neonfma_c8); - } - - TEST(F32_IBILINEAR__NEONFMA_C8, channels_div_8) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neonfma_c8); - } - } - - TEST(F32_IBILINEAR__NEONFMA_C8, channels_lt_8) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neonfma_c8); - } - } - - TEST(F32_IBILINEAR__NEONFMA_C8, 
channels_gt_8) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t channels = 9; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neonfma_c8); - } - } - - TEST(F32_IBILINEAR__NEONFMA_C8, pixels_gt_1) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__neonfma_c8); - } - } - } - - TEST(F32_IBILINEAR__NEONFMA_C8, input_offset) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_f32_ibilinear_ukernel__neonfma_c8); - } - } - } - - TEST(F32_IBILINEAR__NEONFMA_C8, output_stride) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_f32_ibilinear_ukernel__neonfma_c8); - } - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_IBILINEAR__SSE_C4, channels_eq_4) { - TEST_REQUIRES_X86_SSE; - IBilinearMicrokernelTester() - .pixels(1) - .channels(4) - .Test(xnn_f32_ibilinear_ukernel__sse_c4); - } - - TEST(F32_IBILINEAR__SSE_C4, channels_div_4) { - TEST_REQUIRES_X86_SSE; - for (size_t channels = 8; channels < 40; channels += 4) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__sse_c4); - } - } - - TEST(F32_IBILINEAR__SSE_C4, channels_lt_4) { - TEST_REQUIRES_X86_SSE; - for (size_t channels = 1; channels < 4; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__sse_c4); 
- } - } - - TEST(F32_IBILINEAR__SSE_C4, channels_gt_4) { - TEST_REQUIRES_X86_SSE; - for (size_t channels = 5; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__sse_c4); - } - } - - TEST(F32_IBILINEAR__SSE_C4, pixels_gt_1) { - TEST_REQUIRES_X86_SSE; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__sse_c4); - } - } - } - - TEST(F32_IBILINEAR__SSE_C4, input_offset) { - TEST_REQUIRES_X86_SSE; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(23) - .Test(xnn_f32_ibilinear_ukernel__sse_c4); - } - } - } - - TEST(F32_IBILINEAR__SSE_C4, output_stride) { - TEST_REQUIRES_X86_SSE; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(23) - .Test(xnn_f32_ibilinear_ukernel__sse_c4); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_IBILINEAR__SSE_C8, channels_eq_8) { - TEST_REQUIRES_X86_SSE; - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_f32_ibilinear_ukernel__sse_c8); - } - - TEST(F32_IBILINEAR__SSE_C8, channels_div_8) { - TEST_REQUIRES_X86_SSE; - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__sse_c8); - } - } - - TEST(F32_IBILINEAR__SSE_C8, channels_lt_8) { - TEST_REQUIRES_X86_SSE; - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__sse_c8); - } - 
} - - TEST(F32_IBILINEAR__SSE_C8, channels_gt_8) { - TEST_REQUIRES_X86_SSE; - for (size_t channels = 9; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__sse_c8); - } - } - - TEST(F32_IBILINEAR__SSE_C8, pixels_gt_1) { - TEST_REQUIRES_X86_SSE; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__sse_c8); - } - } - } - - TEST(F32_IBILINEAR__SSE_C8, input_offset) { - TEST_REQUIRES_X86_SSE; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_f32_ibilinear_ukernel__sse_c8); - } - } - } - - TEST(F32_IBILINEAR__SSE_C8, output_stride) { - TEST_REQUIRES_X86_SSE; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_f32_ibilinear_ukernel__sse_c8); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_IBILINEAR__WASMSIMD_C4, channels_eq_4) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(4) - .Test(xnn_f32_ibilinear_ukernel__wasmsimd_c4); - } - - TEST(F32_IBILINEAR__WASMSIMD_C4, channels_div_4) { - for (size_t channels = 8; channels < 40; channels += 4) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmsimd_c4); - } - } - - TEST(F32_IBILINEAR__WASMSIMD_C4, channels_lt_4) { - for (size_t channels = 1; channels < 4; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmsimd_c4); - } - } - - 
TEST(F32_IBILINEAR__WASMSIMD_C4, channels_gt_4) { - for (size_t channels = 5; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmsimd_c4); - } - } - - TEST(F32_IBILINEAR__WASMSIMD_C4, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmsimd_c4); - } - } - } - - TEST(F32_IBILINEAR__WASMSIMD_C4, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(23) - .Test(xnn_f32_ibilinear_ukernel__wasmsimd_c4); - } - } - } - - TEST(F32_IBILINEAR__WASMSIMD_C4, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(23) - .Test(xnn_f32_ibilinear_ukernel__wasmsimd_c4); - } - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_IBILINEAR__WASMSIMD_C8, channels_eq_8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_f32_ibilinear_ukernel__wasmsimd_c8); - } - - TEST(F32_IBILINEAR__WASMSIMD_C8, channels_div_8) { - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmsimd_c8); - } - } - - TEST(F32_IBILINEAR__WASMSIMD_C8, channels_lt_8) { - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmsimd_c8); - } - } - - TEST(F32_IBILINEAR__WASMSIMD_C8, channels_gt_8) { - for (size_t channels = 9; 
channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmsimd_c8); - } - } - - TEST(F32_IBILINEAR__WASMSIMD_C8, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmsimd_c8); - } - } - } - - TEST(F32_IBILINEAR__WASMSIMD_C8, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_f32_ibilinear_ukernel__wasmsimd_c8); - } - } - } - - TEST(F32_IBILINEAR__WASMSIMD_C8, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_f32_ibilinear_ukernel__wasmsimd_c8); - } - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_IBILINEAR__WASMRELAXEDSIMD_C4, channels_eq_4) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(4) - .Test(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c4); - } - - TEST(F32_IBILINEAR__WASMRELAXEDSIMD_C4, channels_div_4) { - for (size_t channels = 8; channels < 40; channels += 4) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c4); - } - } - - TEST(F32_IBILINEAR__WASMRELAXEDSIMD_C4, channels_lt_4) { - for (size_t channels = 1; channels < 4; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c4); - } - } - - TEST(F32_IBILINEAR__WASMRELAXEDSIMD_C4, channels_gt_4) { - for (size_t channels = 5; channels < 8; channels++) { - 
IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c4); - } - } - - TEST(F32_IBILINEAR__WASMRELAXEDSIMD_C4, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c4); - } - } - } - - TEST(F32_IBILINEAR__WASMRELAXEDSIMD_C4, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(23) - .Test(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c4); - } - } - } - - TEST(F32_IBILINEAR__WASMRELAXEDSIMD_C4, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(23) - .Test(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c4); - } - } - } -#endif // XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_IBILINEAR__WASMRELAXEDSIMD_C8, channels_eq_8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c8); - } - - TEST(F32_IBILINEAR__WASMRELAXEDSIMD_C8, channels_div_8) { - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c8); - } - } - - TEST(F32_IBILINEAR__WASMRELAXEDSIMD_C8, channels_lt_8) { - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c8); - } - } - - TEST(F32_IBILINEAR__WASMRELAXEDSIMD_C8, channels_gt_8) { - for (size_t channels = 9; channels < 16; channels++) { - 
IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c8); - } - } - - TEST(F32_IBILINEAR__WASMRELAXEDSIMD_C8, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c8); - } - } - } - - TEST(F32_IBILINEAR__WASMRELAXEDSIMD_C8, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c8); - } - } - } - - TEST(F32_IBILINEAR__WASMRELAXEDSIMD_C8, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c8); - } - } - } -#endif // XNN_ARCH_WASMRELAXEDSIMD +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) XNN_TEST_IBILINEAR_CHANNELS_EQ(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params);\ +XNN_TEST_IBILINEAR_CHANNELS_DIV(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_CHANNELS_LT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_CHANNELS_GT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_PIXELS_DIV(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_PIXELS_LT(ukernel, arch_flags, channel_tile, pixel_tile, 
datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_PIXELS_GT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_INPUT_OFFSET(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_OUTPUT_STRIDE(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); +#include "f32-ibilinear/f32-ibilinear.h" +#undef XNN_UKERNEL_WITH_PARAMS diff --git a/test/f32-ibilinear.yaml b/test/f32-ibilinear.yaml deleted file mode 100644 index f2577e99be0..00000000000 --- a/test/f32-ibilinear.yaml +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright 2019 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# Scalar -- name: xnn_f32_ibilinear_ukernel__scalar_c1 -- name: xnn_f32_ibilinear_ukernel__scalar_c2 -- name: xnn_f32_ibilinear_ukernel__scalar_c4 -# ARM NEON -- name: xnn_f32_ibilinear_ukernel__neon_c4 -- name: xnn_f32_ibilinear_ukernel__neon_c8 -- name: xnn_f32_ibilinear_ukernel__neonfma_c4 -- name: xnn_f32_ibilinear_ukernel__neonfma_c8 -# x86 SSE -- name: xnn_f32_ibilinear_ukernel__sse_c4 -- name: xnn_f32_ibilinear_ukernel__sse_c8 -# WAsm SIMD -- name: xnn_f32_ibilinear_ukernel__wasmsimd_c4 -- name: xnn_f32_ibilinear_ukernel__wasmsimd_c8 -# WAsm Relaxed SIMD -- name: xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c4 -- name: xnn_f32_ibilinear_ukernel__wasmrelaxedsimd_c8 diff --git a/test/ibilinear-microkernel-tester.h b/test/ibilinear-microkernel-tester.h index f830e26279a..2a05abb3146 100644 --- a/test/ibilinear-microkernel-tester.h +++ b/test/ibilinear-microkernel-tester.h @@ -15,6 +15,7 @@ #include #include +#include "next_prime.h" #include "xnnpack.h" #include "xnnpack/math.h" #include "xnnpack/microfnptr.h" @@ -423,3 +424,109 @@ class IBilinearMicrokernelTester { uint32_t input_offset_{0}; size_t iterations_{3}; 
};
+// Tests exactly one tile: pixel_tile pixels and channel_tile channels.
+#define XNN_TEST_IBILINEAR_CHANNELS_EQ( \
+  ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \
+  TEST(ukernel, channels_eq) \
+  { \
+    TEST_REQUIRES_ARCH_FLAGS(arch_flags); \
+    IBilinearMicrokernelTester().pixels(pixel_tile).channels(channel_tile).Test(ukernel); \
+  }
+// Tests channel counts that are whole multiples (2x through 9x) of channel_tile, at pixel_tile pixels.
+#define XNN_TEST_IBILINEAR_CHANNELS_DIV( \
+  ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \
+  TEST(ukernel, channels_div) \
+  { \
+    TEST_REQUIRES_ARCH_FLAGS(arch_flags); \
+    for (size_t channels = channel_tile * 2; channels < channel_tile * 10; channels += channel_tile) { \
+      IBilinearMicrokernelTester().pixels(pixel_tile).channels(channels).Test(ukernel); \
+    } \
+  }
+// Tests every channel count below channel_tile (the loop is empty when channel_tile is 1).
+#define XNN_TEST_IBILINEAR_CHANNELS_LT( \
+  ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \
+  TEST(ukernel, channels_lt) \
+  { \
+    TEST_REQUIRES_ARCH_FLAGS(arch_flags); \
+    for (size_t channels = 1; channels < channel_tile; channels++) { \
+      IBilinearMicrokernelTester().pixels(pixel_tile).channels(channels).Test(ukernel); \
+    } \
+  }
+// Tests channel counts just above channel_tile: up to 2 * channel_tile - 1, or up to 9 when channel_tile is 1.
+#define XNN_TEST_IBILINEAR_CHANNELS_GT( \
+  ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \
+  TEST(ukernel, channels_gt) \
+  { \
+    TEST_REQUIRES_ARCH_FLAGS(arch_flags); \
+    for (size_t channels = channel_tile + 1; channels < ((channel_tile == 1) ? 10 : channel_tile * 2); channels++) { \
+      IBilinearMicrokernelTester().pixels(pixel_tile).channels(channels).Test(ukernel); \
+    } \
+  }
+// Tests pixel counts that are multiples (2x through 9x) of pixel_tile; the bound scales with pixel_tile, not channel_tile.
+#define XNN_TEST_IBILINEAR_PIXELS_DIV( \
+  ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \
+  TEST(ukernel, pixels_div) \
+  { \
+    TEST_REQUIRES_ARCH_FLAGS(arch_flags); \
+    for (size_t pixels = pixel_tile * 2; pixels < pixel_tile * 10; pixels += pixel_tile) { \
+      for (size_t channels = 1; channels <= channel_tile * 5; channels += max(1, (channel_tile - 1))) { \
+        IBilinearMicrokernelTester().pixels(pixels).channels(channels).Test(ukernel); \
+      } \
+    } \
+  }
+// Tests every pixel count below pixel_tile (the loop is empty when pixel_tile is 1) over a sweep of channel counts.
+#define XNN_TEST_IBILINEAR_PIXELS_LT( \
+  ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \
+  TEST(ukernel, pixels_lt) \
+  { \
+    TEST_REQUIRES_ARCH_FLAGS(arch_flags); \
+    for (size_t pixels = 1; pixels < pixel_tile; pixels++) { \
+      for (size_t channels = 1; channels <= channel_tile * 5; channels += max(1, (channel_tile - 1))) { \
+        IBilinearMicrokernelTester().pixels(pixels).channels(channels).Test(ukernel); \
+      } \
+    } \
+  }
+// Tests pixel counts from pixel_tile + 1 up to max(2 * pixel_tile, 3) - 1 over a sweep of channel counts.
+#define XNN_TEST_IBILINEAR_PIXELS_GT( \
+  ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \
+  TEST(ukernel, pixels_gt) \
+  { \
+    TEST_REQUIRES_ARCH_FLAGS(arch_flags); \
+    for (size_t pixels = pixel_tile + 1; pixels < max((pixel_tile * 2), 3); pixels++) { \
+      for (size_t channels = 1; channels <= channel_tile * 5; channels += max(1, (channel_tile - 1))) { \
+        IBilinearMicrokernelTester().pixels(pixels).channels(channels).Test(ukernel); \
+      } \
+    } \
+  }
+// Sweeps pixel and channel counts with input_offset set to xnnpack::NextPrime(channel_tile * 5 + 1).
+#define XNN_TEST_IBILINEAR_INPUT_OFFSET( \
+  ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \
+  TEST(ukernel, input_offset) \
+  { \
+    TEST_REQUIRES_ARCH_FLAGS(arch_flags); \
+    for (size_t pixels = 1; pixels < pixel_tile * 5; pixels += max(1, (pixel_tile - 1))) { \
+      for (size_t channels = 1; channels <= channel_tile * 5; channels += max(1, (channel_tile - 1))) { \
+        IBilinearMicrokernelTester() \
+          .pixels(pixels) \
+          .channels(channels) \
+          .input_offset(xnnpack::NextPrime(channel_tile * 5 + 1)) \
+          .Test(ukernel); \
+      } \
+    } \
+  }
+// Sweeps pixel and channel counts with output_stride set to xnnpack::NextPrime(channel_tile * 5 + 1).
+#define XNN_TEST_IBILINEAR_OUTPUT_STRIDE( \
+  ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \
+  TEST(ukernel, output_stride) \
+  { \
+    TEST_REQUIRES_ARCH_FLAGS(arch_flags); \
+    for (size_t pixels = 1; pixels < pixel_tile * 5; pixels += max(1, (pixel_tile - 1))) { \
+      for (size_t channels = 1; channels <= channel_tile * 5; channels += max(1, (channel_tile - 1))) { \
+        IBilinearMicrokernelTester() \
+          .pixels(pixels) \
+          .channels(channels) \
+          .output_stride(xnnpack::NextPrime(channel_tile * 5 + 1)) \
+          .Test(ukernel); \
+      } \
+    } \
+  }
diff --git a/test/s8-ibilinear.cc b/test/s8-ibilinear.cc index 795e5d9f929..70140eb5e7f 100644 --- a/test/s8-ibilinear.cc +++ b/test/s8-ibilinear.cc @@ -4,7 +4,7 @@ // LICENSE file in the root directory of this source tree. // // Auto-generated file. Do not edit!
-// Specification: test/s8-ibilinear.yaml +// Microkernel: s8-ibilinear // Generator: tools/generate-ibilinear-test.py @@ -13,954 +13,14 @@ #include "xnnpack/ibilinear.h" #include "xnnpack/isa-checks.h" #include "ibilinear-microkernel-tester.h" - - -TEST(S8_IBILINEAR__SCALAR_C1, channels_eq_1) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(1) - .Test(xnn_s8_ibilinear_ukernel__scalar_c1); -} - -TEST(S8_IBILINEAR__SCALAR_C1, channels_gt_1) { - for (size_t channels = 2; channels < 10; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__scalar_c1); - } -} - -TEST(S8_IBILINEAR__SCALAR_C1, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 5; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__scalar_c1); - } - } -} - -TEST(S8_IBILINEAR__SCALAR_C1, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(7) - .Test(xnn_s8_ibilinear_ukernel__scalar_c1); - } - } -} - -TEST(S8_IBILINEAR__SCALAR_C1, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(7) - .Test(xnn_s8_ibilinear_ukernel__scalar_c1); - } - } -} - -TEST(S8_IBILINEAR__SCALAR_C2, channels_eq_2) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(2) - .Test(xnn_s8_ibilinear_ukernel__scalar_c2); -} - -TEST(S8_IBILINEAR__SCALAR_C2, channels_div_2) { - for (size_t channels = 4; channels < 20; channels += 2) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__scalar_c2); - } -} - -TEST(S8_IBILINEAR__SCALAR_C2, channels_lt_2) { - for (size_t channels = 
1; channels < 2; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__scalar_c2); - } -} - -TEST(S8_IBILINEAR__SCALAR_C2, channels_gt_2) { - for (size_t channels = 3; channels < 4; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__scalar_c2); - } -} - -TEST(S8_IBILINEAR__SCALAR_C2, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 10; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__scalar_c2); - } - } -} - -TEST(S8_IBILINEAR__SCALAR_C2, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(13) - .Test(xnn_s8_ibilinear_ukernel__scalar_c2); - } - } -} - -TEST(S8_IBILINEAR__SCALAR_C2, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(13) - .Test(xnn_s8_ibilinear_ukernel__scalar_c2); - } - } -} - -TEST(S8_IBILINEAR__SCALAR_C4, channels_eq_4) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(4) - .Test(xnn_s8_ibilinear_ukernel__scalar_c4); -} - -TEST(S8_IBILINEAR__SCALAR_C4, channels_div_4) { - for (size_t channels = 8; channels < 40; channels += 4) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__scalar_c4); - } -} - -TEST(S8_IBILINEAR__SCALAR_C4, channels_lt_4) { - for (size_t channels = 1; channels < 4; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__scalar_c4); - } -} - -TEST(S8_IBILINEAR__SCALAR_C4, channels_gt_4) { - for (size_t channels = 5; channels < 8; 
channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__scalar_c4); - } -} - -TEST(S8_IBILINEAR__SCALAR_C4, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__scalar_c4); - } - } -} - -TEST(S8_IBILINEAR__SCALAR_C4, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(23) - .Test(xnn_s8_ibilinear_ukernel__scalar_c4); - } - } -} - -TEST(S8_IBILINEAR__SCALAR_C4, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(23) - .Test(xnn_s8_ibilinear_ukernel__scalar_c4); - } - } -} - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(S8_IBILINEAR__NEON_C8, channels_eq_8) { - TEST_REQUIRES_ARM_NEON; - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_s8_ibilinear_ukernel__neon_c8); - } - - TEST(S8_IBILINEAR__NEON_C8, channels_div_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__neon_c8); - } - } - - TEST(S8_IBILINEAR__NEON_C8, channels_lt_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__neon_c8); - } - } - - TEST(S8_IBILINEAR__NEON_C8, channels_gt_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 9; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - 
.Test(xnn_s8_ibilinear_ukernel__neon_c8); - } - } - - TEST(S8_IBILINEAR__NEON_C8, pixels_gt_1) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__neon_c8); - } - } - } - - TEST(S8_IBILINEAR__NEON_C8, input_offset) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_s8_ibilinear_ukernel__neon_c8); - } - } - } - - TEST(S8_IBILINEAR__NEON_C8, output_stride) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_s8_ibilinear_ukernel__neon_c8); - } - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(S8_IBILINEAR__NEON_C16, channels_eq_16) { - TEST_REQUIRES_ARM_NEON; - IBilinearMicrokernelTester() - .pixels(1) - .channels(16) - .Test(xnn_s8_ibilinear_ukernel__neon_c16); - } - - TEST(S8_IBILINEAR__NEON_C16, channels_div_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 32; channels < 160; channels += 16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__neon_c16); - } - } - - TEST(S8_IBILINEAR__NEON_C16, channels_lt_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 1; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__neon_c16); - } - } - - TEST(S8_IBILINEAR__NEON_C16, channels_gt_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 17; channels < 32; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - 
.channels(channels) - .Test(xnn_s8_ibilinear_ukernel__neon_c16); - } - } - - TEST(S8_IBILINEAR__NEON_C16, pixels_gt_1) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__neon_c16); - } - } - } - - TEST(S8_IBILINEAR__NEON_C16, input_offset) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(83) - .Test(xnn_s8_ibilinear_ukernel__neon_c16); - } - } - } - - TEST(S8_IBILINEAR__NEON_C16, output_stride) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(83) - .Test(xnn_s8_ibilinear_ukernel__neon_c16); - } - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(S8_IBILINEAR__SSE2_C8, channels_eq_8) { - TEST_REQUIRES_X86_SSE2; - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_s8_ibilinear_ukernel__sse2_c8); - } - - TEST(S8_IBILINEAR__SSE2_C8, channels_div_8) { - TEST_REQUIRES_X86_SSE2; - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse2_c8); - } - } - - TEST(S8_IBILINEAR__SSE2_C8, channels_lt_8) { - TEST_REQUIRES_X86_SSE2; - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse2_c8); - } - } - - TEST(S8_IBILINEAR__SSE2_C8, channels_gt_8) { - TEST_REQUIRES_X86_SSE2; - for (size_t channels = 9; channels < 16; channels++) { - IBilinearMicrokernelTester() - 
.pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse2_c8); - } - } - - TEST(S8_IBILINEAR__SSE2_C8, pixels_gt_1) { - TEST_REQUIRES_X86_SSE2; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse2_c8); - } - } - } - - TEST(S8_IBILINEAR__SSE2_C8, input_offset) { - TEST_REQUIRES_X86_SSE2; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_s8_ibilinear_ukernel__sse2_c8); - } - } - } - - TEST(S8_IBILINEAR__SSE2_C8, output_stride) { - TEST_REQUIRES_X86_SSE2; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_s8_ibilinear_ukernel__sse2_c8); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(S8_IBILINEAR__SSE2_C16, channels_eq_16) { - TEST_REQUIRES_X86_SSE2; - IBilinearMicrokernelTester() - .pixels(1) - .channels(16) - .Test(xnn_s8_ibilinear_ukernel__sse2_c16); - } - - TEST(S8_IBILINEAR__SSE2_C16, channels_div_16) { - TEST_REQUIRES_X86_SSE2; - for (size_t channels = 32; channels < 160; channels += 16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse2_c16); - } - } - - TEST(S8_IBILINEAR__SSE2_C16, channels_lt_16) { - TEST_REQUIRES_X86_SSE2; - for (size_t channels = 1; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse2_c16); - } - } - - TEST(S8_IBILINEAR__SSE2_C16, channels_gt_16) { - TEST_REQUIRES_X86_SSE2; - for (size_t channels = 17; channels < 32; channels++) { - 
IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse2_c16); - } - } - - TEST(S8_IBILINEAR__SSE2_C16, pixels_gt_1) { - TEST_REQUIRES_X86_SSE2; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse2_c16); - } - } - } - - TEST(S8_IBILINEAR__SSE2_C16, input_offset) { - TEST_REQUIRES_X86_SSE2; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(83) - .Test(xnn_s8_ibilinear_ukernel__sse2_c16); - } - } - } - - TEST(S8_IBILINEAR__SSE2_C16, output_stride) { - TEST_REQUIRES_X86_SSE2; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(83) - .Test(xnn_s8_ibilinear_ukernel__sse2_c16); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(S8_IBILINEAR__SSE41_C8, channels_eq_8) { - TEST_REQUIRES_X86_SSE41; - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_s8_ibilinear_ukernel__sse41_c8); - } - - TEST(S8_IBILINEAR__SSE41_C8, channels_div_8) { - TEST_REQUIRES_X86_SSE41; - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse41_c8); - } - } - - TEST(S8_IBILINEAR__SSE41_C8, channels_lt_8) { - TEST_REQUIRES_X86_SSE41; - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse41_c8); - } - } - - TEST(S8_IBILINEAR__SSE41_C8, channels_gt_8) { - TEST_REQUIRES_X86_SSE41; - for (size_t channels = 9; channels 
< 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse41_c8); - } - } - - TEST(S8_IBILINEAR__SSE41_C8, pixels_gt_1) { - TEST_REQUIRES_X86_SSE41; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse41_c8); - } - } - } - - TEST(S8_IBILINEAR__SSE41_C8, input_offset) { - TEST_REQUIRES_X86_SSE41; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_s8_ibilinear_ukernel__sse41_c8); - } - } - } - - TEST(S8_IBILINEAR__SSE41_C8, output_stride) { - TEST_REQUIRES_X86_SSE41; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_s8_ibilinear_ukernel__sse41_c8); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(S8_IBILINEAR__SSE41_C16, channels_eq_16) { - TEST_REQUIRES_X86_SSE41; - IBilinearMicrokernelTester() - .pixels(1) - .channels(16) - .Test(xnn_s8_ibilinear_ukernel__sse41_c16); - } - - TEST(S8_IBILINEAR__SSE41_C16, channels_div_16) { - TEST_REQUIRES_X86_SSE41; - for (size_t channels = 32; channels < 160; channels += 16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse41_c16); - } - } - - TEST(S8_IBILINEAR__SSE41_C16, channels_lt_16) { - TEST_REQUIRES_X86_SSE41; - for (size_t channels = 1; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse41_c16); - } - } - - TEST(S8_IBILINEAR__SSE41_C16, channels_gt_16) { - TEST_REQUIRES_X86_SSE41; 
- for (size_t channels = 17; channels < 32; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse41_c16); - } - } - - TEST(S8_IBILINEAR__SSE41_C16, pixels_gt_1) { - TEST_REQUIRES_X86_SSE41; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__sse41_c16); - } - } - } - - TEST(S8_IBILINEAR__SSE41_C16, input_offset) { - TEST_REQUIRES_X86_SSE41; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(83) - .Test(xnn_s8_ibilinear_ukernel__sse41_c16); - } - } - } - - TEST(S8_IBILINEAR__SSE41_C16, output_stride) { - TEST_REQUIRES_X86_SSE41; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(83) - .Test(xnn_s8_ibilinear_ukernel__sse41_c16); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(S8_IBILINEAR__WASMSIMD_DOT16X2_C8, channels_eq_8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8); - } - - TEST(S8_IBILINEAR__WASMSIMD_DOT16X2_C8, channels_div_8) { - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8); - } - } - - TEST(S8_IBILINEAR__WASMSIMD_DOT16X2_C8, channels_lt_8) { - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8); - } - } - - TEST(S8_IBILINEAR__WASMSIMD_DOT16X2_C8, 
channels_gt_8) { - for (size_t channels = 9; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8); - } - } - - TEST(S8_IBILINEAR__WASMSIMD_DOT16X2_C8, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8); - } - } - } - - TEST(S8_IBILINEAR__WASMSIMD_DOT16X2_C8, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8); - } - } - } - - TEST(S8_IBILINEAR__WASMSIMD_DOT16X2_C8, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8); - } - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(S8_IBILINEAR__WASMSIMD_DOT16X2_C16, channels_eq_16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(16) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16); - } - - TEST(S8_IBILINEAR__WASMSIMD_DOT16X2_C16, channels_div_16) { - for (size_t channels = 32; channels < 160; channels += 16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16); - } - } - - TEST(S8_IBILINEAR__WASMSIMD_DOT16X2_C16, channels_lt_16) { - for (size_t channels = 1; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16); - } - } - - 
TEST(S8_IBILINEAR__WASMSIMD_DOT16X2_C16, channels_gt_16) { - for (size_t channels = 17; channels < 32; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16); - } - } - - TEST(S8_IBILINEAR__WASMSIMD_DOT16X2_C16, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16); - } - } - } - - TEST(S8_IBILINEAR__WASMSIMD_DOT16X2_C16, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(83) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16); - } - } - } - - TEST(S8_IBILINEAR__WASMSIMD_DOT16X2_C16, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(83) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16); - } - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(S8_IBILINEAR__WASMSIMD_MUL32_C8, channels_eq_8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8); - } - - TEST(S8_IBILINEAR__WASMSIMD_MUL32_C8, channels_div_8) { - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8); - } - } - - TEST(S8_IBILINEAR__WASMSIMD_MUL32_C8, channels_lt_8) { - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - 
.Test(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8); - } - } - - TEST(S8_IBILINEAR__WASMSIMD_MUL32_C8, channels_gt_8) { - for (size_t channels = 9; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8); - } - } - - TEST(S8_IBILINEAR__WASMSIMD_MUL32_C8, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8); - } - } - } - - TEST(S8_IBILINEAR__WASMSIMD_MUL32_C8, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8); - } - } - } - - TEST(S8_IBILINEAR__WASMSIMD_MUL32_C8, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8); - } - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(S8_IBILINEAR__WASMSIMD_MUL32_C16, channels_eq_16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(16) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16); - } - - TEST(S8_IBILINEAR__WASMSIMD_MUL32_C16, channels_div_16) { - for (size_t channels = 32; channels < 160; channels += 16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16); - } - } - - TEST(S8_IBILINEAR__WASMSIMD_MUL32_C16, channels_lt_16) { - for (size_t channels = 1; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - 
.Test(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16); - } - } - - TEST(S8_IBILINEAR__WASMSIMD_MUL32_C16, channels_gt_16) { - for (size_t channels = 17; channels < 32; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16); - } - } - - TEST(S8_IBILINEAR__WASMSIMD_MUL32_C16, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16); - } - } - } - - TEST(S8_IBILINEAR__WASMSIMD_MUL32_C16, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(83) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16); - } - } - } - - TEST(S8_IBILINEAR__WASMSIMD_MUL32_C16, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(83) - .Test(xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16); - } - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) XNN_TEST_IBILINEAR_CHANNELS_EQ(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params);\ +XNN_TEST_IBILINEAR_CHANNELS_DIV(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_CHANNELS_LT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_CHANNELS_GT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ 
+XNN_TEST_IBILINEAR_PIXELS_DIV(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_PIXELS_LT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_PIXELS_GT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_INPUT_OFFSET(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_OUTPUT_STRIDE(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); +#include "s8-ibilinear/s8-ibilinear.h" +#undef XNN_UKERNEL_WITH_PARAMS diff --git a/test/s8-ibilinear.yaml b/test/s8-ibilinear.yaml deleted file mode 100644 index 5ffd086f333..00000000000 --- a/test/s8-ibilinear.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2021 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# Scalar -- name: xnn_s8_ibilinear_ukernel__scalar_c1 -- name: xnn_s8_ibilinear_ukernel__scalar_c2 -- name: xnn_s8_ibilinear_ukernel__scalar_c4 -# ARM NEON -- name: xnn_s8_ibilinear_ukernel__neon_c8 -- name: xnn_s8_ibilinear_ukernel__neon_c16 -# x86 SSE -- name: xnn_s8_ibilinear_ukernel__sse2_c8 -- name: xnn_s8_ibilinear_ukernel__sse2_c16 -- name: xnn_s8_ibilinear_ukernel__sse41_c8 -- name: xnn_s8_ibilinear_ukernel__sse41_c16 -# WAsm SIMD -- name: xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c8 -- name: xnn_s8_ibilinear_ukernel__wasmsimd_dot16x2_c16 -- name: xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c8 -- name: xnn_s8_ibilinear_ukernel__wasmsimd_mul32_c16 diff --git a/test/u8-ibilinear.cc b/test/u8-ibilinear.cc index 44ea33e6f78..5bb995e27c1 100644 --- a/test/u8-ibilinear.cc +++ b/test/u8-ibilinear.cc @@ -4,7 +4,7 @@ // LICENSE file in the root directory of this source tree. // // Auto-generated file. Do not edit! 
-// Specification: test/u8-ibilinear.yaml +// Microkernel: u8-ibilinear // Generator: tools/generate-ibilinear-test.py @@ -13,954 +13,14 @@ #include "xnnpack/ibilinear.h" #include "xnnpack/isa-checks.h" #include "ibilinear-microkernel-tester.h" - - -TEST(U8_IBILINEAR__SCALAR_C1, channels_eq_1) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(1) - .Test(xnn_u8_ibilinear_ukernel__scalar_c1); -} - -TEST(U8_IBILINEAR__SCALAR_C1, channels_gt_1) { - for (size_t channels = 2; channels < 10; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__scalar_c1); - } -} - -TEST(U8_IBILINEAR__SCALAR_C1, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 5; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__scalar_c1); - } - } -} - -TEST(U8_IBILINEAR__SCALAR_C1, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(7) - .Test(xnn_u8_ibilinear_ukernel__scalar_c1); - } - } -} - -TEST(U8_IBILINEAR__SCALAR_C1, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(7) - .Test(xnn_u8_ibilinear_ukernel__scalar_c1); - } - } -} - -TEST(U8_IBILINEAR__SCALAR_C2, channels_eq_2) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(2) - .Test(xnn_u8_ibilinear_ukernel__scalar_c2); -} - -TEST(U8_IBILINEAR__SCALAR_C2, channels_div_2) { - for (size_t channels = 4; channels < 20; channels += 2) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__scalar_c2); - } -} - -TEST(U8_IBILINEAR__SCALAR_C2, channels_lt_2) { - for (size_t channels = 
1; channels < 2; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__scalar_c2); - } -} - -TEST(U8_IBILINEAR__SCALAR_C2, channels_gt_2) { - for (size_t channels = 3; channels < 4; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__scalar_c2); - } -} - -TEST(U8_IBILINEAR__SCALAR_C2, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 10; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__scalar_c2); - } - } -} - -TEST(U8_IBILINEAR__SCALAR_C2, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(13) - .Test(xnn_u8_ibilinear_ukernel__scalar_c2); - } - } -} - -TEST(U8_IBILINEAR__SCALAR_C2, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(13) - .Test(xnn_u8_ibilinear_ukernel__scalar_c2); - } - } -} - -TEST(U8_IBILINEAR__SCALAR_C4, channels_eq_4) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(4) - .Test(xnn_u8_ibilinear_ukernel__scalar_c4); -} - -TEST(U8_IBILINEAR__SCALAR_C4, channels_div_4) { - for (size_t channels = 8; channels < 40; channels += 4) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__scalar_c4); - } -} - -TEST(U8_IBILINEAR__SCALAR_C4, channels_lt_4) { - for (size_t channels = 1; channels < 4; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__scalar_c4); - } -} - -TEST(U8_IBILINEAR__SCALAR_C4, channels_gt_4) { - for (size_t channels = 5; channels < 8; 
channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__scalar_c4); - } -} - -TEST(U8_IBILINEAR__SCALAR_C4, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__scalar_c4); - } - } -} - -TEST(U8_IBILINEAR__SCALAR_C4, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(23) - .Test(xnn_u8_ibilinear_ukernel__scalar_c4); - } - } -} - -TEST(U8_IBILINEAR__SCALAR_C4, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(23) - .Test(xnn_u8_ibilinear_ukernel__scalar_c4); - } - } -} - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(U8_IBILINEAR__NEON_C8, channels_eq_8) { - TEST_REQUIRES_ARM_NEON; - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_u8_ibilinear_ukernel__neon_c8); - } - - TEST(U8_IBILINEAR__NEON_C8, channels_div_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__neon_c8); - } - } - - TEST(U8_IBILINEAR__NEON_C8, channels_lt_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__neon_c8); - } - } - - TEST(U8_IBILINEAR__NEON_C8, channels_gt_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 9; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - 
.Test(xnn_u8_ibilinear_ukernel__neon_c8); - } - } - - TEST(U8_IBILINEAR__NEON_C8, pixels_gt_1) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__neon_c8); - } - } - } - - TEST(U8_IBILINEAR__NEON_C8, input_offset) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_u8_ibilinear_ukernel__neon_c8); - } - } - } - - TEST(U8_IBILINEAR__NEON_C8, output_stride) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_u8_ibilinear_ukernel__neon_c8); - } - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(U8_IBILINEAR__NEON_C16, channels_eq_16) { - TEST_REQUIRES_ARM_NEON; - IBilinearMicrokernelTester() - .pixels(1) - .channels(16) - .Test(xnn_u8_ibilinear_ukernel__neon_c16); - } - - TEST(U8_IBILINEAR__NEON_C16, channels_div_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 32; channels < 160; channels += 16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__neon_c16); - } - } - - TEST(U8_IBILINEAR__NEON_C16, channels_lt_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 1; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__neon_c16); - } - } - - TEST(U8_IBILINEAR__NEON_C16, channels_gt_16) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 17; channels < 32; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - 
.channels(channels) - .Test(xnn_u8_ibilinear_ukernel__neon_c16); - } - } - - TEST(U8_IBILINEAR__NEON_C16, pixels_gt_1) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__neon_c16); - } - } - } - - TEST(U8_IBILINEAR__NEON_C16, input_offset) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(83) - .Test(xnn_u8_ibilinear_ukernel__neon_c16); - } - } - } - - TEST(U8_IBILINEAR__NEON_C16, output_stride) { - TEST_REQUIRES_ARM_NEON; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(83) - .Test(xnn_u8_ibilinear_ukernel__neon_c16); - } - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(U8_IBILINEAR__SSE2_C8, channels_eq_8) { - TEST_REQUIRES_X86_SSE2; - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_u8_ibilinear_ukernel__sse2_c8); - } - - TEST(U8_IBILINEAR__SSE2_C8, channels_div_8) { - TEST_REQUIRES_X86_SSE2; - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse2_c8); - } - } - - TEST(U8_IBILINEAR__SSE2_C8, channels_lt_8) { - TEST_REQUIRES_X86_SSE2; - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse2_c8); - } - } - - TEST(U8_IBILINEAR__SSE2_C8, channels_gt_8) { - TEST_REQUIRES_X86_SSE2; - for (size_t channels = 9; channels < 16; channels++) { - IBilinearMicrokernelTester() - 
.pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse2_c8); - } - } - - TEST(U8_IBILINEAR__SSE2_C8, pixels_gt_1) { - TEST_REQUIRES_X86_SSE2; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse2_c8); - } - } - } - - TEST(U8_IBILINEAR__SSE2_C8, input_offset) { - TEST_REQUIRES_X86_SSE2; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_u8_ibilinear_ukernel__sse2_c8); - } - } - } - - TEST(U8_IBILINEAR__SSE2_C8, output_stride) { - TEST_REQUIRES_X86_SSE2; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_u8_ibilinear_ukernel__sse2_c8); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(U8_IBILINEAR__SSE2_C16, channels_eq_16) { - TEST_REQUIRES_X86_SSE2; - IBilinearMicrokernelTester() - .pixels(1) - .channels(16) - .Test(xnn_u8_ibilinear_ukernel__sse2_c16); - } - - TEST(U8_IBILINEAR__SSE2_C16, channels_div_16) { - TEST_REQUIRES_X86_SSE2; - for (size_t channels = 32; channels < 160; channels += 16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse2_c16); - } - } - - TEST(U8_IBILINEAR__SSE2_C16, channels_lt_16) { - TEST_REQUIRES_X86_SSE2; - for (size_t channels = 1; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse2_c16); - } - } - - TEST(U8_IBILINEAR__SSE2_C16, channels_gt_16) { - TEST_REQUIRES_X86_SSE2; - for (size_t channels = 17; channels < 32; channels++) { - 
IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse2_c16); - } - } - - TEST(U8_IBILINEAR__SSE2_C16, pixels_gt_1) { - TEST_REQUIRES_X86_SSE2; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse2_c16); - } - } - } - - TEST(U8_IBILINEAR__SSE2_C16, input_offset) { - TEST_REQUIRES_X86_SSE2; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(83) - .Test(xnn_u8_ibilinear_ukernel__sse2_c16); - } - } - } - - TEST(U8_IBILINEAR__SSE2_C16, output_stride) { - TEST_REQUIRES_X86_SSE2; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(83) - .Test(xnn_u8_ibilinear_ukernel__sse2_c16); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(U8_IBILINEAR__SSE41_C8, channels_eq_8) { - TEST_REQUIRES_X86_SSE41; - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_u8_ibilinear_ukernel__sse41_c8); - } - - TEST(U8_IBILINEAR__SSE41_C8, channels_div_8) { - TEST_REQUIRES_X86_SSE41; - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse41_c8); - } - } - - TEST(U8_IBILINEAR__SSE41_C8, channels_lt_8) { - TEST_REQUIRES_X86_SSE41; - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse41_c8); - } - } - - TEST(U8_IBILINEAR__SSE41_C8, channels_gt_8) { - TEST_REQUIRES_X86_SSE41; - for (size_t channels = 9; channels 
< 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse41_c8); - } - } - - TEST(U8_IBILINEAR__SSE41_C8, pixels_gt_1) { - TEST_REQUIRES_X86_SSE41; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse41_c8); - } - } - } - - TEST(U8_IBILINEAR__SSE41_C8, input_offset) { - TEST_REQUIRES_X86_SSE41; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_u8_ibilinear_ukernel__sse41_c8); - } - } - } - - TEST(U8_IBILINEAR__SSE41_C8, output_stride) { - TEST_REQUIRES_X86_SSE41; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_u8_ibilinear_ukernel__sse41_c8); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(U8_IBILINEAR__SSE41_C16, channels_eq_16) { - TEST_REQUIRES_X86_SSE41; - IBilinearMicrokernelTester() - .pixels(1) - .channels(16) - .Test(xnn_u8_ibilinear_ukernel__sse41_c16); - } - - TEST(U8_IBILINEAR__SSE41_C16, channels_div_16) { - TEST_REQUIRES_X86_SSE41; - for (size_t channels = 32; channels < 160; channels += 16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse41_c16); - } - } - - TEST(U8_IBILINEAR__SSE41_C16, channels_lt_16) { - TEST_REQUIRES_X86_SSE41; - for (size_t channels = 1; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse41_c16); - } - } - - TEST(U8_IBILINEAR__SSE41_C16, channels_gt_16) { - TEST_REQUIRES_X86_SSE41; 
- for (size_t channels = 17; channels < 32; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse41_c16); - } - } - - TEST(U8_IBILINEAR__SSE41_C16, pixels_gt_1) { - TEST_REQUIRES_X86_SSE41; - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__sse41_c16); - } - } - } - - TEST(U8_IBILINEAR__SSE41_C16, input_offset) { - TEST_REQUIRES_X86_SSE41; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(83) - .Test(xnn_u8_ibilinear_ukernel__sse41_c16); - } - } - } - - TEST(U8_IBILINEAR__SSE41_C16, output_stride) { - TEST_REQUIRES_X86_SSE41; - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(83) - .Test(xnn_u8_ibilinear_ukernel__sse41_c16); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(U8_IBILINEAR__WASMSIMD_DOT16X2_C8, channels_eq_8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8); - } - - TEST(U8_IBILINEAR__WASMSIMD_DOT16X2_C8, channels_div_8) { - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8); - } - } - - TEST(U8_IBILINEAR__WASMSIMD_DOT16X2_C8, channels_lt_8) { - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8); - } - } - - TEST(U8_IBILINEAR__WASMSIMD_DOT16X2_C8, 
channels_gt_8) { - for (size_t channels = 9; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8); - } - } - - TEST(U8_IBILINEAR__WASMSIMD_DOT16X2_C8, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8); - } - } - } - - TEST(U8_IBILINEAR__WASMSIMD_DOT16X2_C8, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8); - } - } - } - - TEST(U8_IBILINEAR__WASMSIMD_DOT16X2_C8, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8); - } - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(U8_IBILINEAR__WASMSIMD_DOT16X2_C16, channels_eq_16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(16) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16); - } - - TEST(U8_IBILINEAR__WASMSIMD_DOT16X2_C16, channels_div_16) { - for (size_t channels = 32; channels < 160; channels += 16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16); - } - } - - TEST(U8_IBILINEAR__WASMSIMD_DOT16X2_C16, channels_lt_16) { - for (size_t channels = 1; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16); - } - } - - 
TEST(U8_IBILINEAR__WASMSIMD_DOT16X2_C16, channels_gt_16) { - for (size_t channels = 17; channels < 32; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16); - } - } - - TEST(U8_IBILINEAR__WASMSIMD_DOT16X2_C16, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16); - } - } - } - - TEST(U8_IBILINEAR__WASMSIMD_DOT16X2_C16, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(83) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16); - } - } - } - - TEST(U8_IBILINEAR__WASMSIMD_DOT16X2_C16, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(83) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16); - } - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(U8_IBILINEAR__WASMSIMD_MUL32_C8, channels_eq_8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(8) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8); - } - - TEST(U8_IBILINEAR__WASMSIMD_MUL32_C8, channels_div_8) { - for (size_t channels = 16; channels < 80; channels += 8) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8); - } - } - - TEST(U8_IBILINEAR__WASMSIMD_MUL32_C8, channels_lt_8) { - for (size_t channels = 1; channels < 8; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - 
.Test(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8); - } - } - - TEST(U8_IBILINEAR__WASMSIMD_MUL32_C8, channels_gt_8) { - for (size_t channels = 9; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8); - } - } - - TEST(U8_IBILINEAR__WASMSIMD_MUL32_C8, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8); - } - } - } - - TEST(U8_IBILINEAR__WASMSIMD_MUL32_C8, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(43) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8); - } - } - } - - TEST(U8_IBILINEAR__WASMSIMD_MUL32_C8, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(43) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8); - } - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(U8_IBILINEAR__WASMSIMD_MUL32_C16, channels_eq_16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(16) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16); - } - - TEST(U8_IBILINEAR__WASMSIMD_MUL32_C16, channels_div_16) { - for (size_t channels = 32; channels < 160; channels += 16) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16); - } - } - - TEST(U8_IBILINEAR__WASMSIMD_MUL32_C16, channels_lt_16) { - for (size_t channels = 1; channels < 16; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - 
.Test(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16); - } - } - - TEST(U8_IBILINEAR__WASMSIMD_MUL32_C16, channels_gt_16) { - for (size_t channels = 17; channels < 32; channels++) { - IBilinearMicrokernelTester() - .pixels(1) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16); - } - } - - TEST(U8_IBILINEAR__WASMSIMD_MUL32_C16, pixels_gt_1) { - for (size_t pixels = 2; pixels < 3; pixels++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16); - } - } - } - - TEST(U8_IBILINEAR__WASMSIMD_MUL32_C16, input_offset) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(83) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16); - } - } - } - - TEST(U8_IBILINEAR__WASMSIMD_MUL32_C16, output_stride) { - for (size_t pixels = 1; pixels < 5; pixels += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(83) - .Test(xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16); - } - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) XNN_TEST_IBILINEAR_CHANNELS_EQ(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params);\ +XNN_TEST_IBILINEAR_CHANNELS_DIV(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_CHANNELS_LT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_CHANNELS_GT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ 
+XNN_TEST_IBILINEAR_PIXELS_DIV(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_PIXELS_LT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_PIXELS_GT(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_INPUT_OFFSET(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); \ +XNN_TEST_IBILINEAR_OUTPUT_STRIDE(ukernel, arch_flags, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params); +#include "u8-ibilinear/u8-ibilinear.h" +#undef XNN_UKERNEL_WITH_PARAMS diff --git a/test/u8-ibilinear.yaml b/test/u8-ibilinear.yaml deleted file mode 100644 index 5f986448e11..00000000000 --- a/test/u8-ibilinear.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2021 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -# Scalar -- name: xnn_u8_ibilinear_ukernel__scalar_c1 -- name: xnn_u8_ibilinear_ukernel__scalar_c2 -- name: xnn_u8_ibilinear_ukernel__scalar_c4 -# ARM NEON -- name: xnn_u8_ibilinear_ukernel__neon_c8 -- name: xnn_u8_ibilinear_ukernel__neon_c16 -# x86 SSE -- name: xnn_u8_ibilinear_ukernel__sse2_c8 -- name: xnn_u8_ibilinear_ukernel__sse2_c16 -- name: xnn_u8_ibilinear_ukernel__sse41_c8 -- name: xnn_u8_ibilinear_ukernel__sse41_c16 -# WAsm SIMD -- name: xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c8 -- name: xnn_u8_ibilinear_ukernel__wasmsimd_dot16x2_c16 -- name: xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c8 -- name: xnn_u8_ibilinear_ukernel__wasmsimd_mul32_c16 diff --git a/tools/generate-ibilinear-test.py b/tools/generate-ibilinear-test.py index 6403f0f2c26..bb5100baa6c 100755 --- a/tools/generate-ibilinear-test.py +++ b/tools/generate-ibilinear-test.py @@ -19,183 +19,45 @@ parser = argparse.ArgumentParser( description='IBILINEAR microkernel test generator') -parser.add_argument("-s", "--spec", metavar="FILE", required=True, - help="Specification (YAML) file") +parser.add_argument("-t", "--tester", metavar="TESTER", required=True, + choices=["IBilinearMicrokernelTester"], + help="Tester class to be used in the generated test") +parser.add_argument("-k", "--ukernel", metavar="FILE", required=True, + help="Microkernel type") parser.add_argument("-o", "--output", metavar="FILE", required=True, help='Output (C++ source) file') parser.set_defaults(defines=list()) -def split_ukernel_name(name): - match = re.fullmatch(r"xnn_(f16|f32|s8|u8)_ibilinear_ukernel__(.+)_c(\d+)", name) - assert match is not None - channel_tile = int(match.group(3)) - pixel_tile = 1 - - arch, isa, assembly = xnncommon.parse_target_name(target_name=match.group(2)) - return channel_tile, pixel_tile, arch, isa - - IBILINEAR_TEST_TEMPLATE = """\ -TEST(${TEST_NAME}, channels_eq_${CHANNEL_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - IBilinearMicrokernelTester() - .pixels(${PIXEL_TILE}) - 
.channels(${CHANNEL_TILE}) - .Test(${TEST_FUNC}); -} - -$if CHANNEL_TILE > 1: - TEST(${TEST_NAME}, channels_div_${CHANNEL_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t channels = ${CHANNEL_TILE*2}; channels < ${CHANNEL_TILE*10}; channels += ${CHANNEL_TILE}) { - IBilinearMicrokernelTester() - .pixels(${PIXEL_TILE}) - .channels(channels) - .Test(${TEST_FUNC}); - } - } - - TEST(${TEST_NAME}, channels_lt_${CHANNEL_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t channels = 1; channels < ${CHANNEL_TILE}; channels++) { - IBilinearMicrokernelTester() - .pixels(${PIXEL_TILE}) - .channels(channels) - .Test(${TEST_FUNC}); - } - } - -TEST(${TEST_NAME}, channels_gt_${CHANNEL_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t channels = ${CHANNEL_TILE+1}; channels < ${10 if CHANNEL_TILE == 1 else CHANNEL_TILE*2}; channels++) { - IBilinearMicrokernelTester() - .pixels(${PIXEL_TILE}) - .channels(channels) - .Test(${TEST_FUNC}); - } -} - -$if PIXEL_TILE > 1: - TEST(${TEST_NAME}, pixels_div_${PIXEL_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t pixels = ${PIXEL_TILE*2}; pixels < ${PIXEL_TILE*10}; pixels += ${PIXEL_TILE}) { - for (size_t channels = 1; channels <= ${CHANNEL_TILE * 5}; channels += ${max(1, CHANNEL_TILE - 1)}) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(${TEST_FUNC}); - } - } - } - - TEST(${TEST_NAME}, pixels_lt_${PIXEL_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t pixels = 1; pixels < ${PIXEL_TILE}; pixels++) { - for (size_t channels = 1; channels <= ${CHANNEL_TILE * 5}; channels += ${max(1, CHANNEL_TILE - 1)}) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(${TEST_FUNC}); - } - } - } - -TEST(${TEST_NAME}, pixels_gt_${PIXEL_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t pixels = ${PIXEL_TILE+1}; pixels < ${max(PIXEL_TILE*2, 3)}; pixels++) { - for (size_t channels = 1; channels <= ${CHANNEL_TILE * 5}; channels += ${max(1, CHANNEL_TILE - 1)}) { - 
IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .Test(${TEST_FUNC}); - } - } -} - -TEST(${TEST_NAME}, input_offset) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t pixels = 1; pixels < ${PIXEL_TILE * 5}; pixels += ${max(1, PIXEL_TILE - 1)}) { - for (size_t channels = 1; channels <= ${CHANNEL_TILE * 5}; channels += ${max(1, CHANNEL_TILE - 1)}) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .input_offset(${next_prime(CHANNEL_TILE * 5 + 1)}) - .Test(${TEST_FUNC}); - } - } -} - -TEST(${TEST_NAME}, output_stride) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t pixels = 1; pixels < ${PIXEL_TILE * 5}; pixels += ${max(1, PIXEL_TILE - 1)}) { - for (size_t channels = 1; channels <= ${CHANNEL_TILE * 5}; channels += ${max(1, CHANNEL_TILE - 1)}) { - IBilinearMicrokernelTester() - .pixels(pixels) - .channels(channels) - .output_stride(${next_prime(CHANNEL_TILE * 5 + 1)}) - .Test(${TEST_FUNC}); - } - } -} +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, channel_tile, pixel_tile, datatype, weight_type, params_type, init_params) \ +XNN_TEST_IBILINEAR_CHANNELS_EQ(ukernel, arch_flags, ${", ".join(TEST_ARGS)}); +XNN_TEST_IBILINEAR_CHANNELS_DIV(ukernel, arch_flags, ${", ".join(TEST_ARGS)}); +XNN_TEST_IBILINEAR_CHANNELS_LT(ukernel, arch_flags, ${", ".join(TEST_ARGS)}); +XNN_TEST_IBILINEAR_CHANNELS_GT(ukernel, arch_flags, ${", ".join(TEST_ARGS)}); +XNN_TEST_IBILINEAR_PIXELS_DIV(ukernel, arch_flags, ${", ".join(TEST_ARGS)}); +XNN_TEST_IBILINEAR_PIXELS_LT(ukernel, arch_flags, ${", ".join(TEST_ARGS)}); +XNN_TEST_IBILINEAR_PIXELS_GT(ukernel, arch_flags, ${", ".join(TEST_ARGS)}); +XNN_TEST_IBILINEAR_INPUT_OFFSET(ukernel, arch_flags, ${", ".join(TEST_ARGS)}); +XNN_TEST_IBILINEAR_OUTPUT_STRIDE(ukernel, arch_flags, ${", ".join(TEST_ARGS)}); """ - -def generate_test_cases(ukernel, channel_tile, pixel_tile, isa): - """Generates all tests cases for a BILINEAR micro-kernel. - - Args: - ukernel: C name of the micro-kernel function. 
- channel_tile: Number of channels processed per one iteration of the inner - loop of the micro-kernel. - pixel_tile: Number of pixels processed per one iteration of the outer loop - of the micro-kernel. - isa: instruction set required to run the micro-kernel. Generated unit test - will skip execution if the host processor doesn't support this ISA. - - Returns: - Code for the test case. - """ - _, test_name = ukernel.split("_", 1) - _, datatype, ukernel_type, _ = ukernel.split("_", 3) - test_args = [ukernel] - return xngen.preprocess(IBILINEAR_TEST_TEMPLATE, { - "TEST_NAME": test_name.upper().replace("UKERNEL_", ""), - "TEST_FUNC": ukernel, - "UKERNEL_TYPE": ukernel_type.upper(), - "DATATYPE": datatype, - "CHANNEL_TILE": channel_tile, - "PIXEL_TILE": pixel_tile, - "ISA_CHECK": xnncommon.generate_isa_check_macro(isa), - "next_prime": next_prime, - }) - - def main(args): options = parser.parse_args(args) + tester = options.tester + tester_header = { + "IBilinearMicrokernelTester": "ibilinear-microkernel-tester.h", + }[tester] + ukernel = options.ukernel - with codecs.open(options.spec, "r", encoding="utf-8") as spec_file: - spec_yaml = yaml.safe_load(spec_file) - if not isinstance(spec_yaml, list): - raise ValueError("expected a list of micro-kernels in the spec") - - tests = """\ + tests = """\ // Copyright 2019 Google LLC // // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. // // Auto-generated file. Do not edit! 
-// Specification: {specification} +// Microkernel: {ukernel} // Generator: {generator} @@ -204,16 +66,29 @@ def main(args): #include "xnnpack/ibilinear.h" #include "xnnpack/isa-checks.h" #include "ibilinear-microkernel-tester.h" -""".format(specification=options.spec, generator=sys.argv[0]) - - for ukernel_spec in spec_yaml: - name = ukernel_spec["name"] - channel_tile, pixel_tile, arch, isa = split_ukernel_name(name) - - test_case = generate_test_cases(name, channel_tile, pixel_tile, isa) - tests += "\n\n" + xnncommon.postprocess_test_case(test_case, arch, isa) - - xnncommon.overwrite_if_changed(options.output, tests) +""".format(ukernel=options.ukernel, generator=sys.argv[0]) + + ukernel_parts = options.ukernel.split("-") + datatype = ukernel_parts[0] + op = ukernel_parts[1] + test_args = ["channel_tile"] + test_args.append("pixel_tile") + test_args.append("datatype") + test_args.append("weight_type") + test_args.append("params_type") + test_args.append("init_params") + tests += xnncommon.make_multiline_macro(xngen.preprocess( + IBILINEAR_TEST_TEMPLATE, + { + "TEST_ARGS": test_args, + "TESTER": tester, + "DATATYPE": datatype, + }, + )) + folder = datatype + "-" + ("ibilinear" if datatype.startswith("f") else op) + tests += f'#include "{folder}/{options.ukernel}.h"\n' + tests += "#undef XNN_UKERNEL_WITH_PARAMS\n" + xnncommon.overwrite_if_changed(options.output, tests) if __name__ == "__main__":