diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index cf0b710fd..d337630f6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -185,3 +185,50 @@ jobs: - run: make clang-debug - name: tests (wasm2c tests excluding memory64) run: ./test/run-tests.py wasm2c --exclude-dir memory64 + + build-cross: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + arch: [s390x] + services: # still faster on debian... + distcc: + image: debian:latest + options: --health-cmd distccmon-text --health-interval 5s --health-start-period 5m debian:latest bash -c "apt-get update && apt-get install -y g++-s390x-linux-gnu distcc && distccd --daemon --no-detach" + ports: + - 3632:3632 + env: + QEMU_LD_PREFIX: /usr/${{matrix.arch}}-linux-gnu/ + steps: + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + - uses: actions/checkout@v1 + with: + submodules: true + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + with: + platforms: ${{matrix.arch}} + image: "tonistiigi/binfmt:master" + - name: install ninja + run: sudo apt-get install ninja-build + - name: install the toolchain + run: sudo apt-get install g++-${{matrix.arch}}-linux-gnu + - name: install distcc + run: sudo apt-get install distcc + - name: mkdir distcc symlinks + run: sudo mkdir -p /opt/bin/distcc_symlinks + - name: distcc symlink + run: sudo ln -s /usr/bin/distcc /opt/bin/distcc_symlinks/${{matrix.arch}}-linux-gnu-gcc # only CC is needed + - name: cmake + run: cmake -S . 
-B out -G Ninja -DCMAKE_TOOLCHAIN_FILE=../scripts/TC-${{matrix.arch}}.cmake -DWITH_WASI=ON -DWERROR=OFF -Werror=dev -Wno-deprecated + - name: build + run: cmake --build out + - name: check if generated files are up-to-date + run: python ./scripts/check_clean.py + - name: unittests + run: cmake --build out --target run-unittests + - name: tests + run: cmake --build out --target run-tests diff --git a/scripts/TC-s390x.cmake b/scripts/TC-s390x.cmake new file mode 100644 index 000000000..74c307167 --- /dev/null +++ b/scripts/TC-s390x.cmake @@ -0,0 +1,11 @@ +set(CMAKE_SYSTEM_NAME Linux) + +set(CMAKE_C_COMPILER /opt/bin/distcc_symlinks/s390x-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER s390x-linux-gnu-g++) + +set(CMAKE_FIND_ROOT_PATH /usr/s390x-linux-gnu) + +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) diff --git a/scripts/clang-format-diff.sh b/scripts/clang-format-diff.sh index 3348542c2..1b4c73fdc 100755 --- a/scripts/clang-format-diff.sh +++ b/scripts/clang-format-diff.sh @@ -5,12 +5,23 @@ set -o pipefail if [ -n "$1" ]; then BRANCH="$1" +elif [ "$CI" != "true" ]; then + echo "Please specify a base branch in the command line" + exit 1 elif [ -n "$GITHUB_EVENT_BEFORE" ] && [ "push" = "$GITHUB_EVENT_NAME" ]; then BRANCH="$GITHUB_EVENT_BEFORE" elif [ -n "$GITHUB_BASE_REF" ]; then BRANCH="origin/$GITHUB_BASE_REF" -else +elif git symbolic-ref -q HEAD; then # check if we're in a branch BRANCH="@{upstream}" +else + # in a detached HEAD. + # default to origin/main, this is a "last resort" to make this script do the + # right thing, and is only really here so it works when pushing a new branch, + # with the caveat that it assumes the base branch to be called "main". + # (this has been the case with wabt for a while. may fail if the repo lacks a + # "main" branch for some reason.) 
+ BRANCH="origin/main" fi MERGE_BASE=$(git merge-base $BRANCH HEAD) diff --git a/src/c-writer.cc b/src/c-writer.cc index 0c3741fde..77c8e1f58 100644 --- a/src/c-writer.cc +++ b/src/c-writer.cc @@ -1297,9 +1297,11 @@ void CWriter::Write(const Const& const_) { break; } case Type::V128: { - Writef("simde_wasm_i32x4_const(0x%08x, 0x%08x, 0x%08x, 0x%08x)", - const_.vec128().u32(0), const_.vec128().u32(1), - const_.vec128().u32(2), const_.vec128().u32(3)); + Writef("v128_const(0x%02x", const_.vec128().u8(0)); + for (int i = 1; i < 16; i++) { + Writef(", 0x%02x", const_.vec128().u8(i)); + } + Write(")"); break; } @@ -4179,11 +4181,11 @@ void CWriter::Write(const BinaryExpr& expr) { break; case Opcode::I8X16NarrowI16X8S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i8x16_narrow_i16x8"); + WritePrefixBinaryExpr(expr.opcode, "v128_i8x16_narrow_i16x8"); break; case Opcode::I8X16NarrowI16X8U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u8x16_narrow_i16x8"); + WritePrefixBinaryExpr(expr.opcode, "v128_u8x16_narrow_i16x8"); break; case Opcode::I8X16Shl: @@ -4211,7 +4213,7 @@ void CWriter::Write(const BinaryExpr& expr) { break; case Opcode::I8X16Swizzle: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i8x16_swizzle"); + WritePrefixBinaryExpr(expr.opcode, "v128_i8x16_swizzle"); break; case Opcode::I16X8Add: @@ -4231,19 +4233,19 @@ void CWriter::Write(const BinaryExpr& expr) { break; case Opcode::I16X8ExtmulHighI8X16S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i16x8_extmul_high_i8x16"); + WritePrefixBinaryExpr(expr.opcode, "v128_i16x8_extmul_high_i8x16"); break; case Opcode::I16X8ExtmulHighI8X16U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u16x8_extmul_high_u8x16"); + WritePrefixBinaryExpr(expr.opcode, "v128_u16x8_extmul_high_u8x16"); break; case Opcode::I16X8ExtmulLowI8X16S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i16x8_extmul_low_i8x16"); + WritePrefixBinaryExpr(expr.opcode, "v128_i16x8_extmul_low_i8x16"); break; case Opcode::I16X8ExtmulLowI8X16U: - 
WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u16x8_extmul_low_u8x16"); + WritePrefixBinaryExpr(expr.opcode, "v128_u16x8_extmul_low_u8x16"); break; case Opcode::I16X8MaxS: @@ -4267,11 +4269,11 @@ void CWriter::Write(const BinaryExpr& expr) { break; case Opcode::I16X8NarrowI32X4S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i16x8_narrow_i32x4"); + WritePrefixBinaryExpr(expr.opcode, "v128_i16x8_narrow_i32x4"); break; case Opcode::I16X8NarrowI32X4U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u16x8_narrow_i32x4"); + WritePrefixBinaryExpr(expr.opcode, "v128_u16x8_narrow_i32x4"); break; case Opcode::I16X8Q15mulrSatS: @@ -4311,19 +4313,19 @@ void CWriter::Write(const BinaryExpr& expr) { break; case Opcode::I32X4ExtmulHighI16X8S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i32x4_extmul_high_i16x8"); + WritePrefixBinaryExpr(expr.opcode, "v128_i32x4_extmul_high_i16x8"); break; case Opcode::I32X4ExtmulHighI16X8U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u32x4_extmul_high_u16x8"); + WritePrefixBinaryExpr(expr.opcode, "v128_u32x4_extmul_high_u16x8"); break; case Opcode::I32X4ExtmulLowI16X8S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i32x4_extmul_low_i16x8"); + WritePrefixBinaryExpr(expr.opcode, "v128_i32x4_extmul_low_i16x8"); break; case Opcode::I32X4ExtmulLowI16X8U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u32x4_extmul_low_u16x8"); + WritePrefixBinaryExpr(expr.opcode, "v128_u32x4_extmul_low_u16x8"); break; case Opcode::I32X4MaxS: @@ -4367,19 +4369,19 @@ void CWriter::Write(const BinaryExpr& expr) { break; case Opcode::I64X2ExtmulHighI32X4S: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i64x2_extmul_high_i32x4"); + WritePrefixBinaryExpr(expr.opcode, "v128_i64x2_extmul_high_i32x4"); break; case Opcode::I64X2ExtmulHighI32X4U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u64x2_extmul_high_u32x4"); + WritePrefixBinaryExpr(expr.opcode, "v128_u64x2_extmul_high_u32x4"); break; case Opcode::I64X2ExtmulLowI32X4S: - 
WritePrefixBinaryExpr(expr.opcode, "simde_wasm_i64x2_extmul_low_i32x4"); + WritePrefixBinaryExpr(expr.opcode, "v128_i64x2_extmul_low_i32x4"); break; case Opcode::I64X2ExtmulLowI32X4U: - WritePrefixBinaryExpr(expr.opcode, "simde_wasm_u64x2_extmul_low_u32x4"); + WritePrefixBinaryExpr(expr.opcode, "v128_u64x2_extmul_low_u32x4"); break; case Opcode::I64X2Mul: @@ -4899,13 +4901,11 @@ void CWriter::Write(const ConvertExpr& expr) { break; case Opcode::I32X4TruncSatF64X2SZero: - WriteSimpleUnaryExpr(expr.opcode, - "simde_wasm_i32x4_trunc_sat_f64x2_zero"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i32x4_trunc_sat_f64x2_zero"); break; case Opcode::I32X4TruncSatF64X2UZero: - WriteSimpleUnaryExpr(expr.opcode, - "simde_wasm_u32x4_trunc_sat_f64x2_zero"); + WriteSimpleUnaryExpr(expr.opcode, "v128_u32x4_trunc_sat_f64x2_zero"); break; case Opcode::F32X4ConvertI32X4S: @@ -4917,19 +4917,19 @@ void CWriter::Write(const ConvertExpr& expr) { break; case Opcode::F32X4DemoteF64X2Zero: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_f32x4_demote_f64x2_zero"); + WriteSimpleUnaryExpr(expr.opcode, "v128_f32x4_demote_f64x2_zero"); break; case Opcode::F64X2ConvertLowI32X4S: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_f64x2_convert_low_i32x4"); + WriteSimpleUnaryExpr(expr.opcode, "v128_f64x2_convert_low_i32x4"); break; case Opcode::F64X2ConvertLowI32X4U: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_f64x2_convert_low_u32x4"); + WriteSimpleUnaryExpr(expr.opcode, "v128_f64x2_convert_low_u32x4"); break; case Opcode::F64X2PromoteLowF32X4: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_f64x2_promote_low_f32x4"); + WriteSimpleUnaryExpr(expr.opcode, "v128_f64x2_promote_low_f32x4"); break; default: @@ -5121,7 +5121,7 @@ void CWriter::Write(const UnaryExpr& expr) { break; case Opcode::I8X16Bitmask: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i8x16_bitmask"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i8x16_bitmask"); break; case Opcode::I8X16Neg: @@ -5145,7 +5145,7 @@ void CWriter::Write(const 
UnaryExpr& expr) { break; case Opcode::I16X8Bitmask: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i16x8_bitmask"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i16x8_bitmask"); break; case Opcode::I16X8ExtaddPairwiseI8X16S: @@ -5159,19 +5159,19 @@ void CWriter::Write(const UnaryExpr& expr) { break; case Opcode::I16X8ExtendHighI8X16S: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i16x8_extend_high_i8x16"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i16x8_extend_high_i8x16"); break; case Opcode::I16X8ExtendHighI8X16U: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_u16x8_extend_high_u8x16"); + WriteSimpleUnaryExpr(expr.opcode, "v128_u16x8_extend_high_u8x16"); break; case Opcode::I16X8ExtendLowI8X16S: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i16x8_extend_low_i8x16"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i16x8_extend_low_i8x16"); break; case Opcode::I16X8ExtendLowI8X16U: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_u16x8_extend_low_u8x16"); + WriteSimpleUnaryExpr(expr.opcode, "v128_u16x8_extend_low_u8x16"); break; case Opcode::I16X8Neg: @@ -5191,7 +5191,7 @@ void CWriter::Write(const UnaryExpr& expr) { break; case Opcode::I32X4Bitmask: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i32x4_bitmask"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i32x4_bitmask"); break; case Opcode::I32X4ExtaddPairwiseI16X8S: @@ -5205,19 +5205,19 @@ void CWriter::Write(const UnaryExpr& expr) { break; case Opcode::I32X4ExtendHighI16X8S: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i32x4_extend_high_i16x8"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i32x4_extend_high_i16x8"); break; case Opcode::I32X4ExtendHighI16X8U: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_u32x4_extend_high_u16x8"); + WriteSimpleUnaryExpr(expr.opcode, "v128_u32x4_extend_high_u16x8"); break; case Opcode::I32X4ExtendLowI16X8S: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i32x4_extend_low_i16x8"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i32x4_extend_low_i16x8"); break; case 
Opcode::I32X4ExtendLowI16X8U: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_u32x4_extend_low_u16x8"); + WriteSimpleUnaryExpr(expr.opcode, "v128_u32x4_extend_low_u16x8"); break; case Opcode::I32X4Neg: @@ -5237,23 +5237,23 @@ void CWriter::Write(const UnaryExpr& expr) { break; case Opcode::I64X2Bitmask: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i64x2_bitmask"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i64x2_bitmask"); break; case Opcode::I64X2ExtendHighI32X4S: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i64x2_extend_high_i32x4"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i64x2_extend_high_i32x4"); break; case Opcode::I64X2ExtendHighI32X4U: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_u64x2_extend_high_u32x4"); + WriteSimpleUnaryExpr(expr.opcode, "v128_u64x2_extend_high_u32x4"); break; case Opcode::I64X2ExtendLowI32X4S: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_i64x2_extend_low_i32x4"); + WriteSimpleUnaryExpr(expr.opcode, "v128_i64x2_extend_low_i32x4"); break; case Opcode::I64X2ExtendLowI32X4U: - WriteSimpleUnaryExpr(expr.opcode, "simde_wasm_u64x2_extend_low_u32x4"); + WriteSimpleUnaryExpr(expr.opcode, "v128_u64x2_extend_low_u32x4"); break; case Opcode::I64X2Neg: @@ -5361,85 +5361,85 @@ void CWriter::Write(const SimdLaneOpExpr& expr) { switch (expr.opcode) { case Opcode::I8X16ExtractLaneS: { - Write(StackVar(0, result_type), " = simde_wasm_i8x16_extract_lane(", + Write(StackVar(0, result_type), " = v128_i8x16_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::I8X16ExtractLaneU: { - Write(StackVar(0, result_type), " = simde_wasm_u8x16_extract_lane(", + Write(StackVar(0, result_type), " = v128_u8x16_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::I16X8ExtractLaneS: { - Write(StackVar(0, result_type), " = simde_wasm_i16x8_extract_lane(", + Write(StackVar(0, result_type), " = v128_i16x8_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); 
DropTypes(1); break; } case Opcode::I16X8ExtractLaneU: { - Write(StackVar(0, result_type), " = simde_wasm_u16x8_extract_lane(", + Write(StackVar(0, result_type), " = v128_u16x8_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::I32X4ExtractLane: { - Write(StackVar(0, result_type), " = simde_wasm_i32x4_extract_lane(", + Write(StackVar(0, result_type), " = v128_i32x4_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::I64X2ExtractLane: { - Write(StackVar(0, result_type), " = simde_wasm_i64x2_extract_lane(", + Write(StackVar(0, result_type), " = v128_i64x2_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::F32X4ExtractLane: { - Write(StackVar(0, result_type), " = simde_wasm_f32x4_extract_lane(", + Write(StackVar(0, result_type), " = v128_f32x4_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::F64X2ExtractLane: { - Write(StackVar(0, result_type), " = simde_wasm_f64x2_extract_lane(", + Write(StackVar(0, result_type), " = v128_f64x2_extract_lane(", StackVar(0), ", ", expr.val, ");", Newline()); DropTypes(1); break; } case Opcode::I8X16ReplaceLane: { - Write(StackVar(1, result_type), " = simde_wasm_i8x16_replace_lane(", + Write(StackVar(1, result_type), " = v128_i8x16_replace_lane(", StackVar(1), ", ", expr.val, ", ", StackVar(0), ");", Newline()); DropTypes(2); break; } case Opcode::I16X8ReplaceLane: { - Write(StackVar(1, result_type), " = simde_wasm_i16x8_replace_lane(", + Write(StackVar(1, result_type), " = v128_i16x8_replace_lane(", StackVar(1), ", ", expr.val, ", ", StackVar(0), ");", Newline()); DropTypes(2); break; } case Opcode::I32X4ReplaceLane: { - Write(StackVar(1, result_type), " = simde_wasm_i32x4_replace_lane(", + Write(StackVar(1, result_type), " = v128_i32x4_replace_lane(", StackVar(1), ", ", expr.val, ", ", StackVar(0), ");", Newline()); DropTypes(2); break; } case 
Opcode::I64X2ReplaceLane: { - Write(StackVar(1, result_type), " = simde_wasm_i64x2_replace_lane(", + Write(StackVar(1, result_type), " = v128_i64x2_replace_lane(", StackVar(1), ", ", expr.val, ", ", StackVar(0), ");", Newline()); DropTypes(2); break; } case Opcode::F32X4ReplaceLane: { - Write(StackVar(1, result_type), " = simde_wasm_f32x4_replace_lane(", + Write(StackVar(1, result_type), " = v128_f32x4_replace_lane(", StackVar(1), ", ", expr.val, ", ", StackVar(0), ");", Newline()); DropTypes(2); break; } case Opcode::F64X2ReplaceLane: { - Write(StackVar(1, result_type), " = simde_wasm_f64x2_replace_lane(", + Write(StackVar(1, result_type), " = v128_f64x2_replace_lane(", StackVar(1), ", ", expr.val, ", ", StackVar(0), ");", Newline()); DropTypes(2); break; @@ -5508,14 +5508,12 @@ void CWriter::Write(const SimdShuffleOpExpr& expr) { Type result_type = expr.opcode.GetResultType(); switch (expr.opcode) { case Opcode::I8X16Shuffle: { - Write(StackVar(1, result_type), " = simde_wasm_i8x16_shuffle(", - StackVar(1), ", ", StackVar(0), ", ", expr.val.u8(0), ", ", - expr.val.u8(1), ", ", expr.val.u8(2), ", ", expr.val.u8(3), ", ", - expr.val.u8(4), ", ", expr.val.u8(5), ", ", expr.val.u8(6), ", ", - expr.val.u8(7), ", ", expr.val.u8(8), ", ", expr.val.u8(9), ", ", - expr.val.u8(10), ", ", expr.val.u8(11), ", ", expr.val.u8(12), ", ", - expr.val.u8(13), ", ", expr.val.u8(14), ", ", expr.val.u8(15), ");", - Newline()); + Write(StackVar(1, result_type), " = v128_i8x16_shuffle(", StackVar(1), + ", ", StackVar(0)); + for (int i = 0; i < 16; i++) { + Write(", ", expr.val.u8(i)); + } + Write(");", Newline()); DropTypes(2); break; } diff --git a/src/prebuilt/wasm2c_simd_source_declarations.cc b/src/prebuilt/wasm2c_simd_source_declarations.cc index 07425377a..7c0c9fb4b 100644 --- a/src/prebuilt/wasm2c_simd_source_declarations.cc +++ b/src/prebuilt/wasm2c_simd_source_declarations.cc @@ -65,7 +65,38 @@ R"w2c_template( } R"w2c_template( // clang-format off )w2c_template" 
-R"w2c_template(DEFINE_SIMD_LOAD_FUNC(v128_load, simde_wasm_v128_load, v128) +R"w2c_template(#if WABT_BIG_ENDIAN +)w2c_template" +R"w2c_template(static inline v128 v128_impl_load32_zero(const void* a) { +)w2c_template" +R"w2c_template( return simde_wasm_i8x16_swizzle( +)w2c_template" +R"w2c_template( simde_wasm_v128_load32_zero(a), +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3)); +)w2c_template" +R"w2c_template(} +)w2c_template" +R"w2c_template(static inline v128 v128_impl_load64_zero(const void* a) { +)w2c_template" +R"w2c_template( return simde_wasm_i8x16_swizzle( +)w2c_template" +R"w2c_template( simde_wasm_v128_load64_zero(a), +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); +)w2c_template" +R"w2c_template(} +)w2c_template" +R"w2c_template(#else +)w2c_template" +R"w2c_template(#define v128_impl_load32_zero simde_wasm_v128_load32_zero +)w2c_template" +R"w2c_template(#define v128_impl_load64_zero simde_wasm_v128_load64_zero +)w2c_template" +R"w2c_template(#endif +)w2c_template" +R"w2c_template( +DEFINE_SIMD_LOAD_FUNC(v128_load, simde_wasm_v128_load, v128) )w2c_template" R"w2c_template( DEFINE_SIMD_LOAD_FUNC(v128_load8_splat, simde_wasm_v128_load8_splat, u8) @@ -90,12 +121,76 @@ R"w2c_template(DEFINE_SIMD_LOAD_FUNC(i64x2_load32x2, simde_wasm_i64x2_load32x2, R"w2c_template(DEFINE_SIMD_LOAD_FUNC(u64x2_load32x2, simde_wasm_u64x2_load32x2, u64) )w2c_template" R"w2c_template( -DEFINE_SIMD_LOAD_FUNC(v128_load32_zero, simde_wasm_v128_load32_zero, u32) +DEFINE_SIMD_LOAD_FUNC(v128_load32_zero, v128_impl_load32_zero, u32) )w2c_template" -R"w2c_template(DEFINE_SIMD_LOAD_FUNC(v128_load64_zero, simde_wasm_v128_load64_zero, u64) +R"w2c_template(DEFINE_SIMD_LOAD_FUNC(v128_load64_zero, v128_impl_load64_zero, u64) )w2c_template" R"w2c_template( -DEFINE_SIMD_LOAD_LANE(v128_load8_lane0, simde_wasm_v128_load8_lane, u8, 0) +#if WABT_BIG_ENDIAN +)w2c_template" 
+R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane0, simde_wasm_v128_load8_lane, u8, 15) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane1, simde_wasm_v128_load8_lane, u8, 14) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane2, simde_wasm_v128_load8_lane, u8, 13) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane3, simde_wasm_v128_load8_lane, u8, 12) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane4, simde_wasm_v128_load8_lane, u8, 11) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane5, simde_wasm_v128_load8_lane, u8, 10) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane6, simde_wasm_v128_load8_lane, u8, 9) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane7, simde_wasm_v128_load8_lane, u8, 8) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane8, simde_wasm_v128_load8_lane, u8, 7) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane9, simde_wasm_v128_load8_lane, u8, 6) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane10, simde_wasm_v128_load8_lane, u8, 5) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane11, simde_wasm_v128_load8_lane, u8, 4) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane12, simde_wasm_v128_load8_lane, u8, 3) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane13, simde_wasm_v128_load8_lane, u8, 2) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane14, simde_wasm_v128_load8_lane, u8, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane15, simde_wasm_v128_load8_lane, u8, 0) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane0, simde_wasm_v128_load16_lane, u16, 7) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane1, simde_wasm_v128_load16_lane, u16, 6) +)w2c_template" 
+R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane2, simde_wasm_v128_load16_lane, u16, 5) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane3, simde_wasm_v128_load16_lane, u16, 4) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane4, simde_wasm_v128_load16_lane, u16, 3) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane5, simde_wasm_v128_load16_lane, u16, 2) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane6, simde_wasm_v128_load16_lane, u16, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load16_lane7, simde_wasm_v128_load16_lane, u16, 0) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load32_lane0, simde_wasm_v128_load32_lane, u32, 3) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load32_lane1, simde_wasm_v128_load32_lane, u32, 2) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load32_lane2, simde_wasm_v128_load32_lane, u32, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load32_lane3, simde_wasm_v128_load32_lane, u32, 0) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load64_lane0, simde_wasm_v128_load64_lane, u64, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load64_lane1, simde_wasm_v128_load64_lane, u64, 0) +)w2c_template" +R"w2c_template(#else +)w2c_template" +R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane0, simde_wasm_v128_load8_lane, u8, 0) )w2c_template" R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load8_lane1, simde_wasm_v128_load8_lane, u8, 1) )w2c_template" @@ -155,11 +250,77 @@ R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load64_lane0, simde_wasm_v128_load64_l )w2c_template" R"w2c_template(DEFINE_SIMD_LOAD_LANE(v128_load64_lane1, simde_wasm_v128_load64_lane, u64, 1) )w2c_template" +R"w2c_template(#endif +)w2c_template" R"w2c_template( DEFINE_SIMD_STORE(v128_store, v128) )w2c_template" R"w2c_template( -DEFINE_SIMD_STORE_LANE(v128_store8_lane0, simde_wasm_v128_store8_lane, u8, 0) 
+#if WABT_BIG_ENDIAN +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane0, simde_wasm_v128_store8_lane, u8, 15) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane1, simde_wasm_v128_store8_lane, u8, 14) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane2, simde_wasm_v128_store8_lane, u8, 13) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane3, simde_wasm_v128_store8_lane, u8, 12) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane4, simde_wasm_v128_store8_lane, u8, 11) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane5, simde_wasm_v128_store8_lane, u8, 10) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane6, simde_wasm_v128_store8_lane, u8, 9) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane7, simde_wasm_v128_store8_lane, u8, 8) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane8, simde_wasm_v128_store8_lane, u8, 7) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane9, simde_wasm_v128_store8_lane, u8, 6) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane10, simde_wasm_v128_store8_lane, u8, 5) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane11, simde_wasm_v128_store8_lane, u8, 4) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane12, simde_wasm_v128_store8_lane, u8, 3) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane13, simde_wasm_v128_store8_lane, u8, 2) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane14, simde_wasm_v128_store8_lane, u8, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane15, simde_wasm_v128_store8_lane, u8, 0) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane0, simde_wasm_v128_store16_lane, u16, 7) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane1, 
simde_wasm_v128_store16_lane, u16, 6) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane2, simde_wasm_v128_store16_lane, u16, 5) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane3, simde_wasm_v128_store16_lane, u16, 4) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane4, simde_wasm_v128_store16_lane, u16, 3) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane5, simde_wasm_v128_store16_lane, u16, 2) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane6, simde_wasm_v128_store16_lane, u16, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store16_lane7, simde_wasm_v128_store16_lane, u16, 0) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store32_lane0, simde_wasm_v128_store32_lane, u32, 3) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store32_lane1, simde_wasm_v128_store32_lane, u32, 2) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store32_lane2, simde_wasm_v128_store32_lane, u32, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store32_lane3, simde_wasm_v128_store32_lane, u32, 0) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store64_lane0, simde_wasm_v128_store64_lane, u64, 1) +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store64_lane1, simde_wasm_v128_store64_lane, u64, 0) +)w2c_template" +R"w2c_template(#else +)w2c_template" +R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane0, simde_wasm_v128_store8_lane, u8, 0) )w2c_template" R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store8_lane1, simde_wasm_v128_store8_lane, u8, 1) )w2c_template" @@ -219,6 +380,279 @@ R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store64_lane0, simde_wasm_v128_store6 )w2c_template" R"w2c_template(DEFINE_SIMD_STORE_LANE(v128_store64_lane1, simde_wasm_v128_store64_lane, u64, 1) )w2c_template" +R"w2c_template(#endif +)w2c_template" +R"w2c_template( +#if WABT_BIG_ENDIAN +)w2c_template" 
+R"w2c_template(#define v128_const(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) simde_wasm_i8x16_const(p,o,n,m,l,k,j,i,h,g,f,e,d,c,b,a) +)w2c_template" +R"w2c_template(#define v128_i8x16_extract_lane(v, l) simde_wasm_i8x16_extract_lane(v, 15-(l)) +)w2c_template" +R"w2c_template(#define v128_u8x16_extract_lane(v, l) simde_wasm_u8x16_extract_lane(v, 15-(l)) +)w2c_template" +R"w2c_template(#define v128_i16x8_extract_lane(v, l) simde_wasm_i16x8_extract_lane(v, 7-(l)) +)w2c_template" +R"w2c_template(#define v128_u16x8_extract_lane(v, l) simde_wasm_u16x8_extract_lane(v, 7-(l)) +)w2c_template" +R"w2c_template(#define v128_i32x4_extract_lane(v, l) simde_wasm_i32x4_extract_lane(v, 3-(l)) +)w2c_template" +R"w2c_template(#define v128_i64x2_extract_lane(v, l) simde_wasm_i64x2_extract_lane(v, 1-(l)) +)w2c_template" +R"w2c_template(#define v128_f32x4_extract_lane(v, l) simde_wasm_f32x4_extract_lane(v, 3-(l)) +)w2c_template" +R"w2c_template(#define v128_f64x2_extract_lane(v, l) simde_wasm_f64x2_extract_lane(v, 1-(l)) +)w2c_template" +R"w2c_template(#define v128_i8x16_replace_lane(v, l, x) simde_wasm_i8x16_replace_lane(v, 15-(l), x) +)w2c_template" +R"w2c_template(#define v128_u8x16_replace_lane(v, l, x) simde_wasm_u8x16_replace_lane(v, 15-(l), x) +)w2c_template" +R"w2c_template(#define v128_i16x8_replace_lane(v, l, x) simde_wasm_i16x8_replace_lane(v, 7-(l), x) +)w2c_template" +R"w2c_template(#define v128_u16x8_replace_lane(v, l, x) simde_wasm_u16x8_replace_lane(v, 7-(l), x) +)w2c_template" +R"w2c_template(#define v128_i32x4_replace_lane(v, l, x) simde_wasm_i32x4_replace_lane(v, 3-(l), x) +)w2c_template" +R"w2c_template(#define v128_i64x2_replace_lane(v, l, x) simde_wasm_i64x2_replace_lane(v, 1-(l), x) +)w2c_template" +R"w2c_template(#define v128_f32x4_replace_lane(v, l, x) simde_wasm_f32x4_replace_lane(v, 3-(l), x) +)w2c_template" +R"w2c_template(#define v128_f64x2_replace_lane(v, l, x) simde_wasm_f64x2_replace_lane(v, 1-(l), x) +)w2c_template" +R"w2c_template(#define v128_i8x16_bitmask(v) 
simde_wasm_i8x16_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0))) +)w2c_template" +R"w2c_template(#define v128_i16x8_bitmask(v) simde_wasm_i16x8_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1))) +)w2c_template" +R"w2c_template(#define v128_i32x4_bitmask(v) simde_wasm_i32x4_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3))) +)w2c_template" +R"w2c_template(#define v128_i64x2_bitmask(v) simde_wasm_i64x2_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +)w2c_template" +R"w2c_template(#define v128_i8x16_swizzle(v1, v2) simde_wasm_i8x16_swizzle(v1, simde_wasm_v128_xor(v2, simde_wasm_i8x16_splat(15))) +)w2c_template" +R"w2c_template(#define v128_i8x16_shuffle(v1,v2,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) simde_wasm_i8x16_shuffle(v2,v1,31-(p),31-(o),31-(n),31-(m),31-(l),31-(k),31-(j),31-(i),31-(h),31-(g),31-(f),31-(e),31-(d),31-(c),31-(b),31-(a)) +)w2c_template" +R"w2c_template(#define v128_i16x8_extmul_high_i8x16 simde_wasm_i16x8_extmul_low_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extmul_high_u8x16 simde_wasm_u16x8_extmul_low_u8x16 +)w2c_template" +R"w2c_template(#define v128_i16x8_extmul_low_i8x16 simde_wasm_i16x8_extmul_high_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extmul_low_u8x16 simde_wasm_u16x8_extmul_high_u8x16 +)w2c_template" +R"w2c_template(#define v128_i32x4_extmul_high_i16x8 simde_wasm_i32x4_extmul_low_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extmul_high_u16x8 simde_wasm_u32x4_extmul_low_u16x8 +)w2c_template" +R"w2c_template(#define v128_i32x4_extmul_low_i16x8 simde_wasm_i32x4_extmul_high_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extmul_low_u16x8 simde_wasm_u32x4_extmul_high_u16x8 +)w2c_template" +R"w2c_template(#define v128_i64x2_extmul_high_i32x4 simde_wasm_i64x2_extmul_low_i32x4 +)w2c_template" 
+R"w2c_template(#define v128_u64x2_extmul_high_u32x4 simde_wasm_u64x2_extmul_low_u32x4 +)w2c_template" +R"w2c_template(#define v128_i64x2_extmul_low_i32x4 simde_wasm_i64x2_extmul_high_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extmul_low_u32x4 simde_wasm_u64x2_extmul_high_u32x4 +)w2c_template" +R"w2c_template(#define v128_i16x8_extend_high_i8x16 simde_wasm_i16x8_extend_low_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extend_high_u8x16 simde_wasm_u16x8_extend_low_u8x16 +)w2c_template" +R"w2c_template(#define v128_i16x8_extend_low_i8x16 simde_wasm_i16x8_extend_high_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extend_low_u8x16 simde_wasm_u16x8_extend_high_u8x16 +)w2c_template" +R"w2c_template(#define v128_i32x4_extend_high_i16x8 simde_wasm_i32x4_extend_low_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extend_high_u16x8 simde_wasm_u32x4_extend_low_u16x8 +)w2c_template" +R"w2c_template(#define v128_i32x4_extend_low_i16x8 simde_wasm_i32x4_extend_high_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extend_low_u16x8 simde_wasm_u32x4_extend_high_u16x8 +)w2c_template" +R"w2c_template(#define v128_i64x2_extend_high_i32x4 simde_wasm_i64x2_extend_low_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extend_high_u32x4 simde_wasm_u64x2_extend_low_u32x4 +)w2c_template" +R"w2c_template(#define v128_i64x2_extend_low_i32x4 simde_wasm_i64x2_extend_high_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extend_low_u32x4 simde_wasm_u64x2_extend_high_u32x4 +)w2c_template" +R"w2c_template(#define v128_i32x4_trunc_sat_f64x2_zero(a) \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_swizzle( \ +)w2c_template" +R"w2c_template( simde_wasm_i32x4_trunc_sat_f64x2_zero(a), \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)) +)w2c_template" +R"w2c_template(#define v128_u32x4_trunc_sat_f64x2_zero(a) \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_swizzle( \ +)w2c_template" 
+R"w2c_template( simde_wasm_u32x4_trunc_sat_f64x2_zero(a), \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)) +)w2c_template" +R"w2c_template(#define v128_i16x8_narrow_i32x4(a,b) simde_wasm_i16x8_narrow_i32x4(b,a) +)w2c_template" +R"w2c_template(#define v128_u16x8_narrow_i32x4(a,b) simde_wasm_u16x8_narrow_i32x4(b,a) +)w2c_template" +R"w2c_template(#define v128_i8x16_narrow_i16x8(a,b) simde_wasm_i8x16_narrow_i16x8(b,a) +)w2c_template" +R"w2c_template(#define v128_u8x16_narrow_i16x8(a,b) simde_wasm_u8x16_narrow_i16x8(b,a) +)w2c_template" +R"w2c_template(#define v128_f64x2_promote_low_f32x4(a) \ +)w2c_template" +R"w2c_template( simde_wasm_f64x2_promote_low_f32x4(simde_wasm_i8x16_swizzle( \ +)w2c_template" +R"w2c_template( a, \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +)w2c_template" +R"w2c_template(#define v128_f32x4_demote_f64x2_zero(a) \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_swizzle( \ +)w2c_template" +R"w2c_template( simde_wasm_f32x4_demote_f64x2_zero(a), \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)) +)w2c_template" +R"w2c_template(#define v128_f64x2_convert_low_i32x4(a) \ +)w2c_template" +R"w2c_template( simde_wasm_f64x2_convert_low_i32x4(simde_wasm_i8x16_swizzle( \ +)w2c_template" +R"w2c_template( a, \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +)w2c_template" +R"w2c_template(#define v128_f64x2_convert_low_u32x4(a) \ +)w2c_template" +R"w2c_template( simde_wasm_f64x2_convert_low_u32x4(simde_wasm_i8x16_swizzle( \ +)w2c_template" +R"w2c_template( a, \ +)w2c_template" +R"w2c_template( simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +)w2c_template" +R"w2c_template(#else +)w2c_template" +R"w2c_template(#define v128_const simde_wasm_i8x16_const +)w2c_template" +R"w2c_template(#define v128_i8x16_extract_lane simde_wasm_i8x16_extract_lane 
+)w2c_template" +R"w2c_template(#define v128_u8x16_extract_lane simde_wasm_u8x16_extract_lane +)w2c_template" +R"w2c_template(#define v128_i16x8_extract_lane simde_wasm_i16x8_extract_lane +)w2c_template" +R"w2c_template(#define v128_u16x8_extract_lane simde_wasm_u16x8_extract_lane +)w2c_template" +R"w2c_template(#define v128_i32x4_extract_lane simde_wasm_i32x4_extract_lane +)w2c_template" +R"w2c_template(#define v128_i64x2_extract_lane simde_wasm_i64x2_extract_lane +)w2c_template" +R"w2c_template(#define v128_f32x4_extract_lane simde_wasm_f32x4_extract_lane +)w2c_template" +R"w2c_template(#define v128_f64x2_extract_lane simde_wasm_f64x2_extract_lane +)w2c_template" +R"w2c_template(#define v128_i8x16_replace_lane simde_wasm_i8x16_replace_lane +)w2c_template" +R"w2c_template(#define v128_u8x16_replace_lane simde_wasm_u8x16_replace_lane +)w2c_template" +R"w2c_template(#define v128_i16x8_replace_lane simde_wasm_i16x8_replace_lane +)w2c_template" +R"w2c_template(#define v128_u16x8_replace_lane simde_wasm_u16x8_replace_lane +)w2c_template" +R"w2c_template(#define v128_i32x4_replace_lane simde_wasm_i32x4_replace_lane +)w2c_template" +R"w2c_template(#define v128_i64x2_replace_lane simde_wasm_i64x2_replace_lane +)w2c_template" +R"w2c_template(#define v128_f32x4_replace_lane simde_wasm_f32x4_replace_lane +)w2c_template" +R"w2c_template(#define v128_f64x2_replace_lane simde_wasm_f64x2_replace_lane +)w2c_template" +R"w2c_template(#define v128_i8x16_bitmask simde_wasm_i8x16_bitmask +)w2c_template" +R"w2c_template(#define v128_i16x8_bitmask simde_wasm_i16x8_bitmask +)w2c_template" +R"w2c_template(#define v128_i32x4_bitmask simde_wasm_i32x4_bitmask +)w2c_template" +R"w2c_template(#define v128_i64x2_bitmask simde_wasm_i64x2_bitmask +)w2c_template" +R"w2c_template(#define v128_i8x16_swizzle simde_wasm_i8x16_swizzle +)w2c_template" +R"w2c_template(#define v128_i8x16_shuffle simde_wasm_i8x16_shuffle +)w2c_template" +R"w2c_template(#define v128_i16x8_extmul_high_i8x16 
simde_wasm_i16x8_extmul_high_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extmul_high_u8x16 simde_wasm_u16x8_extmul_high_u8x16 +)w2c_template" +R"w2c_template(#define v128_i16x8_extmul_low_i8x16 simde_wasm_i16x8_extmul_low_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extmul_low_u8x16 simde_wasm_u16x8_extmul_low_u8x16 +)w2c_template" +R"w2c_template(#define v128_i32x4_extmul_high_i16x8 simde_wasm_i32x4_extmul_high_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extmul_high_u16x8 simde_wasm_u32x4_extmul_high_u16x8 +)w2c_template" +R"w2c_template(#define v128_i32x4_extmul_low_i16x8 simde_wasm_i32x4_extmul_low_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extmul_low_u16x8 simde_wasm_u32x4_extmul_low_u16x8 +)w2c_template" +R"w2c_template(#define v128_i64x2_extmul_high_i32x4 simde_wasm_i64x2_extmul_high_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extmul_high_u32x4 simde_wasm_u64x2_extmul_high_u32x4 +)w2c_template" +R"w2c_template(#define v128_i64x2_extmul_low_i32x4 simde_wasm_i64x2_extmul_low_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extmul_low_u32x4 simde_wasm_u64x2_extmul_low_u32x4 +)w2c_template" +R"w2c_template(#define v128_i16x8_extend_high_i8x16 simde_wasm_i16x8_extend_high_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extend_high_u8x16 simde_wasm_u16x8_extend_high_u8x16 +)w2c_template" +R"w2c_template(#define v128_i16x8_extend_low_i8x16 simde_wasm_i16x8_extend_low_i8x16 +)w2c_template" +R"w2c_template(#define v128_u16x8_extend_low_u8x16 simde_wasm_u16x8_extend_low_u8x16 +)w2c_template" +R"w2c_template(#define v128_i32x4_extend_high_i16x8 simde_wasm_i32x4_extend_high_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extend_high_u16x8 simde_wasm_u32x4_extend_high_u16x8 +)w2c_template" +R"w2c_template(#define v128_i32x4_extend_low_i16x8 simde_wasm_i32x4_extend_low_i16x8 +)w2c_template" +R"w2c_template(#define v128_u32x4_extend_low_u16x8 simde_wasm_u32x4_extend_low_u16x8 
+)w2c_template" +R"w2c_template(#define v128_i64x2_extend_high_i32x4 simde_wasm_i64x2_extend_high_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extend_high_u32x4 simde_wasm_u64x2_extend_high_u32x4 +)w2c_template" +R"w2c_template(#define v128_i64x2_extend_low_i32x4 simde_wasm_i64x2_extend_low_i32x4 +)w2c_template" +R"w2c_template(#define v128_u64x2_extend_low_u32x4 simde_wasm_u64x2_extend_low_u32x4 +)w2c_template" +R"w2c_template(#define v128_i32x4_trunc_sat_f64x2_zero simde_wasm_i32x4_trunc_sat_f64x2_zero +)w2c_template" +R"w2c_template(#define v128_u32x4_trunc_sat_f64x2_zero simde_wasm_u32x4_trunc_sat_f64x2_zero +)w2c_template" +R"w2c_template(#define v128_i16x8_narrow_i32x4 simde_wasm_i16x8_narrow_i32x4 +)w2c_template" +R"w2c_template(#define v128_u16x8_narrow_i32x4 simde_wasm_u16x8_narrow_i32x4 +)w2c_template" +R"w2c_template(#define v128_i8x16_narrow_i16x8 simde_wasm_i8x16_narrow_i16x8 +)w2c_template" +R"w2c_template(#define v128_u8x16_narrow_i16x8 simde_wasm_u8x16_narrow_i16x8 +)w2c_template" +R"w2c_template(#define v128_f64x2_promote_low_f32x4 simde_wasm_f64x2_promote_low_f32x4 +)w2c_template" +R"w2c_template(#define v128_f32x4_demote_f64x2_zero simde_wasm_f32x4_demote_f64x2_zero +)w2c_template" +R"w2c_template(#define v128_f64x2_convert_low_i32x4 simde_wasm_f64x2_convert_low_i32x4 +)w2c_template" +R"w2c_template(#define v128_f64x2_convert_low_u32x4 simde_wasm_f64x2_convert_low_u32x4 +)w2c_template" +R"w2c_template(#endif +)w2c_template" R"w2c_template(// clang-format on )w2c_template" ; diff --git a/src/template/wasm2c_simd.declarations.c b/src/template/wasm2c_simd.declarations.c index cf397856b..d37631add 100644 --- a/src/template/wasm2c_simd.declarations.c +++ b/src/template/wasm2c_simd.declarations.c @@ -34,6 +34,22 @@ } // clang-format off +#if WABT_BIG_ENDIAN +static inline v128 v128_impl_load32_zero(const void* a) { + return simde_wasm_i8x16_swizzle( + simde_wasm_v128_load32_zero(a), + 
simde_wasm_i8x16_const(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3)); +} +static inline v128 v128_impl_load64_zero(const void* a) { + return simde_wasm_i8x16_swizzle( + simde_wasm_v128_load64_zero(a), + simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)); +} +#else +#define v128_impl_load32_zero simde_wasm_v128_load32_zero +#define v128_impl_load64_zero simde_wasm_v128_load64_zero +#endif + DEFINE_SIMD_LOAD_FUNC(v128_load, simde_wasm_v128_load, v128) DEFINE_SIMD_LOAD_FUNC(v128_load8_splat, simde_wasm_v128_load8_splat, u8) @@ -48,9 +64,41 @@ DEFINE_SIMD_LOAD_FUNC(u32x4_load16x4, simde_wasm_u32x4_load16x4, u64) DEFINE_SIMD_LOAD_FUNC(i64x2_load32x2, simde_wasm_i64x2_load32x2, u64) DEFINE_SIMD_LOAD_FUNC(u64x2_load32x2, simde_wasm_u64x2_load32x2, u64) -DEFINE_SIMD_LOAD_FUNC(v128_load32_zero, simde_wasm_v128_load32_zero, u32) -DEFINE_SIMD_LOAD_FUNC(v128_load64_zero, simde_wasm_v128_load64_zero, u64) +DEFINE_SIMD_LOAD_FUNC(v128_load32_zero, v128_impl_load32_zero, u32) +DEFINE_SIMD_LOAD_FUNC(v128_load64_zero, v128_impl_load64_zero, u64) +#if WABT_BIG_ENDIAN +DEFINE_SIMD_LOAD_LANE(v128_load8_lane0, simde_wasm_v128_load8_lane, u8, 15) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane1, simde_wasm_v128_load8_lane, u8, 14) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane2, simde_wasm_v128_load8_lane, u8, 13) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane3, simde_wasm_v128_load8_lane, u8, 12) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane4, simde_wasm_v128_load8_lane, u8, 11) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane5, simde_wasm_v128_load8_lane, u8, 10) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane6, simde_wasm_v128_load8_lane, u8, 9) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane7, simde_wasm_v128_load8_lane, u8, 8) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane8, simde_wasm_v128_load8_lane, u8, 7) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane9, simde_wasm_v128_load8_lane, u8, 6) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane10, simde_wasm_v128_load8_lane, u8, 5) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane11, simde_wasm_v128_load8_lane, u8, 4) 
+DEFINE_SIMD_LOAD_LANE(v128_load8_lane12, simde_wasm_v128_load8_lane, u8, 3) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane13, simde_wasm_v128_load8_lane, u8, 2) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane14, simde_wasm_v128_load8_lane, u8, 1) +DEFINE_SIMD_LOAD_LANE(v128_load8_lane15, simde_wasm_v128_load8_lane, u8, 0) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane0, simde_wasm_v128_load16_lane, u16, 7) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane1, simde_wasm_v128_load16_lane, u16, 6) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane2, simde_wasm_v128_load16_lane, u16, 5) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane3, simde_wasm_v128_load16_lane, u16, 4) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane4, simde_wasm_v128_load16_lane, u16, 3) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane5, simde_wasm_v128_load16_lane, u16, 2) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane6, simde_wasm_v128_load16_lane, u16, 1) +DEFINE_SIMD_LOAD_LANE(v128_load16_lane7, simde_wasm_v128_load16_lane, u16, 0) +DEFINE_SIMD_LOAD_LANE(v128_load32_lane0, simde_wasm_v128_load32_lane, u32, 3) +DEFINE_SIMD_LOAD_LANE(v128_load32_lane1, simde_wasm_v128_load32_lane, u32, 2) +DEFINE_SIMD_LOAD_LANE(v128_load32_lane2, simde_wasm_v128_load32_lane, u32, 1) +DEFINE_SIMD_LOAD_LANE(v128_load32_lane3, simde_wasm_v128_load32_lane, u32, 0) +DEFINE_SIMD_LOAD_LANE(v128_load64_lane0, simde_wasm_v128_load64_lane, u64, 1) +DEFINE_SIMD_LOAD_LANE(v128_load64_lane1, simde_wasm_v128_load64_lane, u64, 0) +#else DEFINE_SIMD_LOAD_LANE(v128_load8_lane0, simde_wasm_v128_load8_lane, u8, 0) DEFINE_SIMD_LOAD_LANE(v128_load8_lane1, simde_wasm_v128_load8_lane, u8, 1) DEFINE_SIMD_LOAD_LANE(v128_load8_lane2, simde_wasm_v128_load8_lane, u8, 2) @@ -81,9 +129,42 @@ DEFINE_SIMD_LOAD_LANE(v128_load32_lane2, simde_wasm_v128_load32_lane, u32, 2) DEFINE_SIMD_LOAD_LANE(v128_load32_lane3, simde_wasm_v128_load32_lane, u32, 3) DEFINE_SIMD_LOAD_LANE(v128_load64_lane0, simde_wasm_v128_load64_lane, u64, 0) DEFINE_SIMD_LOAD_LANE(v128_load64_lane1, simde_wasm_v128_load64_lane, u64, 1) +#endif 
DEFINE_SIMD_STORE(v128_store, v128) +#if WABT_BIG_ENDIAN +DEFINE_SIMD_STORE_LANE(v128_store8_lane0, simde_wasm_v128_store8_lane, u8, 15) +DEFINE_SIMD_STORE_LANE(v128_store8_lane1, simde_wasm_v128_store8_lane, u8, 14) +DEFINE_SIMD_STORE_LANE(v128_store8_lane2, simde_wasm_v128_store8_lane, u8, 13) +DEFINE_SIMD_STORE_LANE(v128_store8_lane3, simde_wasm_v128_store8_lane, u8, 12) +DEFINE_SIMD_STORE_LANE(v128_store8_lane4, simde_wasm_v128_store8_lane, u8, 11) +DEFINE_SIMD_STORE_LANE(v128_store8_lane5, simde_wasm_v128_store8_lane, u8, 10) +DEFINE_SIMD_STORE_LANE(v128_store8_lane6, simde_wasm_v128_store8_lane, u8, 9) +DEFINE_SIMD_STORE_LANE(v128_store8_lane7, simde_wasm_v128_store8_lane, u8, 8) +DEFINE_SIMD_STORE_LANE(v128_store8_lane8, simde_wasm_v128_store8_lane, u8, 7) +DEFINE_SIMD_STORE_LANE(v128_store8_lane9, simde_wasm_v128_store8_lane, u8, 6) +DEFINE_SIMD_STORE_LANE(v128_store8_lane10, simde_wasm_v128_store8_lane, u8, 5) +DEFINE_SIMD_STORE_LANE(v128_store8_lane11, simde_wasm_v128_store8_lane, u8, 4) +DEFINE_SIMD_STORE_LANE(v128_store8_lane12, simde_wasm_v128_store8_lane, u8, 3) +DEFINE_SIMD_STORE_LANE(v128_store8_lane13, simde_wasm_v128_store8_lane, u8, 2) +DEFINE_SIMD_STORE_LANE(v128_store8_lane14, simde_wasm_v128_store8_lane, u8, 1) +DEFINE_SIMD_STORE_LANE(v128_store8_lane15, simde_wasm_v128_store8_lane, u8, 0) +DEFINE_SIMD_STORE_LANE(v128_store16_lane0, simde_wasm_v128_store16_lane, u16, 7) +DEFINE_SIMD_STORE_LANE(v128_store16_lane1, simde_wasm_v128_store16_lane, u16, 6) +DEFINE_SIMD_STORE_LANE(v128_store16_lane2, simde_wasm_v128_store16_lane, u16, 5) +DEFINE_SIMD_STORE_LANE(v128_store16_lane3, simde_wasm_v128_store16_lane, u16, 4) +DEFINE_SIMD_STORE_LANE(v128_store16_lane4, simde_wasm_v128_store16_lane, u16, 3) +DEFINE_SIMD_STORE_LANE(v128_store16_lane5, simde_wasm_v128_store16_lane, u16, 2) +DEFINE_SIMD_STORE_LANE(v128_store16_lane6, simde_wasm_v128_store16_lane, u16, 1) +DEFINE_SIMD_STORE_LANE(v128_store16_lane7, simde_wasm_v128_store16_lane, u16, 0) 
+DEFINE_SIMD_STORE_LANE(v128_store32_lane0, simde_wasm_v128_store32_lane, u32, 3) +DEFINE_SIMD_STORE_LANE(v128_store32_lane1, simde_wasm_v128_store32_lane, u32, 2) +DEFINE_SIMD_STORE_LANE(v128_store32_lane2, simde_wasm_v128_store32_lane, u32, 1) +DEFINE_SIMD_STORE_LANE(v128_store32_lane3, simde_wasm_v128_store32_lane, u32, 0) +DEFINE_SIMD_STORE_LANE(v128_store64_lane0, simde_wasm_v128_store64_lane, u64, 1) +DEFINE_SIMD_STORE_LANE(v128_store64_lane1, simde_wasm_v128_store64_lane, u64, 0) +#else DEFINE_SIMD_STORE_LANE(v128_store8_lane0, simde_wasm_v128_store8_lane, u8, 0) DEFINE_SIMD_STORE_LANE(v128_store8_lane1, simde_wasm_v128_store8_lane, u8, 1) DEFINE_SIMD_STORE_LANE(v128_store8_lane2, simde_wasm_v128_store8_lane, u8, 2) @@ -114,4 +195,141 @@ DEFINE_SIMD_STORE_LANE(v128_store32_lane2, simde_wasm_v128_store32_lane, u32, 2) DEFINE_SIMD_STORE_LANE(v128_store32_lane3, simde_wasm_v128_store32_lane, u32, 3) DEFINE_SIMD_STORE_LANE(v128_store64_lane0, simde_wasm_v128_store64_lane, u64, 0) DEFINE_SIMD_STORE_LANE(v128_store64_lane1, simde_wasm_v128_store64_lane, u64, 1) +#endif + +#if WABT_BIG_ENDIAN +#define v128_const(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) simde_wasm_i8x16_const(p,o,n,m,l,k,j,i,h,g,f,e,d,c,b,a) +#define v128_i8x16_extract_lane(v, l) simde_wasm_i8x16_extract_lane(v, 15-(l)) +#define v128_u8x16_extract_lane(v, l) simde_wasm_u8x16_extract_lane(v, 15-(l)) +#define v128_i16x8_extract_lane(v, l) simde_wasm_i16x8_extract_lane(v, 7-(l)) +#define v128_u16x8_extract_lane(v, l) simde_wasm_u16x8_extract_lane(v, 7-(l)) +#define v128_i32x4_extract_lane(v, l) simde_wasm_i32x4_extract_lane(v, 3-(l)) +#define v128_i64x2_extract_lane(v, l) simde_wasm_i64x2_extract_lane(v, 1-(l)) +#define v128_f32x4_extract_lane(v, l) simde_wasm_f32x4_extract_lane(v, 3-(l)) +#define v128_f64x2_extract_lane(v, l) simde_wasm_f64x2_extract_lane(v, 1-(l)) +#define v128_i8x16_replace_lane(v, l, x) simde_wasm_i8x16_replace_lane(v, 15-(l), x) +#define v128_u8x16_replace_lane(v, l, x) 
simde_wasm_u8x16_replace_lane(v, 15-(l), x) +#define v128_i16x8_replace_lane(v, l, x) simde_wasm_i16x8_replace_lane(v, 7-(l), x) +#define v128_u16x8_replace_lane(v, l, x) simde_wasm_u16x8_replace_lane(v, 7-(l), x) +#define v128_i32x4_replace_lane(v, l, x) simde_wasm_i32x4_replace_lane(v, 3-(l), x) +#define v128_i64x2_replace_lane(v, l, x) simde_wasm_i64x2_replace_lane(v, 1-(l), x) +#define v128_f32x4_replace_lane(v, l, x) simde_wasm_f32x4_replace_lane(v, 3-(l), x) +#define v128_f64x2_replace_lane(v, l, x) simde_wasm_f64x2_replace_lane(v, 1-(l), x) +#define v128_i8x16_bitmask(v) simde_wasm_i8x16_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0))) +#define v128_i16x8_bitmask(v) simde_wasm_i16x8_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(14,15,12,13,10,11,8,9,6,7,4,5,2,3,0,1))) +#define v128_i32x4_bitmask(v) simde_wasm_i32x4_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3))) +#define v128_i64x2_bitmask(v) simde_wasm_i64x2_bitmask(simde_wasm_i8x16_swizzle(v, simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +#define v128_i8x16_swizzle(v1, v2) simde_wasm_i8x16_swizzle(v1, simde_wasm_v128_xor(v2, simde_wasm_i8x16_splat(15))) +#define v128_i8x16_shuffle(v1,v2,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) simde_wasm_i8x16_shuffle(v2,v1,31-(p),31-(o),31-(n),31-(m),31-(l),31-(k),31-(j),31-(i),31-(h),31-(g),31-(f),31-(e),31-(d),31-(c),31-(b),31-(a)) +#define v128_i16x8_extmul_high_i8x16 simde_wasm_i16x8_extmul_low_i8x16 +#define v128_u16x8_extmul_high_u8x16 simde_wasm_u16x8_extmul_low_u8x16 +#define v128_i16x8_extmul_low_i8x16 simde_wasm_i16x8_extmul_high_i8x16 +#define v128_u16x8_extmul_low_u8x16 simde_wasm_u16x8_extmul_high_u8x16 +#define v128_i32x4_extmul_high_i16x8 simde_wasm_i32x4_extmul_low_i16x8 +#define v128_u32x4_extmul_high_u16x8 simde_wasm_u32x4_extmul_low_u16x8 +#define v128_i32x4_extmul_low_i16x8 simde_wasm_i32x4_extmul_high_i16x8 +#define 
v128_u32x4_extmul_low_u16x8 simde_wasm_u32x4_extmul_high_u16x8 +#define v128_i64x2_extmul_high_i32x4 simde_wasm_i64x2_extmul_low_i32x4 +#define v128_u64x2_extmul_high_u32x4 simde_wasm_u64x2_extmul_low_u32x4 +#define v128_i64x2_extmul_low_i32x4 simde_wasm_i64x2_extmul_high_i32x4 +#define v128_u64x2_extmul_low_u32x4 simde_wasm_u64x2_extmul_high_u32x4 +#define v128_i16x8_extend_high_i8x16 simde_wasm_i16x8_extend_low_i8x16 +#define v128_u16x8_extend_high_u8x16 simde_wasm_u16x8_extend_low_u8x16 +#define v128_i16x8_extend_low_i8x16 simde_wasm_i16x8_extend_high_i8x16 +#define v128_u16x8_extend_low_u8x16 simde_wasm_u16x8_extend_high_u8x16 +#define v128_i32x4_extend_high_i16x8 simde_wasm_i32x4_extend_low_i16x8 +#define v128_u32x4_extend_high_u16x8 simde_wasm_u32x4_extend_low_u16x8 +#define v128_i32x4_extend_low_i16x8 simde_wasm_i32x4_extend_high_i16x8 +#define v128_u32x4_extend_low_u16x8 simde_wasm_u32x4_extend_high_u16x8 +#define v128_i64x2_extend_high_i32x4 simde_wasm_i64x2_extend_low_i32x4 +#define v128_u64x2_extend_high_u32x4 simde_wasm_u64x2_extend_low_u32x4 +#define v128_i64x2_extend_low_i32x4 simde_wasm_i64x2_extend_high_i32x4 +#define v128_u64x2_extend_low_u32x4 simde_wasm_u64x2_extend_high_u32x4 +#define v128_i32x4_trunc_sat_f64x2_zero(a) \ + simde_wasm_i8x16_swizzle( \ + simde_wasm_i32x4_trunc_sat_f64x2_zero(a), \ + simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)) +#define v128_u32x4_trunc_sat_f64x2_zero(a) \ + simde_wasm_i8x16_swizzle( \ + simde_wasm_u32x4_trunc_sat_f64x2_zero(a), \ + simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)) +#define v128_i16x8_narrow_i32x4(a,b) simde_wasm_i16x8_narrow_i32x4(b,a) +#define v128_u16x8_narrow_i32x4(a,b) simde_wasm_u16x8_narrow_i32x4(b,a) +#define v128_i8x16_narrow_i16x8(a,b) simde_wasm_i8x16_narrow_i16x8(b,a) +#define v128_u8x16_narrow_i16x8(a,b) simde_wasm_u8x16_narrow_i16x8(b,a) +#define v128_f64x2_promote_low_f32x4(a) \ + simde_wasm_f64x2_promote_low_f32x4(simde_wasm_i8x16_swizzle( \ + a, \ + 
simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +#define v128_f32x4_demote_f64x2_zero(a) \ + simde_wasm_i8x16_swizzle( \ + simde_wasm_f32x4_demote_f64x2_zero(a), \ + simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7)) +#define v128_f64x2_convert_low_i32x4(a) \ + simde_wasm_f64x2_convert_low_i32x4(simde_wasm_i8x16_swizzle( \ + a, \ + simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +#define v128_f64x2_convert_low_u32x4(a) \ + simde_wasm_f64x2_convert_low_u32x4(simde_wasm_i8x16_swizzle( \ + a, \ + simde_wasm_i8x16_const(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7))) +#else +#define v128_const simde_wasm_i8x16_const +#define v128_i8x16_extract_lane simde_wasm_i8x16_extract_lane +#define v128_u8x16_extract_lane simde_wasm_u8x16_extract_lane +#define v128_i16x8_extract_lane simde_wasm_i16x8_extract_lane +#define v128_u16x8_extract_lane simde_wasm_u16x8_extract_lane +#define v128_i32x4_extract_lane simde_wasm_i32x4_extract_lane +#define v128_i64x2_extract_lane simde_wasm_i64x2_extract_lane +#define v128_f32x4_extract_lane simde_wasm_f32x4_extract_lane +#define v128_f64x2_extract_lane simde_wasm_f64x2_extract_lane +#define v128_i8x16_replace_lane simde_wasm_i8x16_replace_lane +#define v128_u8x16_replace_lane simde_wasm_u8x16_replace_lane +#define v128_i16x8_replace_lane simde_wasm_i16x8_replace_lane +#define v128_u16x8_replace_lane simde_wasm_u16x8_replace_lane +#define v128_i32x4_replace_lane simde_wasm_i32x4_replace_lane +#define v128_i64x2_replace_lane simde_wasm_i64x2_replace_lane +#define v128_f32x4_replace_lane simde_wasm_f32x4_replace_lane +#define v128_f64x2_replace_lane simde_wasm_f64x2_replace_lane +#define v128_i8x16_bitmask simde_wasm_i8x16_bitmask +#define v128_i16x8_bitmask simde_wasm_i16x8_bitmask +#define v128_i32x4_bitmask simde_wasm_i32x4_bitmask +#define v128_i64x2_bitmask simde_wasm_i64x2_bitmask +#define v128_i8x16_swizzle simde_wasm_i8x16_swizzle +#define v128_i8x16_shuffle simde_wasm_i8x16_shuffle +#define 
v128_i16x8_extmul_high_i8x16 simde_wasm_i16x8_extmul_high_i8x16 +#define v128_u16x8_extmul_high_u8x16 simde_wasm_u16x8_extmul_high_u8x16 +#define v128_i16x8_extmul_low_i8x16 simde_wasm_i16x8_extmul_low_i8x16 +#define v128_u16x8_extmul_low_u8x16 simde_wasm_u16x8_extmul_low_u8x16 +#define v128_i32x4_extmul_high_i16x8 simde_wasm_i32x4_extmul_high_i16x8 +#define v128_u32x4_extmul_high_u16x8 simde_wasm_u32x4_extmul_high_u16x8 +#define v128_i32x4_extmul_low_i16x8 simde_wasm_i32x4_extmul_low_i16x8 +#define v128_u32x4_extmul_low_u16x8 simde_wasm_u32x4_extmul_low_u16x8 +#define v128_i64x2_extmul_high_i32x4 simde_wasm_i64x2_extmul_high_i32x4 +#define v128_u64x2_extmul_high_u32x4 simde_wasm_u64x2_extmul_high_u32x4 +#define v128_i64x2_extmul_low_i32x4 simde_wasm_i64x2_extmul_low_i32x4 +#define v128_u64x2_extmul_low_u32x4 simde_wasm_u64x2_extmul_low_u32x4 +#define v128_i16x8_extend_high_i8x16 simde_wasm_i16x8_extend_high_i8x16 +#define v128_u16x8_extend_high_u8x16 simde_wasm_u16x8_extend_high_u8x16 +#define v128_i16x8_extend_low_i8x16 simde_wasm_i16x8_extend_low_i8x16 +#define v128_u16x8_extend_low_u8x16 simde_wasm_u16x8_extend_low_u8x16 +#define v128_i32x4_extend_high_i16x8 simde_wasm_i32x4_extend_high_i16x8 +#define v128_u32x4_extend_high_u16x8 simde_wasm_u32x4_extend_high_u16x8 +#define v128_i32x4_extend_low_i16x8 simde_wasm_i32x4_extend_low_i16x8 +#define v128_u32x4_extend_low_u16x8 simde_wasm_u32x4_extend_low_u16x8 +#define v128_i64x2_extend_high_i32x4 simde_wasm_i64x2_extend_high_i32x4 +#define v128_u64x2_extend_high_u32x4 simde_wasm_u64x2_extend_high_u32x4 +#define v128_i64x2_extend_low_i32x4 simde_wasm_i64x2_extend_low_i32x4 +#define v128_u64x2_extend_low_u32x4 simde_wasm_u64x2_extend_low_u32x4 +#define v128_i32x4_trunc_sat_f64x2_zero simde_wasm_i32x4_trunc_sat_f64x2_zero +#define v128_u32x4_trunc_sat_f64x2_zero simde_wasm_u32x4_trunc_sat_f64x2_zero +#define v128_i16x8_narrow_i32x4 simde_wasm_i16x8_narrow_i32x4 +#define v128_u16x8_narrow_i32x4 
simde_wasm_u16x8_narrow_i32x4 +#define v128_i8x16_narrow_i16x8 simde_wasm_i8x16_narrow_i16x8 +#define v128_u8x16_narrow_i16x8 simde_wasm_u8x16_narrow_i16x8 +#define v128_f64x2_promote_low_f32x4 simde_wasm_f64x2_promote_low_f32x4 +#define v128_f32x4_demote_f64x2_zero simde_wasm_f32x4_demote_f64x2_zero +#define v128_f64x2_convert_low_i32x4 simde_wasm_f64x2_convert_low_i32x4 +#define v128_f64x2_convert_low_u32x4 simde_wasm_f64x2_convert_low_u32x4 +#endif // clang-format on diff --git a/src/test-interp.cc b/src/test-interp.cc index 11e4770fc..614eed336 100644 --- a/src/test-interp.cc +++ b/src/test-interp.cc @@ -505,8 +505,13 @@ TEST_F(InterpTest, Rot13) { EXPECT_LT(ptr + size, memory->ByteSize()); +#if WABT_BIG_ENDIAN + std::copy(string_data.rbegin(), string_data.rbegin() + size, + memory->UnsafeData() + memory->ByteSize() - ptr - size); +#else std::copy(string_data.begin(), string_data.begin() + size, memory->UnsafeData() + ptr); +#endif results[0].Set(size); return Result::Ok; @@ -527,8 +532,14 @@ TEST_F(InterpTest, Rot13) { EXPECT_LT(ptr + size, memory->ByteSize()); string_data.resize(size); +#if WABT_BIG_ENDIAN + std::copy(memory->UnsafeData() + memory->ByteSize() - ptr - size, + memory->UnsafeData() + memory->ByteSize() - ptr, + string_data.rbegin()); +#else std::copy(memory->UnsafeData() + ptr, memory->UnsafeData() + ptr + size, string_data.begin()); +#endif return Result::Ok; }; diff --git a/test/harness/wasm2c/simd_formatting.txt b/test/harness/wasm2c/simd_formatting.txt index c92ca050a..8b17ef1a6 100644 --- a/test/harness/wasm2c/simd_formatting.txt +++ b/test/harness/wasm2c/simd_formatting.txt @@ -6,7 +6,7 @@ (assert_return (invoke "x" (v128.const i8x16 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F)) (v128.const i8x16 0xFF 0xFE 0xFD 0xFC 0xFB 0xFA 0xF9 0xF8 0xF7 0xF6 0xF5 0xF4 0xF3 0xF2 0xF1 0xF0)) (;; STDERR ;;; -simd_formatting.txt:6: assertion failed: in w2c_simd__formatting__0__wasm_x(&simd__formatting__0__wasm_instance, 
simde_wasm_i8x16_make(0u,1u,2u,3u,4u,5u,6u,7u,8u,9u,10u,11u,12u,13u,14u,15u)): expected <255 254 253 252 251 250 249 248 247 246 245 244 243 242 241 240 >, got <0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 >. +simd_formatting.txt:6: assertion failed: in w2c_simd__formatting__0__wasm_x(&simd__formatting__0__wasm_instance, v128_i8x16_make(0u,1u,2u,3u,4u,5u,6u,7u,8u,9u,10u,11u,12u,13u,14u,15u)): expected <255 254 253 252 251 250 249 248 247 246 245 244 243 242 241 240 >, got <0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 >. ;;; STDERR ;;) (;; STDOUT ;;; 0/1 tests passed. diff --git a/test/run-spec-wasm2c.py b/test/run-spec-wasm2c.py index 6a59471d7..122174e27 100755 --- a/test/run-spec-wasm2c.py +++ b/test/run-spec-wasm2c.py @@ -374,7 +374,7 @@ def _Constant(self, const): return '"(f64 %s)"' % value return F64ToC(int(value)) elif type_ == 'v128': - return 'simde_wasm_' + const['lane_type'] + 'x' + str(len(const['value'])) + '_make(' + ','.join([self._Constant({'type': const['lane_type'], 'value': x}) for x in value]) + ')' + return 'v128_' + const['lane_type'] + 'x' + str(len(const['value'])) + '_make(' + ','.join([self._Constant({'type': const['lane_type'], 'value': x}) for x in value]) + ')' elif type_ == 'externref': if value == 'null': return 'wasm_rt_externref_null_value' diff --git a/test/spec-wasm2c-prefix.c b/test/spec-wasm2c-prefix.c index e107325c8..529e4620a 100644 --- a/test/spec-wasm2c-prefix.c +++ b/test/spec-wasm2c-prefix.c @@ -13,6 +13,42 @@ #include "wasm-rt-impl.h" #include "wasm-rt-exceptions.h" +/* NOTE: function argument evaluation order is implementation-defined in C, + so it SHOULD NOT be relied on by tests. 
*/ +#if WABT_BIG_ENDIAN +#define v128_i8x16_make(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ + simde_wasm_i8x16_make(p,o,n,m,l,k,j,i,h,g,f,e,d,c,b,a) +#define v128_u8x16_make(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \ + simde_wasm_u8x16_make(p,o,n,m,l,k,j,i,h,g,f,e,d,c,b,a) +#define v128_i16x8_make(a,b,c,d,e,f,g,h) simde_wasm_i16x8_make(h,g,f,e,d,c,b,a) +#define v128_u16x8_make(a,b,c,d,e,f,g,h) simde_wasm_u16x8_make(h,g,f,e,d,c,b,a) +#define v128_i32x4_make(a,b,c,d) simde_wasm_i32x4_make(d,c,b,a) +#define v128_u32x4_make(a,b,c,d) simde_wasm_u32x4_make(d,c,b,a) +#define v128_i64x2_make(a,b) simde_wasm_i64x2_make(b,a) +#define v128_u64x2_make(a,b) simde_wasm_u64x2_make(b,a) +#define v128_f32x4_make(a,b,c,d) simde_wasm_f32x4_make(d,c,b,a) +#define v128_f64x2_make(a,b) simde_wasm_f64x2_make(b,a) +#define v128_i8x16_extract_lane(a,n) simde_wasm_u8x16_extract_lane(a,15-(n)) +#define v128_u8x16_extract_lane(a,n) simde_wasm_u8x16_extract_lane(a,15-(n)) +#define v128_i16x8_extract_lane(a,n) simde_wasm_u16x8_extract_lane(a,7-(n)) +#define v128_u16x8_extract_lane(a,n) simde_wasm_u16x8_extract_lane(a,7-(n)) +#define v128_i32x4_extract_lane(a,n) simde_wasm_u32x4_extract_lane(a,3-(n)) +#define v128_u32x4_extract_lane(a,n) simde_wasm_u32x4_extract_lane(a,3-(n)) +#define v128_i64x2_extract_lane(a,n) simde_wasm_u64x2_extract_lane(a,1-(n)) +#define v128_u64x2_extract_lane(a,n) simde_wasm_u64x2_extract_lane(a,1-(n)) +#define v128_f32x4_extract_lane(a,n) simde_wasm_f32x4_extract_lane(a,3-(n)) +#define v128_f64x2_extract_lane(a,n) simde_wasm_f64x2_extract_lane(a,1-(n)) +#else +#define v128_i8x16_make simde_wasm_i8x16_make +#define v128_u8x16_make simde_wasm_u8x16_make +#define v128_i16x8_make simde_wasm_i16x8_make +#define v128_u16x8_make simde_wasm_u16x8_make +#define v128_i32x4_make simde_wasm_i32x4_make +#define v128_u32x4_make simde_wasm_u32x4_make +#define v128_i64x2_make simde_wasm_i64x2_make +#define v128_u64x2_make simde_wasm_u64x2_make +#define v128_f32x4_make simde_wasm_f32x4_make +#define 
v128_f64x2_make simde_wasm_f64x2_make // like is_equal_TYPE below, always use unsigned for these #define v128_i8x16_extract_lane simde_wasm_u8x16_extract_lane #define v128_u8x16_extract_lane simde_wasm_u8x16_extract_lane @@ -24,6 +60,7 @@ #define v128_u64x2_extract_lane simde_wasm_u64x2_extract_lane #define v128_f32x4_extract_lane simde_wasm_f32x4_extract_lane #define v128_f64x2_extract_lane simde_wasm_f64x2_extract_lane +#endif static int g_tests_run; static int g_tests_passed; diff --git a/wasm2c/.gitignore b/wasm2c/.gitignore index 77757ac57..70f380917 100644 --- a/wasm2c/.gitignore +++ b/wasm2c/.gitignore @@ -1,4 +1,5 @@ wasm-rt-impl.o +wasm-rt-mem-impl.o wasm-rt-exceptions-impl.o examples/**/*.o examples/fac/fac diff --git a/wasm2c/README.md b/wasm2c/README.md index 9cdbdc275..f928ae408 100644 --- a/wasm2c/README.md +++ b/wasm2c/README.md @@ -109,11 +109,11 @@ int main(int argc, char** argv) { ## Compiling the wasm2c output To compile the executable, we need to use `main.c` and the generated `fac.c`. -We'll also include `wasm-rt-impl.c` which has implementations of the various +We'll also include `wasm-rt-impl.c` and `wasm-rt-mem-impl.c`, which have implementations of the various `wasm_rt_*` functions used by `fac.c` and `fac.h`. ```sh -$ cc -o fac main.c fac.c wasm-rt-impl.c +$ cc -o fac main.c fac.c wasm2c/wasm-rt-impl.c wasm2c/wasm-rt-mem-impl.c -Iwasm2c -lm ``` A note on compiling with optimization: wasm2c relies on certain diff --git a/wasm2c/examples/callback/Makefile b/wasm2c/examples/callback/Makefile index ece54c341..81ee53ef8 100644 --- a/wasm2c/examples/callback/Makefile +++ b/wasm2c/examples/callback/Makefile @@ -3,12 +3,10 @@ CFLAGS=-I../.. 
all: callback -callback: main.o callback.o ../../wasm-rt-impl.o - clean: rm -rf callback callback.wasm callback.c callback.h *.o -callback: main.o callback.o ../../wasm-rt-impl.o -lm +callback: main.o callback.o ../../wasm-rt-impl.o ../../wasm-rt-mem-impl.o -lm callback.wasm: callback.wat ../../../bin/wat2wasm ../../../bin/wat2wasm --debug-names $< -o $@ diff --git a/wasm2c/examples/rot13/Makefile b/wasm2c/examples/rot13/Makefile index 4461af7c0..1d46b3a9f 100644 --- a/wasm2c/examples/rot13/Makefile +++ b/wasm2c/examples/rot13/Makefile @@ -3,12 +3,10 @@ CFLAGS=-I../.. all: rot13 -rot13: main.o rot13.o ../../wasm-rt-impl.o - clean: rm -rf rot13 rot13.wasm rot13.c *.o -rot13: main.o rot13.o ../../wasm-rt-impl.o -lm +rot13: main.o rot13.o ../../wasm-rt-impl.o ../../wasm-rt-mem-impl.o -lm rot13.wasm: rot13.wat ../../../bin/wat2wasm ../../../bin/wat2wasm $< -o $@ diff --git a/wasm2c/wasm-rt-mem-impl-helper.inc b/wasm2c/wasm-rt-mem-impl-helper.inc index 5cd503c79..2c351296c 100644 --- a/wasm2c/wasm-rt-mem-impl-helper.inc +++ b/wasm2c/wasm-rt-mem-impl-helper.inc @@ -122,8 +122,8 @@ static uint64_t MEMORY_API_NAME(grow_memory_impl)(MEMORY_TYPE* memory, #endif #endif #if WABT_BIG_ENDIAN - memmove(new_data + new_size - old_size, new_data, old_size); - memset(new_data, 0, delta_size); + memmove((void*)(new_data + new_size - old_size), (void*)new_data, old_size); + memset((void*)new_data, 0, delta_size); #endif memory->pages = new_pages; memory->size = new_size; diff --git a/wasm2c/wasm-rt.h b/wasm2c/wasm-rt.h index c8d7bd01e..f06748ebf 100644 --- a/wasm2c/wasm-rt.h +++ b/wasm2c/wasm-rt.h @@ -58,19 +58,19 @@ extern "C" { #endif /** - * Apple and Windows devices don't implement the C11 threads.h. We use pthreads - * on Apple devices, and CriticalSection APIs for Windows. + * Many devices don't implement the C11 threads.h. We use CriticalSection APIs + * for Windows and pthreads on other platforms where threads are not available. 
*/ #ifdef WASM_RT_C11_AVAILABLE -#ifdef __APPLE__ -#include <pthread.h> -#define WASM_RT_MUTEX pthread_mutex_t -#define WASM_RT_USE_PTHREADS 1 -#elif defined(_WIN32) +#if defined(_WIN32) #include <windows.h> #define WASM_RT_MUTEX CRITICAL_SECTION #define WASM_RT_USE_CRITICALSECTION 1 +#elif defined(__APPLE__) || defined(__STDC_NO_THREADS__) +#include <pthread.h> +#define WASM_RT_MUTEX pthread_mutex_t +#define WASM_RT_USE_PTHREADS 1 #else #include <threads.h> #define WASM_RT_MUTEX mtx_t @@ -135,11 +135,13 @@ extern "C" { * * This defaults to GUARD_PAGES as this is the fasest option, iff the * requirements of GUARD_PAGES --- 64-bit platforms, MMAP allocation strategy, - * no 64-bit memories --- are met. This falls back to BOUNDS otherwise. + * no 64-bit memories, no big-endian --- are met. This falls back to BOUNDS + * otherwise. */ /** Check if Guard checks are supported */ -#if UINTPTR_MAX > 0xffffffff && WASM_RT_USE_MMAP && !SUPPORT_MEMORY64 +#if UINTPTR_MAX > 0xffffffff && WASM_RT_USE_MMAP && !SUPPORT_MEMORY64 && \ + !WABT_BIG_ENDIAN #define WASM_RT_GUARD_PAGES_SUPPORTED 1 #else #define WASM_RT_GUARD_PAGES_SUPPORTED 0