diff --git a/.ci/check-format.sh b/.ci/check-format.sh index a6fdc1c9..de8aa178 100755 --- a/.ci/check-format.sh +++ b/.ci/check-format.sh @@ -6,7 +6,7 @@ set -x for file in ${SOURCES}; do - clang-format-12 ${file} > expected-format + clang-format-18 ${file} > expected-format diff -u -p --label="${file}" --label="expected coding style" ${file} expected-format done -exit $(clang-format-12 --output-replacements-xml ${SOURCES} | egrep -c "</replacement>") +exit $(clang-format-18 --output-replacements-xml ${SOURCES} | egrep -c "</replacement>") diff --git a/.ci/common.sh b/.ci/common.sh index c0396062..547c4293 100644 --- a/.ci/common.sh +++ b/.ci/common.sh @@ -1,4 +1,4 @@ -GCC_REL=11.2-2022.02 +GCC_REL=14.2.rel1 ARM_MIRROR=https://github.com/DLTcollab/toolchain-arm/raw/main SOURCES=$(find $(git rev-parse --show-toplevel) | egrep "\.(cpp|h)\$" | egrep -v "gcc-arm-${GCC_REL}-x86_64-aarch64-none-linux-gnu|gcc-arm-${GCC_REL}-x86_64-arm-none-linux-gnueabihf") diff --git a/.ci/cross-check.sh b/.ci/cross-check.sh index 5efb9fef..c0f9fb84 100755 --- a/.ci/cross-check.sh +++ b/.ci/cross-check.sh @@ -14,9 +14,9 @@ fi set -x make clean -export PATH=gcc-arm-${GCC_REL}-x86_64-aarch64-none-linux-gnu/bin:$PATH +export PATH=arm-gnu-toolchain-${GCC_REL}-x86_64-aarch64-none-linux-gnu/bin:$PATH make CROSS_COMPILE=aarch64-none-linux-gnu- check || exit 1 # ARMv8-A make clean -export PATH=gcc-arm-${GCC_REL}-x86_64-arm-none-linux-gnueabihf/bin:$PATH +export PATH=arm-gnu-toolchain-${GCC_REL}-x86_64-arm-none-linux-gnueabihf/bin:$PATH make CROSS_COMPILE=arm-none-linux-gnueabihf- check || exit 1 # ARMv7-A diff --git a/.ci/cross-tool.sh b/.ci/cross-tool.sh index a18e448b..c2013cb4 100755 --- a/.ci/cross-tool.sh +++ b/.ci/cross-tool.sh @@ -18,9 +18,9 @@ set -x sudo apt-get install -y curl xz-utils curl -L \ - ${ARM_MIRROR}/gcc-arm-${GCC_REL}-x86_64-arm-none-linux-gnueabihf.tar.xz \ + ${ARM_MIRROR}/arm-gnu-toolchain-${GCC_REL}-x86_64-arm-none-linux-gnueabihf.tar.xz \ | tar -Jx || exit 1 curl -L \ - 
${ARM_MIRROR}/gcc-arm-${GCC_REL}-x86_64-aarch64-none-linux-gnu.tar.xz \ + ${ARM_MIRROR}/arm-gnu-toolchain-${GCC_REL}-x86_64-aarch64-none-linux-gnu.tar.xz \ | tar -Jx || exit 1 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5d43f3e1..10c1590b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -4,11 +4,11 @@ on: [push, pull_request] jobs: host-x86: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 strategy: matrix: arch: [x86_64] - cxx_compiler: [g++-10, clang++-11] + cxx_compiler: [g++, clang++] steps: - name: checkout code uses: actions/checkout@v4 @@ -52,7 +52,7 @@ jobs: run: mingw32-make check host-arm: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 strategy: matrix: arch_with_features: [ @@ -61,7 +61,7 @@ jobs: {arch: aarch64, feature: crypto+crc, arch_cflags: none}, {arch: armv7, feature: none, arch_cflags: '-mcpu=cortex-a32 -mfpu=neon-fp-armv8'} ] - cxx_compiler: [g++-10, clang++-11] + cxx_compiler: [g++] steps: - name: checkout code uses: actions/checkout@v4 @@ -71,14 +71,15 @@ jobs: uses: uraimo/run-on-arch-action@v2 with: arch: ${{ matrix.arch_with_features.arch }} - distro: ubuntu20.04 + distro: ubuntu22.04 + # Speed up builds by storing container images in a GitHub package registry. 
+ githubToken: ${{ github.token }} env: | CXX: ${{ matrix.cxx_compiler }} ARCH_CFLAGS: ${{ matrix.arch_with_features.arch_cflags }} install: | apt-get update -q -y apt-get install -q -y "${{ matrix.cxx_compiler }}" make - apt-get install -q -y gcc run: | make FEATURE=${{ matrix.arch_with_features.feature }} check @@ -101,7 +102,7 @@ jobs: path: ARM64 test-win-msvc: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 container: linaro/wine-arm64 needs: host-win-msvc steps: @@ -115,12 +116,12 @@ jobs: coding-style: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 steps: - name: checkout code uses: actions/checkout@v4 - name: style check run: | - sudo apt-get install -q -y clang-format-12 + sudo apt-get install -q -y clang-format-18 sh .ci/check-format.sh shell: bash diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 4767ed7a..60afe860 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -26,7 +26,7 @@ We welcome all contributions from corporate, acaddemic and individual developers * All code must adhere to the existing C coding style (see below). While we are somewhat flexible in basic style, you will adhere to what is currently in place. Uncommented, complicated algorithmic constructs will be rejected. * All external pull requests must contain sufficient documentation in the pull request comments in order to be accepted. -Software requirement: [clang-format](https://clang.llvm.org/docs/ClangFormat.html) version 12 or later. +Software requirement: [clang-format](https://clang.llvm.org/docs/ClangFormat.html) version 18 or later. Use the command `$ clang-format -i *.[ch]` to enforce a consistent coding style. diff --git a/Makefile b/Makefile index 999a3a7b..9cbd3b99 100644 --- a/Makefile +++ b/Makefile @@ -83,8 +83,8 @@ endif indent: @echo "Formatting files with clang-format.." - @if ! hash clang-format-12; then echo "clang-format-12 is required to indent"; fi - clang-format-12 -i sse2neon.h tests/*.cpp tests/*.h + @if ! 
hash clang-format-18; then echo "clang-format-18 is required to indent"; fi + clang-format-18 -i sse2neon.h tests/*.cpp tests/*.h .PHONY: clean check format clean: diff --git a/sse2neon.h b/sse2neon.h index 80d2fea5..a83c12ce 100644 --- a/sse2neon.h +++ b/sse2neon.h @@ -1816,7 +1816,7 @@ FORCE_INLINE void _sse2neon_set_fpcr(uint64_t value) #if defined(_MSC_VER) && !defined(__clang__) _WriteStatusReg(ARM64_FPCR, value); #else - __asm__ __volatile__("msr FPCR, %0" ::"r"(value)); /* write */ + __asm__ __volatile__("msr FPCR, %0" ::"r"(value)); /* write */ #endif } @@ -2431,7 +2431,7 @@ FORCE_INLINE void _sse2neon_mm_set_flush_zero_mode(unsigned int flag) #if defined(__aarch64__) || defined(_M_ARM64) _sse2neon_set_fpcr(r.value); #else - __asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */ + __asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */ #endif } @@ -4977,11 +4977,11 @@ FORCE_INLINE __m128i _mm_set_epi8(signed char b15, signed char b1, signed char b0) { - int8_t ALIGN_STRUCT(16) - data[16] = {(int8_t) b0, (int8_t) b1, (int8_t) b2, (int8_t) b3, - (int8_t) b4, (int8_t) b5, (int8_t) b6, (int8_t) b7, - (int8_t) b8, (int8_t) b9, (int8_t) b10, (int8_t) b11, - (int8_t) b12, (int8_t) b13, (int8_t) b14, (int8_t) b15}; + int8_t ALIGN_STRUCT(16) data[16] = { + (int8_t) b0, (int8_t) b1, (int8_t) b2, (int8_t) b3, + (int8_t) b4, (int8_t) b5, (int8_t) b6, (int8_t) b7, + (int8_t) b8, (int8_t) b9, (int8_t) b10, (int8_t) b11, + (int8_t) b12, (int8_t) b13, (int8_t) b14, (int8_t) b15}; return (__m128i) vld1q_s8(data); } @@ -5112,11 +5112,11 @@ FORCE_INLINE __m128i _mm_setr_epi8(signed char b0, signed char b14, signed char b15) { - int8_t ALIGN_STRUCT(16) - data[16] = {(int8_t) b0, (int8_t) b1, (int8_t) b2, (int8_t) b3, - (int8_t) b4, (int8_t) b5, (int8_t) b6, (int8_t) b7, - (int8_t) b8, (int8_t) b9, (int8_t) b10, (int8_t) b11, - (int8_t) b12, (int8_t) b13, (int8_t) b14, (int8_t) b15}; + int8_t ALIGN_STRUCT(16) data[16] = { + (int8_t) b0, (int8_t) b1, (int8_t) b2, 
(int8_t) b3, + (int8_t) b4, (int8_t) b5, (int8_t) b6, (int8_t) b7, + (int8_t) b8, (int8_t) b9, (int8_t) b10, (int8_t) b11, + (int8_t) b12, (int8_t) b13, (int8_t) b14, (int8_t) b15}; return (__m128i) vld1q_s8(data); } @@ -6269,7 +6269,7 @@ FORCE_INLINE __m64 _mm_abs_pi8(__m64 a) uint8x8_t tmp_low; \ uint8x8_t tmp_high; \ if ((imm) >= 8) { \ - const int idx = (imm) -8; \ + const int idx = (imm) - 8; \ tmp_low = vreinterpret_u8_m64(_a); \ tmp_high = vdup_n_u8(0); \ ret = vreinterpret_m64_u8(vext_u8(tmp_low, tmp_high, idx)); \ @@ -6790,14 +6790,14 @@ FORCE_INLINE __m64 _mm_sign_pi8(__m64 _a, __m64 _b) _sse2neon_define2( \ __m128i, a, b, \ const uint16_t _mask[8] = \ - _sse2neon_init(((imm) & (1 << 0)) ? (uint16_t) -1 : 0x0, \ - ((imm) & (1 << 1)) ? (uint16_t) -1 : 0x0, \ - ((imm) & (1 << 2)) ? (uint16_t) -1 : 0x0, \ - ((imm) & (1 << 3)) ? (uint16_t) -1 : 0x0, \ - ((imm) & (1 << 4)) ? (uint16_t) -1 : 0x0, \ - ((imm) & (1 << 5)) ? (uint16_t) -1 : 0x0, \ - ((imm) & (1 << 6)) ? (uint16_t) -1 : 0x0, \ - ((imm) & (1 << 7)) ? (uint16_t) -1 : 0x0); \ + _sse2neon_init(((imm) & (1 << 0)) ? (uint16_t) - 1 : 0x0, \ + ((imm) & (1 << 1)) ? (uint16_t) - 1 : 0x0, \ + ((imm) & (1 << 2)) ? (uint16_t) - 1 : 0x0, \ + ((imm) & (1 << 3)) ? (uint16_t) - 1 : 0x0, \ + ((imm) & (1 << 4)) ? (uint16_t) - 1 : 0x0, \ + ((imm) & (1 << 5)) ? (uint16_t) - 1 : 0x0, \ + ((imm) & (1 << 6)) ? (uint16_t) - 1 : 0x0, \ + ((imm) & (1 << 7)) ? (uint16_t) - 1 : 0x0); \ uint16x8_t _mask_vec = vld1q_u16(_mask); \ uint16x8_t __a = vreinterpretq_u16_m128i(_a); \ uint16x8_t __b = vreinterpretq_u16_m128i(_b); _sse2neon_return( \ @@ -6822,11 +6822,11 @@ FORCE_INLINE __m64 _mm_sign_pi8(__m64 _a, __m64 _b) // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_ps FORCE_INLINE __m128 _mm_blend_ps(__m128 _a, __m128 _b, const char imm8) { - const uint32_t ALIGN_STRUCT(16) - data[4] = {((imm8) & (1 << 0)) ? UINT32_MAX : 0, - ((imm8) & (1 << 1)) ? UINT32_MAX : 0, - ((imm8) & (1 << 2)) ? 
UINT32_MAX : 0, - ((imm8) & (1 << 3)) ? UINT32_MAX : 0}; + const uint32_t + ALIGN_STRUCT(16) data[4] = {((imm8) & (1 << 0)) ? UINT32_MAX : 0, + ((imm8) & (1 << 1)) ? UINT32_MAX : 0, + ((imm8) & (1 << 2)) ? UINT32_MAX : 0, + ((imm8) & (1 << 3)) ? UINT32_MAX : 0}; uint32x4_t mask = vld1q_u32(data); float32x4_t a = vreinterpretq_f32_m128(_a); float32x4_t b = vreinterpretq_f32_m128(_b); @@ -9351,7 +9351,7 @@ FORCE_INLINE void _sse2neon_mm_set_denormals_zero_mode(unsigned int flag) #if defined(__aarch64__) || defined(_M_ARM64) _sse2neon_set_fpcr(r.value); #else - __asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */ + __asm__ __volatile__("vmsr FPSCR, %0" ::"r"(r)); /* write */ #endif } diff --git a/tests/impl.cpp b/tests/impl.cpp index a3887606..93d53b3c 100644 --- a/tests/impl.cpp +++ b/tests/impl.cpp @@ -5878,7 +5878,7 @@ result_t test_mm_shuffle_epi32(const SSE2NEONTestImpl &impl, uint32_t iter) int32_t _d[4]; #define TEST_IMPL(IDX) \ - _d[0] = _a[((IDX) &0x3)]; \ + _d[0] = _a[((IDX) & 0x3)]; \ _d[1] = _a[((IDX >> 2) & 0x3)]; \ _d[2] = _a[((IDX >> 4) & 0x3)]; \ _d[3] = _a[((IDX >> 6) & 0x3)]; \