Skip to content

Commit

Permalink
Updated MSys/ucrt64 workflow to support recent versions of the libraries
Browse files Browse the repository at this point in the history
  • Loading branch information
maxirmx committed Mar 9, 2024
1 parent e81faee commit 44c1b2d
Show file tree
Hide file tree
Showing 13 changed files with 262 additions and 41 deletions.
131 changes: 131 additions & 0 deletions .github/workflows/windows-msys.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Copyright (c) 2022, 2024 [Ribose Inc](https://www.ribose.com).
# All rights reserved.
# This file is a part of tamatebako
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# The purpose of this workflow is to check that the build procedures work correctly
# in a specific environment. For this reason there is no caching; this is
# intentional. All caching is done in upstream projects.
#
# This file also contains comments that will be useful if dwarfs has to be built
# in a different environment, not GHA.
name: Windows-MSys

on:
push:
branches: [ '*' ]
paths-ignore:
- 'docs/**'
- '**.adoc'
- '**.md'
- '.github/workflows/*.yml'
- '!.github/workflows/windows-msys.yml'
pull_request:
paths-ignore:
- 'docs/**'
- '**.adoc'
- '**.md'
- '.github/workflows/*.yml'
- '!.github/workflows/windows-msys.yml'
workflow_dispatch:

concurrency:
group: '${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.ref_name }}'
cancel-in-progress: true

env:
BUILD_TYPE: Release

jobs:
build:
name: windows-msys [${{ matrix.env.CC }}, ${{ matrix.setup.scope }}]
runs-on: windows-latest
strategy:
fail-fast: false
matrix:
# We are running two configurations:
# - ALL in order to run regression tests
# - MKD just to be sure that the configuration needed for tebako builds without issues
setup:
- scope: 'ALL'
tests: 'ON'
- scope: 'MKD'
tests: 'OFF'
env:
- sys: ucrt64
CC: gcc
CXX: g++
env: ${{ matrix.env }}

defaults:
run:
shell: msys2 {0}

steps:
- name: Setup MSys
uses: msys2/setup-msys2@v2
with:
msystem: ${{matrix.env.sys}}
path-type: minimal
update: true
install: >-
git
tar
bison
flex
pacboy: >-
toolchain:p
openssl:p
cmake:p
boost:p
make:p
diffutils:p
libevent:p
double-conversion:p
glog:p
fmt:p
dlfcn:p
ninja:p
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true

- name: Get number of CPU cores
run: echo "CORES=$(nproc --all)" >> $GITHUB_ENV

- name: Configure
run: |
cmake -B build -DWITH_TESTS=${{ matrix.setup.tests }} -DUSE_JEMALLOC=OFF -DNIXPKGS_DWARFS_VERSION_OVERRIDE=tebako \
-DFOLLY_NO_EXCEPTION_TRACER=ON -DTEBAKO_BUILD_SCOPE=${{ matrix.setup.scope }} -DCMAKE_INSTALL_PREFIX=install -GNinja .
- name: Build
run: |
cmake --build build --parallel $CORES
# - name: Test
# if: ${{ matrix.setup.tests == 'ON' }}
# run: |
# cd build
# ninja test
83 changes: 67 additions & 16 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,15 @@ include(${TOOLS}/cmake-scripts/def-external-project.cmake)
include_directories(${DEPS_INCLUDE_DIR})
link_directories(${DEPS_LIB_DIR})

set(IS_MSYS OFF)
if(MSVC)
set(OSTYPE_TXT "Windows")
message(STATUS "OSTYPE (forced by MSVC): '${OSTYPE_TXT}'")
elseif(MINGW)
set(OSTYPE_TXT "msys")
message(STATUS "OSTYPE (forced by MINGW): '${OSTYPE_TXT}'")
set(IS_MSYS ON)
include(${TOOLS}/cmake-scripts/setup-libarchive.cmake)
else(MSVC)
include(${TOOLS}/cmake-scripts/setup-libarchive.cmake)

Expand All @@ -181,24 +188,45 @@ else(MSVC)
endif(MSVC)

set(PATCH_FOLLY OFF)
set(PATCH_FBTHRIFT OFF)

if("${OSTYPE_TXT}" MATCHES "^linux-musl.*")
message(STATUS "Adding __musl__ compile definition")
add_compile_definitions(__musl__)
include(${TOOLS}/cmake-scripts/setup-libutfcpp.cmake)
add_dependencies(_LIBARCHIVE ${LIBUTFCPP_PRJ})
set(PATCH_FOLLY ON)
if (NOT NO_PATCH)
set(PATCH_FOLLY ON)
endif(NOT NO_PATCH)
endif()

if ("${OSTYPE_TXT}" MATCHES "^darwin*")
include(${TOOLS}/cmake-scripts/macos-environment.cmake)
include(${TOOLS}/cmake-scripts/setup-libutfcpp.cmake)
add_dependencies(_LIBARCHIVE ${LIBUTFCPP_PRJ})
set(PATCH_FOLLY ON)
if (NOT NO_PATCH)
set(PATCH_FOLLY ON)
endif(NOT NO_PATCH)
endif()

# MSys (MinGW) specific configuration.
# Loads the MSys environment and utfcpp helper scripts, locates the winsock
# library and turns on source patching for folly and fbthrift.
if("${OSTYPE_TXT}" MATCHES "msys")
  include(${TOOLS}/cmake-scripts/msys-environment.cmake)
  include(${TOOLS}/cmake-scripts/setup-libutfcpp.cmake)
  # Make sure the utfcpp project is built before _LIBARCHIVE.
  add_dependencies(_LIBARCHIVE ${LIBUTFCPP_PRJ})
  # WINSOCK is referenced later as ${WINSOCK} in the list of link libraries.
  find_library(WINSOCK ws2_32 REQUIRED)
  message(STATUS "Using winsock2 at ${WINSOCK}")
  # Needed because of changes introduced in glog 0.7.0.
  # NOTE(review): presumably glog >= 0.7.0 headers expect the consumer to define
  # this macro -- confirm against the glog version in use.
  add_compile_definitions(GLOG_USE_GLOG_EXPORT)

  # NO_PATCH lets the caller opt out of modifying the bundled sources.
  if(NOT NO_PATCH)
    set(PATCH_FOLLY ON)
    set(PATCH_FBTHRIFT ON)
  endif()
endif()

if (${PATCH_FOLLY})
set(FOLLY_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/folly")
message(STATUS "Patching folly with '${GNU_BASH} ${CMAKE_CURRENT_SOURCE_DIR}/tools/ci-scripts/patch-folly.sh ${FOLLY_ROOT}'")
execute_process(
COMMAND "${GNU_BASH}"
"${CMAKE_CURRENT_SOURCE_DIR}/tools/ci-scripts/patch-folly.sh"
Expand All @@ -216,6 +244,26 @@ if (${PATCH_FOLLY})
endif(PATCH_FOLLY_RES EQUAL 0)
endif(${PATCH_FOLLY})

# Patch the bundled fbthrift sources at configure time.
# PATCH_FBTHRIFT defaults to OFF and is enabled by the MSys branch unless
# NO_PATCH is set. The patch script is run with GNU bash and receives the
# fbthrift source directory as its only argument. A non-zero exit status (or a
# failure to launch bash at all) aborts the configuration.
if(PATCH_FBTHRIFT)
  set(FBTHRIFT_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/fbthrift")
  message(STATUS "Patching fbthrift with '${GNU_BASH} ${CMAKE_CURRENT_SOURCE_DIR}/tools/ci-scripts/patch-fbthrift.sh ${FBTHRIFT_ROOT}'")
  execute_process(
    COMMAND "${GNU_BASH}"
            "${CMAKE_CURRENT_SOURCE_DIR}/tools/ci-scripts/patch-fbthrift.sh"
            "${FBTHRIFT_ROOT}"
    RESULT_VARIABLE PATCH_FBTHRIFT_RES
    # stdout and stderr are merged into one variable so that the diagnostics
    # written by the script to stderr end up in the failure message below.
    OUTPUT_VARIABLE PATCH_FBTHRIFT_TXT
    ERROR_VARIABLE PATCH_FBTHRIFT_TXT
    OUTPUT_STRIP_TRAILING_WHITESPACE
    ERROR_STRIP_TRAILING_WHITESPACE
  )

  # PATCH_FBTHRIFT_RES is the exit code, or an error string if the process
  # could not be started; anything other than 0 is treated as a failure.
  if(PATCH_FBTHRIFT_RES EQUAL 0)
    message(STATUS "Patched fbthrift at '${FBTHRIFT_ROOT}'")
  else()
    message(FATAL_ERROR "Failed to patch fbthrift at '${FBTHRIFT_ROOT}': ${PATCH_FBTHRIFT_TXT}")
  endif()
endif()

check_cxx_source_compiles(
"#include <filesystem>
int main() {
Expand Down Expand Up @@ -948,54 +996,57 @@ add_library(

target_include_directories(dwarfs PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/fsst)

target_link_libraries(
list(APPEND LIBRARIES
dwarfs
dwarfs_tool
metadata_thrift
thrift_light
folly
${WINSOCK}
fsst
${Boost_LIBRARIES})
${Boost_LIBRARIES}
)

if(LIBLZ4_FOUND)
target_link_libraries(dwarfs PkgConfig::LIBLZ4)
list(APPEND LIBRARIES PkgConfig::LIBLZ4)
endif()

if(LIBLZMA_FOUND)
target_link_libraries(dwarfs PkgConfig::LIBLZMA)
list(APPEND LIBRARIES PkgConfig::LIBLZMA)
endif()

if(LIBBROTLIDEC_FOUND AND LIBBROTLIENC_FOUND AND LIBBROTLICOMMON_FOUND)
target_link_libraries(dwarfs PkgConfig::LIBBROTLIDEC PkgConfig::LIBBROTLIENC PkgConfig::LIBBROTLICOMMON)
list(APPEND LIBRARIES PkgConfig::LIBBROTLIDEC PkgConfig::LIBBROTLIENC PkgConfig::LIBBROTLICOMMON)
endif()

if(NOT STATIC_BUILD_DO_NOT_USE)
if(MSVC)
target_link_libraries(dwarfs PkgConfig::LIBARCHIVE)
list(APPEND LIBRARIES PkgConfig::LIBARCHIVE)
else(MSVC)
target_link_libraries(dwarfs _LIBARCHIVE)
list(APPEND LIBRARIES _LIBARCHIVE)
endif(MSVC)
endif(NOT STATIC_BUILD_DO_NOT_USE)

if(ZSTD_FOUND AND PREFER_SYSTEM_ZSTD)
target_link_libraries(dwarfs PkgConfig::ZSTD)
list(APPEND LIBRARIES PkgConfig::ZSTD)
else()
target_link_libraries(dwarfs libzstd_static)
list(APPEND LIBRARIES libzstd_static)
endif()

if(XXHASH_FOUND
AND XXHASH_VERSION_OK
AND PREFER_SYSTEM_XXHASH)
target_link_libraries(dwarfs PkgConfig::XXHASH)
list(APPEND LIBRARIES PkgConfig::XXHASH)
else()
target_link_libraries(dwarfs xxhash)
list(APPEND LIBRARIES xxhash)
endif()

foreach(tgt ${BINARY_TARGETS} ${MAIN_TARGETS})
target_link_libraries(${tgt} "$<LINK_LIBRARY:WHOLE_ARCHIVE,dwarfs_compression>")
if(MSVC)
target_link_libraries(${tgt} dwarfs dwarfs_tool PkgConfig::LIBARCHIVE)
target_link_libraries(${tgt} ${LIBRARIES} PkgConfig::LIBARCHIVE)
else(MSVC)
target_link_libraries(${tgt} dwarfs dwarfs_tool _LIBARCHIVE)
target_link_libraries(${tgt} ${LIBRARIES} _LIBARCHIVE)
endif(MSVC)
if(USE_JEMALLOC)
target_link_libraries(${tgt} PkgConfig::JEMALLOC)
Expand Down Expand Up @@ -1150,7 +1201,7 @@ if(STATIC_BUILD_DO_NOT_USE AND NOT WIN32)
add_custom_target(strip COMMAND strip -s ${FILES_TO_STRIP})
endif()

if(STATIC_BUILD_DO_NOT_USE OR (WIN32 AND "${TEBAKO_BUILD_SCOPE}" STREQUAL "ALL"))
if(STATIC_BUILD_DO_NOT_USE OR (WIN32 AND NOT IS_MSYS AND "${TEBAKO_BUILD_SCOPE}" STREQUAL "ALL"))
find_program(UPX_EXE upx upx.exe PATHS "c:/bin" DOC "ultimate packer for executables" REQUIRED)

set(UNIVERSAL_PACKED
Expand Down
4 changes: 4 additions & 0 deletions cmake/version.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ if(((NOT "${REAL_SOURCE_DIR}" STREQUAL "${GIT_TOPLEVEL}")

set(DWARFS_GIT_BUILD OFF)
else()
# message(STATUS "VERSION_SRC_FILE: ${VERSION_SRC_FILE}")
# message(STATUS "VERSION_HDR_FILE: ${VERSION_HDR_FILE}")
# message(STATUS "PKG_VERSION_FILE: ${PKG_VERSION_FILE}")

if(EXISTS ${VERSION_SRC_FILE} OR EXISTS ${VERSION_HDR_FILE} OR EXISTS ${PKG_VERSION_FILE})
message(FATAL_ERROR "version files must not exist in git repository")
endif()
Expand Down
32 changes: 16 additions & 16 deletions fsst/fsst_avx512.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#if defined(__x86_64__) || defined(_M_X64)
#include <immintrin.h>

#ifdef _WIN32
#ifdef _MSC_VER
bool fsst_hasAVX512() {
int info[4];
__cpuidex(info, 0x00000007, 0);
Expand All @@ -43,11 +43,11 @@ bool fsst_hasAVX512() { return false; }
// In one call of this function, we can compress 512 strings, each of maximum length 511 bytes.
// strings can be shorter than 511 bytes, no problem, but if they are longer we need to cut them up.
//
// In each iteration of the while loop, we find one code in each of the unroll*8 strings, i.e. (8,16,24 or 32) for resp. unroll=1,2,3,4
// In each iteration of the while loop, we find one code in each of the unroll*8 strings, i.e. (8,16,24 or 32) for resp. unroll=1,2,3,4
// unroll3 performs best on my hardware
//
// In the worst case, each final encoded string occupies 512KB bytes (512*1024; with 1024=512xexception, exception = 2 bytes).
// - hence codeBase is a buffer of 512KB (needs 19 bits jobs), symbolBase of 256KB (needs 18 bits jobs).
// - hence codeBase is a buffer of 512KB (needs 19 bits jobs), symbolBase of 256KB (needs 18 bits jobs).
//
// 'jobX' controls the encoding of each string and is therefore a u64 with format [out:19][pos:9][end:18][cur:18] (low-to-high bits)
// The field 'pos' tells which string we are processing (0..511). We need this info as strings will complete compressing out-of-order.
Expand All @@ -60,14 +60,14 @@ bool fsst_hasAVX512() { return false; }
//
// Apart from the coded strings, we return in a output[] array of size 'processed' the job values of the 'finished' strings.
// In the following 'unfinished' slots (processed=finished+unfinished) we output the 'job' values of the unfinished strings.
//
//
// For the finished strings, we need [out:19] to see the compressed size and [pos:9] to see which string we refer to.
// For the unfinished strings, we need all fields of 'job' to continue the compression with scalar code (see SIMD code in compressBatch).
//
// THIS IS A SEPARATE CODE FILE NOT BECAUSE OF MY LOVE FOR MODULARIZED CODE BUT BECAUSE IT ALLOWS TO COMPILE IT WITH DIFFERENT FLAGS
// in particular, unrolling is crucial for gather/scatter performance, but requires registers. the #define all_* expressions however,
// will be detected to be constants by g++ -O2 and will be precomputed and placed into AVX512 registers - spoiling 9 of them.
// This reduces the effectiveness of unrolling, hence -O2 makes the loop perform worse than -O1 which skips this optimization.
// in particular, unrolling is crucial for gather/scatter performance, but requires registers. the #define all_* expressions however,
// will be detected to be constants by g++ -O2 and will be precomputed and placed into AVX512 registers - spoiling 9 of them.
// This reduces the effectiveness of unrolling, hence -O2 makes the loop perform worse than -O1 which skips this optimization.
// Assembly inspection confirmed that 3-way unroll with -O1 avoids needless load/stores.

size_t fsst_compressAVX512(SymbolTable &symbolTable, u8* codeBase, u8* symbolBase, SIMDjob *input, SIMDjob *output, size_t n, size_t unroll) {
Expand All @@ -88,7 +88,7 @@ size_t fsst_compressAVX512(SymbolTable &symbolTable, u8* codeBase, u8* symbolBas
#define all_FF _mm512_srli_epi64(all_MASK, 56)

SIMDjob *inputEnd = input+n;
assert(n >= unroll*8 && n <= 512); // should be close to 512
assert(n >= unroll*8 && n <= 512); // should be close to 512
__m512i job1, job2, job3, job4; // will contain current jobs, for each unroll 1,2,3,4
__mmask8 loadmask1 = 255, loadmask2 = 255*(unroll>1), loadmask3 = 255*(unroll>2), loadmask4 = 255*(unroll>3); // 2b loaded new strings bitmask per unroll
u32 delta1 = 8, delta2 = 8*(unroll>1), delta3 = 8*(unroll>2), delta4 = 8*(unroll>3); // #new loads this SIMD iteration per unroll
Expand All @@ -111,22 +111,22 @@ size_t fsst_compressAVX512(SymbolTable &symbolTable, u8* codeBase, u8* symbolBas
}
}

// flush the job states of the unfinished strings at the end of output[]
// flush the job states of the unfinished strings at the end of output[]
processed = n - (inputEnd - input);
u32 unfinished = 0;
if (unroll > 1) {
if (unroll > 2) {
if (unroll > 3) {
_mm512_mask_compressstoreu_epi64(output+unfinished, loadmask4=~loadmask4, job4);
if (unroll > 1) {
if (unroll > 2) {
if (unroll > 3) {
_mm512_mask_compressstoreu_epi64(output+unfinished, loadmask4=~loadmask4, job4);
unfinished += _mm_popcnt_u32((int) loadmask4);
}
_mm512_mask_compressstoreu_epi64(output+unfinished, loadmask3=~loadmask3, job3);
_mm512_mask_compressstoreu_epi64(output+unfinished, loadmask3=~loadmask3, job3);
unfinished += _mm_popcnt_u32((int) loadmask3);
}
_mm512_mask_compressstoreu_epi64(output+unfinished, loadmask2=~loadmask2, job2);
_mm512_mask_compressstoreu_epi64(output+unfinished, loadmask2=~loadmask2, job2);
unfinished += _mm_popcnt_u32((int) loadmask2);
}
_mm512_mask_compressstoreu_epi64(output+unfinished, loadmask1=~loadmask1, job1);
_mm512_mask_compressstoreu_epi64(output+unfinished, loadmask1=~loadmask1, job1);
#else
(void) symbolTable;
(void) codeBase;
Expand Down
Loading

0 comments on commit 44c1b2d

Please sign in to comment.