From 83065af7c259bb8383b39938e73f27f8ea85838d Mon Sep 17 00:00:00 2001
From: David Li
Date: Wed, 4 Dec 2024 22:03:30 -0500
Subject: [PATCH] GH-13: Set up JNI build (dataset, etc.)

Fixes #13.
---
 .env                                   |   7 +
 .github/workflows/test_jni.yml         | 199 +++++++++++++++++++++++++
 ci/docker/vcpkg-jni.dockerfile         |  46 ++++++
 ci/scripts/java_jni_macos_build.sh     | 147 +++++++++++++++++++
 ci/scripts/java_jni_manylinux_build.sh | 177 ++++++++++++++++++++++
 docker-compose.yml                     |  32 ++++
 6 files changed, 608 insertions(+)
 create mode 100644 .github/workflows/test_jni.yml
 create mode 100644 ci/docker/vcpkg-jni.dockerfile
 create mode 100755 ci/scripts/java_jni_macos_build.sh
 create mode 100755 ci/scripts/java_jni_manylinux_build.sh

diff --git a/.env b/.env
index b50a16eb..5398249b 100644
--- a/.env
+++ b/.env
@@ -40,6 +40,7 @@ ARCH_SHORT=amd64
 
 # Default repository to pull and push images from
 REPO=ghcr.io/apache/arrow-java-dev
+ARROW_REPO=apache/arrow-dev
 
 # The setup attempts to generate coredumps by default, in order to disable the
 # coredump generation set it to 0
@@ -48,3 +49,9 @@ ULIMIT_CORE=-1
 # Default versions for various dependencies
 JDK=11
 MAVEN=3.9.9
+
+# Versions for various dependencies used to build artifacts
+# Keep in sync with apache/arrow
+ARROW_REPO_ROOT=./arrow
+PYTHON=3.9
+VCPKG="943c5ef1c8f6b5e6ced092b242c8299caae2ff01"  # 2024.04.26 Release
diff --git a/.github/workflows/test_jni.yml b/.github/workflows/test_jni.yml
new file mode 100644
index 00000000..8c28707d
--- /dev/null
+++ b/.github/workflows/test_jni.yml
@@ -0,0 +1,199 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Test (JNI)
+
+on:
+  push:
+    branches:
+      - '**'
+      - '!dependabot/**'
+    tags:
+      - '**'
+  pull_request:
+
+concurrency:
+  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+env:
+  DOCKER_VOLUME_PREFIX: ".docker/"
+
+jobs:
+  cpp-ubuntu:
+    name: Build C++ libraries ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }}
+    runs-on: ${{ matrix.platform.runs_on }}
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - runs_on: ubuntu-latest
+            arch: "x86_64"
+            archery_arch: "amd64"
+            archery_arch_alias: "x86_64"
+            archery_arch_short: "amd64"
+    env:
+      # architecture name used for archery build
+      ARCH: ${{ matrix.platform.archery_arch }}
+      ARCH_ALIAS: ${{ matrix.platform.archery_arch_alias }}
+      ARCH_SHORT: ${{ matrix.platform.archery_arch_short }}
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - name: Checkout apache/arrow-java
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:
+          fetch-depth: 0
+          submodules: recursive
+      - name: Checkout apache/arrow
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:
+          repository: apache/arrow
+          fetch-depth: 0
+          path: arrow
+          submodules: recursive
+      - uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Build C++ libraries
+        env:
+          VCPKG_BINARY_SOURCES: "clear;nuget,GitHub,readwrite"
+        run: |
+          docker compose run vcpkg-jni
+      - name: Push Docker image
+        # Fork pull requests only get a read-only GITHUB_TOKEN, and branch
+        # builds must not overwrite the shared image tag.
+        if: github.event_name == 'push' && github.repository == 'apache/arrow-java' && github.ref_name == 'main'
+        run: |
+          docker push ghcr.io/apache/arrow-java-dev:amd64-vcpkg-jni
+      - name: Compress into single artifact to keep directory structure
+        run: tar -cvzf arrow-shared-libs-linux-${{ matrix.platform.arch }}.tar.gz dist/
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: ubuntu-shared-lib-${{ matrix.platform.arch }}
+          path: arrow-shared-libs-linux-${{ matrix.platform.arch }}.tar.gz
+
+  cpp-macos:
+    name: Build C++ libraries macOS ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }}
+    runs-on: ${{ matrix.platform.runs_on }}
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - { runs_on: macos-13, arch: "x86_64"}
+          - { runs_on: macos-14, arch: "aarch_64" }
+    env:
+      MACOSX_DEPLOYMENT_TARGET: "14.0"
+    steps:
+      - name: Checkout apache/arrow-java
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:
+          fetch-depth: 0
+          submodules: recursive
+      - name: Checkout apache/arrow
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:
+          repository: apache/arrow
+          fetch-depth: 0
+          path: arrow
+          submodules: recursive
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          cache: 'pip'
+          python-version: "3.12"
+      - name: Install Archery
+        run: pip install -e arrow/dev/archery[all]
+      - name: Install dependencies
+        run: |
+          # We want to use llvm@14 to avoid a shared z3
+          # dependency. llvm@14 doesn't depend on z3 but llvm depends
+          # on z3, and Homebrew's z3 provides only a shared library. It
+          # doesn't provide a static z3 because z3's CMake doesn't accept
+          # building both shared and static libraries at once.
+          # See also: Z3_BUILD_LIBZ3_SHARED in
+          # https://github.com/Z3Prover/z3/blob/master/README-CMake.md
+          #
+          # If llvm is installed, Apache Arrow C++ uses llvm rather than
+          # llvm@14 because llvm is newer than llvm@14.
+          brew uninstall llvm || :
+
+          # Ensure updating python@XXX with the "--overwrite" option.
+          # If python@XXX is updated without "--overwrite", it causes
+          # a conflict error, because a Python 3 that was not installed
+          # by Homebrew exists in /usr/local on GitHub Actions. If
+          # Homebrew's python@XXX is updated without "--overwrite", it
+          # tries to replace /usr/local/bin/2to3 and so on and causes
+          # a conflict error.
+          brew update
+          for python_package in $(brew list | grep python@); do
+            brew install --overwrite ${python_package}
+          done
+          brew install --overwrite python
+
+          if [ "$(uname -m)" = "arm64" ]; then
+            # The pkg-config formula is deprecated but it's still installed
+            # on the GitHub Actions runner now. We can remove this once the
+            # pkg-config formula is removed from the GitHub Actions runner.
+            brew uninstall pkg-config || :
+            brew uninstall pkg-config@0.29.2 || :
+          fi
+
+          brew bundle --file=arrow/cpp/Brewfile
+          # We want to link aws-sdk-cpp statically but Homebrew's
+          # aws-sdk-cpp provides only a shared library. If we have
+          # Homebrew's aws-sdk-cpp, our build mixes Homebrew's
+          # aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's
+          # aws-sdk-cpp to ensure using only bundled aws-sdk-cpp.
+          brew uninstall aws-sdk-cpp
+          # We want to use bundled RE2 for static linking. If
+          # Homebrew's RE2 is installed, its header file may be used.
+          # We uninstall Homebrew's RE2 to ensure using bundled RE2.
+          brew uninstall grpc || : # gRPC depends on RE2
+          brew uninstall grpc@1.54 || : # gRPC 1.54 may be installed too
+          brew uninstall re2
+          # We want to use bundled Protobuf for static linking. If
+          # Homebrew's Protobuf is installed, its library file may be
+          # used in tests. We uninstall Homebrew's Protobuf to ensure using
+          # bundled Protobuf.
+          brew uninstall protobuf
+
+          brew bundle --file=Brewfile
+      - name: Build C++ libraries
+        run: |
+          set -e
+          # Make brew Java available to CMake; arrow-java is checked out at the workspace root
+          export JAVA_HOME=$(brew --prefix openjdk@11)/libexec/openjdk.jdk/Contents/Home
+          ./ci/scripts/java_jni_macos_build.sh \
+            "$GITHUB_WORKSPACE" \
+            "$GITHUB_WORKSPACE/arrow" \
+            "$GITHUB_WORKSPACE/arrow-java/cpp-build" \
+            "$GITHUB_WORKSPACE/dist"
+      - name: Compress into single artifact to keep directory structure
+        run: tar -cvzf arrow-shared-libs-macos-${{ matrix.platform.arch }}.tar.gz dist/
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: macos-shared-lib-${{ matrix.platform.arch }}
+          path: arrow-shared-libs-macos-${{ matrix.platform.arch }}.tar.gz
diff --git a/ci/docker/vcpkg-jni.dockerfile b/ci/docker/vcpkg-jni.dockerfile
new file mode 100644
index 00000000..55fa35e0
--- /dev/null
+++ b/ci/docker/vcpkg-jni.dockerfile
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base
+FROM ${base}
+
+# Install the libraries required by Gandiva to run
+# Enable llvm[enable-rtti] in the vcpkg.json to avoid link problems in Gandiva
+RUN vcpkg install \
+        --clean-after-build \
+        --x-install-root=${VCPKG_ROOT}/installed \
+        --x-manifest-root=/arrow/ci/vcpkg \
+        --x-feature=dev \
+        --x-feature=flight \
+        --x-feature=gcs \
+        --x-feature=json \
+        --x-feature=parquet \
+        --x-feature=gandiva \
+        --x-feature=s3
+
+# Install Java
+# We need Java for JNI headers, but we don't invoke Maven in this build.
+ARG java=11
+RUN yum install -y java-$java-openjdk-devel && yum clean all
+
+# For ci/scripts/{cpp,java}_*.sh
+ENV ARROW_HOME=/tmp/local \
+    ARROW_JAVA_CDATA=ON \
+    ARROW_JAVA_JNI=ON \
+    ARROW_USE_CCACHE=ON
+
+LABEL org.opencontainers.image.source="https://github.com/apache/arrow-java"
diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh
new file mode 100755
index 00000000..90c4d154
--- /dev/null
+++ b/ci/scripts/java_jni_macos_build.sh
@@ -0,0 +1,147 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script is like java_jni_build.sh, but is meant for release artifacts
+# and hardcodes assumptions about the environment it is being run in.
+
+set -ex
+
+arrow_java_dir=${1}
+arrow_dir=${2}
+build_dir=${3}
+normalized_arch=$(arch)
+case ${normalized_arch} in
+arm64)
+  normalized_arch=aarch_64
+  ;;
+i386)
+  normalized_arch=x86_64
+  ;;
+esac
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${4}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers (quoted: no word splitting or globbing)
+rm -rf "${build_dir}"
+
+echo "=== Building Arrow C++ libraries ==="
+install_dir=${build_dir}/cpp-install
+: ${ARROW_ACERO:=ON}
+export ARROW_ACERO
+: ${ARROW_BUILD_TESTS:=ON}
+: ${ARROW_DATASET:=ON}
+export ARROW_DATASET
+: ${ARROW_GANDIVA:=ON}
+export ARROW_GANDIVA
+: ${ARROW_ORC:=ON}
+export ARROW_ORC
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_S3:=ON}
+: ${ARROW_USE_CCACHE:=OFF}
+: ${CMAKE_BUILD_TYPE:=Release}
+: ${CMAKE_UNITY_BUILD:=ON}
+
+if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
+  echo "=== ccache statistics before build ==="
+  ccache -sv 2>/dev/null || ccache -s
+fi
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+mkdir -p "${build_dir}/cpp"
+pushd "${build_dir}/cpp"
+
+cmake \
+  -DARROW_ACERO=${ARROW_ACERO} \
+  -DARROW_BUILD_SHARED=OFF \
+  -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
+  -DARROW_CSV=${ARROW_DATASET} \
+  -DARROW_DATASET=${ARROW_DATASET} \
+  -DARROW_SUBSTRAIT=${ARROW_DATASET} \
+  -DARROW_DEPENDENCY_USE_SHARED=OFF \
+  -DARROW_GANDIVA=${ARROW_GANDIVA} \
+  -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
+  -DARROW_JSON=${ARROW_DATASET} \
+  -DARROW_ORC=${ARROW_ORC} \
+  -DARROW_PARQUET=${ARROW_PARQUET} \
+  -DARROW_S3=${ARROW_S3} \
+  -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \
+  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+  -DCMAKE_INSTALL_PREFIX="${install_dir}" \
+  -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+  -DGTest_SOURCE=BUNDLED \
+  -DPARQUET_BUILD_EXAMPLES=OFF \
+  -DPARQUET_BUILD_EXECUTABLES=OFF \
+  -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+  -Dre2_SOURCE=BUNDLED \
+  -GNinja \
+  "${arrow_dir}/cpp"
+cmake --build . --target install
+
+if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then
+  # MinIO is required
+  exclude_tests="arrow-s3fs-test"
+  # unstable
+  exclude_tests="${exclude_tests}|arrow-acero-asof-join-node-test"
+  exclude_tests="${exclude_tests}|arrow-acero-hash-join-node-test"
+  ctest \
+    --exclude-regex "${exclude_tests}" \
+    --label-regex unittest \
+    --output-on-failure \
+    --parallel "$(sysctl -n hw.ncpu)" \
+    --timeout 300
+fi
+
+popd
+
+export JAVA_JNI_CMAKE_ARGS="-DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install"
+"${arrow_java_dir}/ci/scripts/java_jni_build.sh" \
+  "${arrow_java_dir}" \
+  "${install_dir}" \
+  "${build_dir}" \
+  "${dist_dir}"
+
+if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
+  echo "=== ccache statistics after build ==="
+  ccache -sv 2>/dev/null || ccache -s
+fi
+
+echo "=== Checking shared dependencies for libraries ==="
+pushd "${dist_dir}"
+archery linking check-dependencies \
+  --allow CoreFoundation \
+  --allow Security \
+  --allow libSystem \
+  --allow libarrow_cdata_jni \
+  --allow libarrow_dataset_jni \
+  --allow libarrow_orc_jni \
+  --allow libc++ \
+  --allow libcurl \
+  --allow libgandiva_jni \
+  --allow libncurses \
+  --allow libobjc \
+  --allow libz \
+  arrow_cdata_jni/${normalized_arch}/libarrow_cdata_jni.dylib \
+  arrow_dataset_jni/${normalized_arch}/libarrow_dataset_jni.dylib \
+  arrow_orc_jni/${normalized_arch}/libarrow_orc_jni.dylib \
+  gandiva_jni/${normalized_arch}/libgandiva_jni.dylib
+popd
diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh
new file mode 100755
index 00000000..c7e71317
--- /dev/null
+++ b/ci/scripts/java_jni_manylinux_build.sh
@@ -0,0 +1,177 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script is like java_jni_build.sh, but is meant for release artifacts
+# and hardcodes assumptions about the environment it is being run in.
+
+set -eo pipefail
+
+arrow_java_dir=${1}
+arrow_dir=${2}
+build_dir=${3}
+normalized_arch=$(arch)
+case ${normalized_arch} in
+aarch64)
+  normalized_arch=aarch_64
+  ;;
+esac
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${4}
+
+echo "=== Install Archery ==="
+pip install -e "${arrow_dir}/dev/archery[all]"
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf "${build_dir}"
+rm -rf "${dist_dir}"
+
+echo "=== Building Arrow C++ libraries ==="
+devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} |
+  grep -o "^[0-9]*")
+devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
+: ${ARROW_ACERO:=ON}
+export ARROW_ACERO
+: ${ARROW_BUILD_TESTS:=ON}
+: ${ARROW_DATASET:=ON}
+export ARROW_DATASET
+: ${ARROW_GANDIVA:=ON}
+export ARROW_GANDIVA
+: ${ARROW_GCS:=ON}
+: ${ARROW_JEMALLOC:=ON}
+: ${ARROW_RPATH_ORIGIN:=ON}
+: ${ARROW_ORC:=ON}
+export ARROW_ORC
+: ${ARROW_PARQUET:=ON}
+: ${ARROW_S3:=ON}
+: ${ARROW_USE_CCACHE:=OFF}
+: ${CMAKE_BUILD_TYPE:=release}
+: ${CMAKE_UNITY_BUILD:=ON}
+: ${VCPKG_ROOT:=/opt/vcpkg}
+: ${VCPKG_FEATURE_FLAGS:=-manifests}
+: ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}
+: ${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-lpthread}
+
+if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
+  echo "=== ccache statistics before build ==="
+  ccache -sv 2>/dev/null || ccache -s
+fi
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+mkdir -p "${build_dir}/cpp"
+pushd "${build_dir}/cpp"
+
+cmake \
+  -DARROW_ACERO=${ARROW_ACERO} \
+  -DARROW_BUILD_SHARED=OFF \
+  -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
+  -DARROW_CSV=${ARROW_DATASET} \
+  -DARROW_DATASET=${ARROW_DATASET} \
+  -DARROW_SUBSTRAIT=${ARROW_DATASET} \
+  -DARROW_DEPENDENCY_SOURCE="VCPKG" \
+  -DARROW_DEPENDENCY_USE_SHARED=OFF \
+  -DARROW_GANDIVA_PC_CXX_FLAGS="${GANDIVA_CXX_FLAGS}" \
+  -DARROW_GANDIVA=${ARROW_GANDIVA} \
+  -DARROW_GCS=${ARROW_GCS} \
+  -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
+  -DARROW_ORC=${ARROW_ORC} \
+  -DARROW_PARQUET=${ARROW_PARQUET} \
+  -DARROW_RPATH_ORIGIN=${ARROW_RPATH_ORIGIN} \
+  -DARROW_S3=${ARROW_S3} \
+  -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \
+  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+  -DCMAKE_INSTALL_PREFIX="${ARROW_HOME}" \
+  -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+  -DGTest_SOURCE=BUNDLED \
+  -DORC_SOURCE=BUNDLED \
+  -DORC_PROTOBUF_EXECUTABLE="${VCPKG_ROOT}/installed/${VCPKG_TARGET_TRIPLET}/tools/protobuf/protoc" \
+  -DPARQUET_BUILD_EXAMPLES=OFF \
+  -DPARQUET_BUILD_EXECUTABLES=OFF \
+  -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+  -DVCPKG_MANIFEST_MODE=OFF \
+  -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
+  -GNinja \
+  "${arrow_dir}/cpp"
+ninja install
+
+if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then
+  # MinIO is required
+  exclude_tests="arrow-s3fs-test"
+  case $(arch) in
+    aarch64)
+      # The GCS testbench crashes on aarch64:
+      #   ImportError: ../grpc/_cython/cygrpc.cpython-38-aarch64-linux-gnu.so:
+      #   undefined symbol: vtable for std::__cxx11::basic_ostringstream<
+      #     char, std::char_traits<char>, std::allocator<char> >
+      exclude_tests="${exclude_tests}|arrow-gcsfs-test"
+      ;;
+  esac
+  # unstable
+  exclude_tests="${exclude_tests}|arrow-acero-asof-join-node-test"
+  exclude_tests="${exclude_tests}|arrow-acero-hash-join-node-test"
+  # external dependency
+  exclude_tests="${exclude_tests}|arrow-gcsfs-test"
+  # strptime
+  exclude_tests="${exclude_tests}|arrow-utility-test"
+  ctest \
+    --exclude-regex "${exclude_tests}" \
+    --label-regex unittest \
+    --output-on-failure \
+    --parallel "$(nproc)" \
+    --timeout 300
+fi
+
+popd
+
+JAVA_JNI_CMAKE_ARGS=""
+JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake"
+JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET}"
+export JAVA_JNI_CMAKE_ARGS
+"${arrow_java_dir}/ci/scripts/java_jni_build.sh" \
+  "${arrow_java_dir}" \
+  "${ARROW_HOME}" \
+  "${build_dir}" \
+  "${dist_dir}"
+
+if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
+  echo "=== ccache statistics after build ==="
+  ccache -sv 2>/dev/null || ccache -s
+fi
+
+echo "=== Checking shared dependencies for libraries ==="
+pushd "${dist_dir}"
+archery linking check-dependencies \
+  --allow ld-linux-aarch64 \
+  --allow ld-linux-x86-64 \
+  --allow libc \
+  --allow libdl \
+  --allow libgcc_s \
+  --allow libm \
+  --allow libpthread \
+  --allow librt \
+  --allow libstdc++ \
+  --allow libz \
+  --allow linux-vdso \
+  arrow_cdata_jni/${normalized_arch}/libarrow_cdata_jni.so \
+  arrow_dataset_jni/${normalized_arch}/libarrow_dataset_jni.so \
+  arrow_orc_jni/${normalized_arch}/libarrow_orc_jni.so \
+  gandiva_jni/${normalized_arch}/libgandiva_jni.so
+popd
diff --git a/docker-compose.yml b/docker-compose.yml
index ae378865..4eaf82af 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -31,6 +31,8 @@ volumes:
 
 services:
   ubuntu:
+    # Build and test arrow-java on Ubuntu.
+    #
     # Usage:
     #   docker compose build ubuntu
     #   docker compose run ubuntu
@@ -47,6 +49,10 @@ services:
         /arrow-java/ci/scripts/java_test.sh /arrow-java /build"
 
   conda-jni-cdata:
+    # Builds and tests just the C Data Interface JNI library and JARs.
+    # (No dependencies on arrow-cpp.)
+    # This build isn't meant for distribution. It's for testing only.
+    #
     # Usage:
     #   docker compose build conda-jni-cdata
     #   docker compose run conda-jni-cdata
@@ -75,3 +81,29 @@ services:
         /arrow-java/ci/scripts/java_jni_build.sh /arrow-java /build/jni /build /jni &&
         /arrow-java/ci/scripts/java_build.sh /arrow-java /build /jni &&
         /arrow-java/ci/scripts/java_test.sh /arrow-java /build /jni"
+
+  vcpkg-jni:
+    # Builds all the JNI libraries, but not the JARs.
+    # (Requires arrow-cpp.)
+    # The artifacts from this build are meant to be used for packaging.
+    #
+    # Usage:
+    #   docker compose build vcpkg-jni
+    #   docker compose run vcpkg-jni
+    image: ${REPO}:${ARCH}-vcpkg-jni
+    build:
+      context: .
+      dockerfile: ci/docker/vcpkg-jni.dockerfile
+      cache_from:
+        - ${REPO}:${ARCH}-vcpkg-jni
+      args:
+        base: ${ARROW_REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014-vcpkg-${VCPKG}
+    volumes:
+      - .:/arrow-java:delegated
+      - ${ARROW_REPO_ROOT}:/arrow:delegated
+      - ${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated
+    environment:
+      ARROW_JAVA_CDATA: "ON"
+    command:
+      ["git config --global --add safe.directory /arrow-java && \
+        /arrow-java/ci/scripts/java_jni_manylinux_build.sh /arrow-java /arrow /build /arrow-java/dist"]