From 83065af7c259bb8383b39938e73f27f8ea85838d Mon Sep 17 00:00:00 2001
From: David Li
Date: Wed, 4 Dec 2024 22:03:30 -0500
Subject: [PATCH] GH-13: Set up JNI build (dataset, etc.)
Fixes #13.
---
.env | 7 +
.github/workflows/test_jni.yml | 196 +++++++++++++++++++++++++
ci/docker/vcpkg-jni.dockerfile | 46 ++++++
ci/scripts/java_jni_macos_build.sh | 147 +++++++++++++++++++
ci/scripts/java_jni_manylinux_build.sh | 177 ++++++++++++++++++++++
docker-compose.yml | 32 ++++
6 files changed, 605 insertions(+)
create mode 100644 .github/workflows/test_jni.yml
create mode 100644 ci/docker/vcpkg-jni.dockerfile
create mode 100755 ci/scripts/java_jni_macos_build.sh
create mode 100755 ci/scripts/java_jni_manylinux_build.sh
diff --git a/.env b/.env
index b50a16eb..5398249b 100644
--- a/.env
+++ b/.env
@@ -40,6 +40,7 @@ ARCH_SHORT=amd64
# Default repository to pull and push images from
REPO=ghcr.io/apache/arrow-java-dev
+ARROW_REPO=apache/arrow-dev
# The setup attempts to generate coredumps by default, in order to disable the
# coredump generation set it to 0
@@ -48,3 +49,9 @@ ULIMIT_CORE=-1
# Default versions for various dependencies
JDK=11
MAVEN=3.9.9
+
+# Settings used to build artifacts (apache/arrow checkout path, dependency versions)
+# Keep the versions in sync with apache/arrow
+ARROW_REPO_ROOT=./arrow
+PYTHON=3.9
+VCPKG="943c5ef1c8f6b5e6ced092b242c8299caae2ff01" # 2024.04.26 Release
diff --git a/.github/workflows/test_jni.yml b/.github/workflows/test_jni.yml
new file mode 100644
index 00000000..8c28707d
--- /dev/null
+++ b/.github/workflows/test_jni.yml
@@ -0,0 +1,196 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: "Test (JNI)"
+
+on:
+  push:  # every branch except Dependabot's, plus every tag
+    branches:
+      - "**"
+      - "!dependabot/**"
+    tags:
+      - "**"
+  pull_request:
+
+concurrency:  # one live run per PR branch (or per commit when there is no PR)
+  group: "${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}"
+  cancel-in-progress: true
+
+permissions:  # workflow-wide token scope; a job may declare its own
+  contents: read
+
+env:
+  DOCKER_VOLUME_PREFIX: '.docker/'  # prefix of the maven-cache volume in docker-compose.yml
+
+jobs:
+  cpp-ubuntu:
+    name: Build C++ libraries ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }}
+    runs-on: ${{ matrix.platform.runs_on }}
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - runs_on: ubuntu-latest
+            arch: "x86_64"
+            archery_arch: "amd64"
+            archery_arch_alias: "x86_64"
+            archery_arch_short: "amd64"
+    env:
+      # Architecture names for the build; docker-compose.yml uses ARCH in its image tags
+      ARCH: ${{ matrix.platform.archery_arch }}
+      ARCH_ALIAS: ${{ matrix.platform.archery_arch_alias }}
+      ARCH_SHORT: ${{ matrix.platform.archery_arch_short }}
+    permissions:
+      contents: read
+      packages: write  # needed to push the build image to ghcr.io
+    steps:
+      - name: Checkout apache/arrow-java
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:
+          fetch-depth: 0
+          submodules: recursive
+      - name: Checkout apache/arrow
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:
+          repository: apache/arrow
+          fetch-depth: 0
+          path: arrow  # matches ARROW_REPO_ROOT=./arrow in .env
+          submodules: recursive
+      - uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Build C++ libraries
+        env:
+          VCPKG_BINARY_SOURCES: "clear;nuget,GitHub,readwrite"
+        run: docker compose run vcpkg-jni
+      - name: Push Docker image
+        # Fork pull requests get a read-only token, so push only from the upstream repository
+        if: github.event_name == 'push' && github.repository == 'apache/arrow-java'
+        run: docker compose push vcpkg-jni  # tag is ${REPO}:${ARCH}-vcpkg-jni from docker-compose.yml
+      - name: Compress into single artifact to keep directory structure
+        run: tar -cvzf arrow-shared-libs-linux-${{ matrix.platform.arch }}.tar.gz dist/
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: ubuntu-shared-lib-${{ matrix.platform.arch }}
+          path: arrow-shared-libs-linux-${{ matrix.platform.arch }}.tar.gz
+
+  cpp-macos:
+    name: Build C++ libraries macOS ${{ matrix.platform.runs_on }} ${{ matrix.platform.arch }}
+    runs-on: ${{ matrix.platform.runs_on }}
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - { runs_on: macos-13, arch: "x86_64" }
+          - { runs_on: macos-14, arch: "aarch_64" }
+    env:
+      MACOSX_DEPLOYMENT_TARGET: "14.0"
+    steps:
+      - name: Checkout apache/arrow-java
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:  # no `path:`, so arrow-java is checked out into $GITHUB_WORKSPACE itself
+          fetch-depth: 0
+          submodules: recursive
+      - name: Checkout apache/arrow
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:
+          repository: apache/arrow
+          fetch-depth: 0
+          path: arrow
+          submodules: recursive
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          cache: 'pip'
+          python-version: "3.12"  # quoted so YAML keeps it a string, not a float
+      - name: Install Archery
+        run: pip install -e "arrow/dev/archery[all]"  # quoted: keep [all] away from shell globbing
+      - name: Install dependencies
+        run: |
+          # We want to use llvm@14 to avoid a shared z3
+          # dependency. llvm@14 doesn't depend on z3 but llvm depends
+          # on z3. And Homebrew's z3 provides only a shared library. It
+          # doesn't provide a static z3 because z3's CMake doesn't accept
+          # building both shared and static libraries at once.
+          # See also: Z3_BUILD_LIBZ3_SHARED in
+          # https://github.com/Z3Prover/z3/blob/master/README-CMake.md
+          #
+          # If llvm is installed, Apache Arrow C++ uses llvm rather than
+          # llvm@14 because llvm is newer than llvm@14.
+          brew uninstall llvm || :
+
+          # Ensure updating python@XXX with the "--overwrite" option.
+          # If python@XXX is updated without "--overwrite", it causes
+          # a conflict error, because a Python 3 that was not installed
+          # by Homebrew exists in /usr/local on GitHub Actions. If
+          # Homebrew's python@XXX is updated without "--overwrite", it
+          # tries to replace /usr/local/bin/2to3 and so on and causes
+          # a conflict error.
+          brew update
+          for python_package in $(brew list | grep python@); do
+            brew install --overwrite ${python_package}
+          done
+          brew install --overwrite python
+
+          if [ "$(uname -m)" = "arm64" ]; then
+            # pkg-config formula is deprecated but it's still installed
+            # in GitHub Actions runner now. We can remove this once
+            # pkg-config formula is removed from GitHub Actions runner.
+            brew uninstall pkg-config || :
+            brew uninstall pkg-config@0.29.2 || :
+          fi
+
+          brew bundle --file=arrow/cpp/Brewfile
+          # We want to link aws-sdk-cpp statically but Homebrew's
+          # aws-sdk-cpp provides only a shared library. If we have
+          # Homebrew's aws-sdk-cpp, our build mixes Homebrew's
+          # aws-sdk-cpp and bundled aws-sdk-cpp. We uninstall Homebrew's
+          # aws-sdk-cpp to ensure using only bundled aws-sdk-cpp.
+          brew uninstall aws-sdk-cpp
+          # We want to use bundled RE2 for static linking. If
+          # Homebrew's RE2 is installed, its header file may be used.
+          # We uninstall Homebrew's RE2 to ensure using bundled RE2.
+          brew uninstall grpc || : # gRPC depends on RE2
+          brew uninstall grpc@1.54 || : # gRPC 1.54 may be installed too
+          brew uninstall re2
+          # We want to use bundled Protobuf for static linking. If
+          # Homebrew's Protobuf is installed, its library file may be
+          # used on test. We uninstall Homebrew's Protobuf to ensure using
+          # bundled Protobuf.
+          brew uninstall protobuf
+
+          brew bundle --file=Brewfile
+      - name: Build C++ libraries
+        run: |
+          set -e
+          # make brew Java available to CMake; arrow-java itself is the workspace root
+          export JAVA_HOME=$(brew --prefix openjdk@11)/libexec/openjdk.jdk/Contents/Home
+          ./ci/scripts/java_jni_macos_build.sh \
+            $GITHUB_WORKSPACE \
+            $GITHUB_WORKSPACE/arrow \
+            $GITHUB_WORKSPACE/arrow-java/cpp-build \
+            $GITHUB_WORKSPACE/dist
+      - name: Compress into single artifact to keep directory structure
+        run: tar -cvzf arrow-shared-libs-macos-${{ matrix.platform.arch }}.tar.gz dist/
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: macos-shared-lib-${{ matrix.platform.arch }}
+          path: arrow-shared-libs-macos-${{ matrix.platform.arch }}.tar.gz
diff --git a/ci/docker/vcpkg-jni.dockerfile b/ci/docker/vcpkg-jni.dockerfile
new file mode 100644
index 00000000..55fa35e0
--- /dev/null
+++ b/ci/docker/vcpkg-jni.dockerfile
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+ARG base
+FROM ${base}
+
+# Install the C++ dependencies for the JNI build through the vcpkg manifest features below
+# Enable llvm[enable-rtti] in the vcpkg.json to avoid link problems in Gandiva
+RUN vcpkg install \
+        --clean-after-build \
+        --x-install-root=${VCPKG_ROOT}/installed \
+        --x-manifest-root=/arrow/ci/vcpkg \
+        --x-feature=dev \
+        --x-feature=flight \
+        --x-feature=gcs \
+        --x-feature=json \
+        --x-feature=parquet \
+        --x-feature=gandiva \
+        --x-feature=s3
+
+# Install Java
+# We need Java for JNI headers, but we don't invoke Maven in this build.
+ARG java=11
+RUN yum install -y java-${java}-openjdk-devel && yum clean all
+
+# For ci/scripts/{cpp,java}_*.sh
+ENV ARROW_HOME=/tmp/local \
+    ARROW_JAVA_CDATA=ON \
+    ARROW_JAVA_JNI=ON \
+    ARROW_USE_CCACHE=ON
+
+LABEL org.opencontainers.image.source=https://github.com/apache/arrow-java
diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh
new file mode 100755
index 00000000..90c4d154
--- /dev/null
+++ b/ci/scripts/java_jni_macos_build.sh
@@ -0,0 +1,147 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script is like java_jni_build.sh, but is meant for release artifacts
+# and hardcodes assumptions about the environment it is being run in.
+
+set -ex
+
+arrow_java_dir=${1}
+arrow_dir=${2}
+build_dir=${3}
+normalized_arch=$(arch)
+case ${normalized_arch} in
+arm64)
+ normalized_arch=aarch_64
+ ;;
+i386)
+ normalized_arch=x86_64
+ ;;
+esac
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${4}
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf ${build_dir}
+
+echo "=== Building Arrow C++ libraries ==="
+install_dir="${build_dir}/cpp-install"
+
+# Build switches; a non-empty value from the environment wins over the default.
+# Only these four are exported by this script.
+export ARROW_ACERO="${ARROW_ACERO:-ON}"
+export ARROW_DATASET="${ARROW_DATASET:-ON}"
+export ARROW_GANDIVA="${ARROW_GANDIVA:-ON}"
+export ARROW_ORC="${ARROW_ORC:-ON}"
+# Not exported here; consumed by this script itself.
+ARROW_BUILD_TESTS="${ARROW_BUILD_TESTS:-ON}"
+ARROW_PARQUET="${ARROW_PARQUET:-ON}"
+ARROW_S3="${ARROW_S3:-ON}"
+ARROW_USE_CCACHE="${ARROW_USE_CCACHE:-OFF}"
+CMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE:-Release}"
+CMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD:-ON}"
+
+if [[ "${ARROW_USE_CCACHE}" == "ON" ]]; then
+  echo "=== ccache statistics before build ==="
+  ccache -sv 2>/dev/null || ccache -s
+fi
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+mkdir -p "${build_dir}/cpp"
+pushd "${build_dir}/cpp"
+
+cmake \
+  -DARROW_ACERO=${ARROW_ACERO} \
+  -DARROW_BUILD_SHARED=OFF \
+  -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \
+  -DARROW_CSV=${ARROW_DATASET} \
+  -DARROW_DATASET=${ARROW_DATASET} \
+  -DARROW_SUBSTRAIT=${ARROW_DATASET} \
+  -DARROW_DEPENDENCY_USE_SHARED=OFF \
+  -DARROW_GANDIVA=${ARROW_GANDIVA} \
+  -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \
+  -DARROW_JSON=${ARROW_DATASET} \
+  -DARROW_ORC=${ARROW_ORC} \
+  -DARROW_PARQUET=${ARROW_PARQUET} \
+  -DARROW_S3=${ARROW_S3} \
+  -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \
+  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+  -DCMAKE_INSTALL_PREFIX=${install_dir} \
+  -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+  -DGTest_SOURCE=BUNDLED \
+  -DPARQUET_BUILD_EXAMPLES=OFF \
+  -DPARQUET_BUILD_EXECUTABLES=OFF \
+  -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+  -Dre2_SOURCE=BUNDLED \
+  -GNinja \
+  ${arrow_dir}/cpp
+cmake --build . --target install
+
+if [[ "${ARROW_BUILD_TESTS}" == "ON" ]]; then
+  # Left out of the run: arrow-s3fs-test requires MinIO, and the two
+  # Acero join tests are unstable.
+  exclude_tests="arrow-s3fs-test"
+  exclude_tests+="|arrow-acero-asof-join-node-test"
+  exclude_tests+="|arrow-acero-hash-join-node-test"
+  ctest \
+    --exclude-regex "${exclude_tests}" \
+    --label-regex unittest \
+    --output-on-failure \
+    --parallel $(sysctl -n hw.ncpu) \
+    --timeout 300
+fi
+
+popd
+
+# Point the JNI build at the Protobuf tree under the C++ build directory, then run it with
+# the source tree, the C++ install prefix, the build directory and the dist directory.
+export JAVA_JNI_CMAKE_ARGS="-DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install"
+${arrow_java_dir}/ci/scripts/java_jni_build.sh \
+  ${arrow_java_dir} ${install_dir} \
+  ${build_dir} ${dist_dir}
+
+if [[ "${ARROW_USE_CCACHE}" == "ON" ]]; then
+  echo "=== ccache statistics after build ==="
+  ccache -sv 2>/dev/null || ccache -s
+fi
+
+echo "=== Checking shared dependencies for libraries ==="
+pushd ${dist_dir}
+archery linking check-dependencies \
+ --allow CoreFoundation \
+ --allow Security \
+ --allow libSystem \
+ --allow libarrow_cdata_jni \
+ --allow libarrow_dataset_jni \
+ --allow libarrow_orc_jni \
+ --allow libc++ \
+ --allow libcurl \
+ --allow libgandiva_jni \
+ --allow libncurses \
+ --allow libobjc \
+ --allow libz \
+ arrow_cdata_jni/${normalized_arch}/libarrow_cdata_jni.dylib \
+ arrow_dataset_jni/${normalized_arch}/libarrow_dataset_jni.dylib \
+ arrow_orc_jni/${normalized_arch}/libarrow_orc_jni.dylib \
+ gandiva_jni/${normalized_arch}/libgandiva_jni.dylib
+popd
diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh
new file mode 100755
index 00000000..c7e71317
--- /dev/null
+++ b/ci/scripts/java_jni_manylinux_build.sh
@@ -0,0 +1,177 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script is like java_jni_build.sh, but is meant for release artifacts
+# and hardcodes assumptions about the environment it is being run in.
+
+set -eo pipefail
+
+arrow_java_dir=${1}
+arrow_dir=${2}
+build_dir=${3}
+normalized_arch=$(arch)
+case ${normalized_arch} in
+aarch64)
+ normalized_arch=aarch_64
+ ;;
+esac
+# The directory where the final binaries will be stored when scripts finish
+dist_dir=${4}
+
+echo "=== Install Archery ==="
+pip install -e "${arrow_dir}/dev/archery[all]"
+
+echo "=== Clear output directories and leftovers ==="
+# Clear output directories and leftovers
+rm -rf ${build_dir}
+rm -rf "${dist_dir}"
+
+echo "=== Building Arrow C++ libraries ==="
+# Find the installed devtoolset GCC and derive its C++ header directory
+devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} |
+  grep -o "^[0-9]*")
+devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}"
+
+# Build switches; a non-empty value from the environment wins over the default.
+# Only these four are exported by this script.
+export ARROW_ACERO="${ARROW_ACERO:-ON}"
+export ARROW_DATASET="${ARROW_DATASET:-ON}"
+export ARROW_GANDIVA="${ARROW_GANDIVA:-ON}"
+export ARROW_ORC="${ARROW_ORC:-ON}"
+ARROW_BUILD_TESTS="${ARROW_BUILD_TESTS:-ON}"
+ARROW_GCS="${ARROW_GCS:-ON}"
+ARROW_JEMALLOC="${ARROW_JEMALLOC:-ON}"
+ARROW_RPATH_ORIGIN="${ARROW_RPATH_ORIGIN:-ON}"
+ARROW_PARQUET="${ARROW_PARQUET:-ON}"
+ARROW_S3="${ARROW_S3:-ON}"
+ARROW_USE_CCACHE="${ARROW_USE_CCACHE:-OFF}"
+CMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE:-release}"
+CMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD:-ON}"
+VCPKG_ROOT="${VCPKG_ROOT:-/opt/vcpkg}"
+VCPKG_FEATURE_FLAGS="${VCPKG_FEATURE_FLAGS:--manifests}"
+VCPKG_TARGET_TRIPLET="${VCPKG_TARGET_TRIPLET:-${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}}"
+GANDIVA_CXX_FLAGS="${GANDIVA_CXX_FLAGS:--isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-lpthread}"
+
+if [[ "${ARROW_USE_CCACHE}" == "ON" ]]; then
+  echo "=== ccache statistics before build ==="
+  ccache -sv 2>/dev/null || ccache -s
+fi
+
+export ARROW_TEST_DATA="${arrow_dir}/testing/data"
+export PARQUET_TEST_DATA="${arrow_dir}/cpp/submodules/parquet-testing/data"
+export AWS_EC2_METADATA_DISABLED=TRUE
+
+mkdir -p "${build_dir}/cpp"
+pushd "${build_dir}/cpp"
+
+cmake \
+ -DARROW_ACERO=${ARROW_ACERO} \
+ -DARROW_BUILD_SHARED=OFF \
+ -DARROW_BUILD_TESTS=ON \
+ -DARROW_CSV=${ARROW_DATASET} \
+ -DARROW_DATASET=${ARROW_DATASET} \
+ -DARROW_SUBSTRAIT=${ARROW_DATASET} \
+ -DARROW_DEPENDENCY_SOURCE="VCPKG" \
+ -DARROW_DEPENDENCY_USE_SHARED=OFF \
+ -DARROW_GANDIVA_PC_CXX_FLAGS=${GANDIVA_CXX_FLAGS} \
+ -DARROW_GANDIVA=${ARROW_GANDIVA} \
+ -DARROW_GCS=${ARROW_GCS} \
+ -DARROW_JEMALLOC=${ARROW_JEMALLOC} \
+ -DARROW_ORC=${ARROW_ORC} \
+ -DARROW_PARQUET=${ARROW_PARQUET} \
+ -DARROW_RPATH_ORIGIN=${ARROW_RPATH_ORIGIN} \
+ -DARROW_S3=${ARROW_S3} \
+ -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
+ -DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \
+ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD} \
+ -DGTest_SOURCE=BUNDLED \
+ -DORC_SOURCE=BUNDLED \
+ -DORC_PROTOBUF_EXECUTABLE=${VCPKG_ROOT}/installed/${VCPKG_TARGET_TRIPLET}/tools/protobuf/protoc \
+ -DPARQUET_BUILD_EXAMPLES=OFF \
+ -DPARQUET_BUILD_EXECUTABLES=OFF \
+ -DPARQUET_REQUIRE_ENCRYPTION=OFF \
+ -DVCPKG_MANIFEST_MODE=OFF \
+ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \
+ -GNinja \
+ ${arrow_dir}/cpp
+ninja install
+
+if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then
+ # MinIO is required
+ exclude_tests="arrow-s3fs-test"
+ case $(arch) in
+ aarch64)
+ # GCS testbench is crashed on aarch64:
+ # ImportError: ../grpc/_cython/cygrpc.cpython-38-aarch64-linux-gnu.so:
+ # undefined symbol: vtable for std::__cxx11::basic_ostringstream<
+ # char, std::char_traits, std::allocator >
+ exclude_tests="${exclude_tests}|arrow-gcsfs-test"
+ ;;
+ esac
+ # unstable
+ exclude_tests="${exclude_tests}|arrow-acero-asof-join-node-test"
+ exclude_tests="${exclude_tests}|arrow-acero-hash-join-node-test"
+ # external dependency
+ exclude_tests="${exclude_tests}|arrow-gcsfs-test"
+ # strptime
+ exclude_tests="${exclude_tests}|arrow-utility-test"
+ ctest \
+ --exclude-regex "${exclude_tests}" \
+ --label-regex unittest \
+ --output-on-failure \
+ --parallel $(nproc) \
+ --timeout 300
+fi
+
+popd
+
+# Hand the vcpkg toolchain file and target triplet to the JNI CMake build
+JAVA_JNI_CMAKE_ARGS=" -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake"
+JAVA_JNI_CMAKE_ARGS+=" -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET}"
+export JAVA_JNI_CMAKE_ARGS
+${arrow_java_dir}/ci/scripts/java_jni_build.sh \
+  ${arrow_java_dir} \
+  ${ARROW_HOME} \
+  ${build_dir} \
+  ${dist_dir}
+
+if [[ "${ARROW_USE_CCACHE}" == "ON" ]]; then
+  echo "=== ccache statistics after build ==="
+  ccache -sv 2>/dev/null || ccache -s
+fi
+
+echo "=== Checking shared dependencies for libraries ==="
+pushd ${dist_dir}
+archery linking check-dependencies \
+ --allow ld-linux-aarch64 \
+ --allow ld-linux-x86-64 \
+ --allow libc \
+ --allow libdl \
+ --allow libgcc_s \
+ --allow libm \
+ --allow libpthread \
+ --allow librt \
+ --allow libstdc++ \
+ --allow libz \
+ --allow linux-vdso \
+ arrow_cdata_jni/${normalized_arch}/libarrow_cdata_jni.so \
+ arrow_dataset_jni/${normalized_arch}/libarrow_dataset_jni.so \
+ arrow_orc_jni/${normalized_arch}/libarrow_orc_jni.so \
+ gandiva_jni/${normalized_arch}/libgandiva_jni.so
+popd
diff --git a/docker-compose.yml b/docker-compose.yml
index ae378865..4eaf82af 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -31,6 +31,8 @@ volumes:
services:
ubuntu:
+ # Build and test arrow-java on Ubuntu.
+ #
# Usage:
# docker compose build ubuntu
# docker compose run ubuntu
@@ -47,6 +49,10 @@ services:
/arrow-java/ci/scripts/java_test.sh /arrow-java /build"
conda-jni-cdata:
+ # Builds and tests just the C Data Interface JNI library and JARs.
+ # (No dependencies on arrow-cpp.)
+ # This build isn't meant for distribution. It's for testing only.
+ #
# Usage:
# docker compose build conda-jni-cdata
# docker compose run conda-jni-cdata
@@ -75,3 +81,29 @@ services:
/arrow-java/ci/scripts/java_jni_build.sh /arrow-java /build/jni /build /jni &&
/arrow-java/ci/scripts/java_build.sh /arrow-java /build /jni &&
/arrow-java/ci/scripts/java_test.sh /arrow-java /build /jni"
+
+  vcpkg-jni:
+    # Builds every JNI library (this needs arrow-cpp) but none of the JARs.
+    # The resulting artifacts are intended for packaging.
+    # NOTE(review): the command is a single list element; presumably the base
+    # image's entrypoint is a shell that accepts a command string - confirm.
+    # Usage:
+    #   docker compose build vcpkg-jni
+    #   docker compose run vcpkg-jni
+    image: "${REPO}:${ARCH}-vcpkg-jni"
+    build:
+      context: "."
+      dockerfile: "ci/docker/vcpkg-jni.dockerfile"
+      cache_from: ["${REPO}:${ARCH}-vcpkg-jni"]
+      args:
+        base: "${ARROW_REPO}:${ARCH}-python-${PYTHON}-wheel-manylinux-2014-vcpkg-${VCPKG}"
+    volumes:
+      - ".:/arrow-java:delegated"
+      - "${ARROW_REPO_ROOT}:/arrow:delegated"
+      - "${DOCKER_VOLUME_PREFIX}maven-cache:/root/.m2:delegated"
+    environment:
+      - ARROW_JAVA_CDATA=ON
+    command:
+      - >-
+        git config --global --add safe.directory /arrow-java &&
+        /arrow-java/ci/scripts/java_jni_manylinux_build.sh /arrow-java /arrow /build /arrow-java/dist