Skip to content

Commit

Permalink
Merge branch 'main' into parquet/support-write-bloom-filter
Browse files Browse the repository at this point in the history
  • Loading branch information
mapleFU committed Jan 13, 2025
2 parents ef3291d + 913cb58 commit 7aee7dd
Show file tree
Hide file tree
Showing 112 changed files with 3,697 additions and 971 deletions.
9 changes: 5 additions & 4 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ ULIMIT_CORE=-1

# Default versions for platforms
ALMALINUX=8
ALPINE_LINUX=3.16
ALPINE_LINUX=3.18
DEBIAN=12
FEDORA=39
UBUNTU=22.04
Expand All @@ -57,7 +57,7 @@ CLANG_TOOLS=14
CUDA=11.2.2
DASK=latest
DOTNET=8.0
GCC_VERSION=""
GCC=
HDFS=3.2.1
JDK=11
KARTOTHEK=latest
Expand Down Expand Up @@ -92,10 +92,11 @@ TZ=UTC
VCPKG="943c5ef1c8f6b5e6ced092b242c8299caae2ff01" # 2024.04.26 Release

# This must be updated when we update
# ci/docker/python-wheel-windows-vs2019.dockerfile.
# ci/docker/python-*-windows-*.dockerfile.
# This is a workaround for our CI problem that "archery docker build" doesn't
# use pulled built images in dev/tasks/python-wheels/github.windows.yml.
PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2024-08-06
PYTHON_WHEEL_WINDOWS_IMAGE_REVISION=2025-01-08
PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION=2025-01-08

# Use conanio/${CONAN_BASE}:{CONAN_VERSION} for "docker compose run --rm conan".
# See https://github.com/conan-io/conan-docker-tools#readme and
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/csharp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
dotnet: ['8.0.x']
steps:
- name: Install C#
uses: actions/setup-dotnet@v4.1.0
uses: actions/setup-dotnet@v4.2.0
with:
dotnet-version: ${{ matrix.dotnet }}
- name: Setup Python
Expand Down Expand Up @@ -86,7 +86,7 @@ jobs:
dotnet: ['8.0.x']
steps:
- name: Install C#
uses: actions/setup-dotnet@v4.1.0
uses: actions/setup-dotnet@v4.2.0
with:
dotnet-version: ${{ matrix.dotnet }}
- name: Checkout Arrow
Expand All @@ -113,7 +113,7 @@ jobs:
dotnet: ['8.0.x']
steps:
- name: Install C#
uses: actions/setup-dotnet@v4.1.0
uses: actions/setup-dotnet@v4.2.0
with:
dotnet-version: ${{ matrix.dotnet }}
- name: Setup Python
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ jobs:
with:
ruby-version: ruby
- name: Install .NET
uses: actions/setup-dotnet@3e891b0cb619bf60e2c25674b222b8940e2c1c25 # v4.1.0
uses: actions/setup-dotnet@87b7050bc53ea08284295505d98d2aa94301e852 # v4.2.0
with:
dotnet-version: '8.0.x'
- name: Install Dependencies
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr_review_trigger.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: "Upload PR review Payload"
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
with:
path: "${{ github.event_path }}"
name: "pr_review_payload"
6 changes: 3 additions & 3 deletions .github/workflows/r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ jobs:
if: always()
- name: Save the test output
if: always()
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
with:
name: test-output-${{ matrix.ubuntu }}-${{ matrix.r }}
path: r/check/arrow.Rcheck/tests/testthat.Rout*
Expand Down Expand Up @@ -237,7 +237,7 @@ jobs:
if: always()
- name: Save the test output
if: always()
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
with:
name: test-output-bundled
path: r/check/arrow.Rcheck/tests/testthat.Rout*
Expand Down Expand Up @@ -299,7 +299,7 @@ jobs:
# So that they're unique when multiple are downloaded in the next step
shell: bash
run: mv libarrow.zip libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip
- uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
with:
name: libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip
path: libarrow-rtools${{ matrix.config.rtools }}-${{ matrix.config.arch }}.zip
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ repos:
files: >-
(
?^ci/docker/conda-python-emscripten\.dockerfile$|
?^ci/docker/python-wheel-windows-test-vs2019\.dockerfile$|
?^ci/docker/python-.*-wheel-windows-test-vs2019.*\.dockerfile$|
)
types: []
- repo: https://github.com/pycqa/flake8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
# under the License.

ARG arch=amd64
FROM ${arch}/alpine:3.16
FROM ${arch}/alpine:3.18

RUN apk add \
apache-orc-dev \
bash \
benchmark-dev \
boost-dev \
Expand All @@ -39,8 +40,8 @@ RUN apk add \
grpc-dev \
gtest-dev \
libxml2-dev \
llvm13-dev \
llvm13-static \
llvm16-dev \
llvm16-static \
lz4-dev \
make \
musl-locales \
Expand All @@ -61,6 +62,7 @@ RUN apk add \
thrift-dev \
tzdata \
utf8proc-dev \
xsimd-dev \
zlib-dev \
zstd-dev && \
rm -rf /var/cache/apk/* && \
Expand Down Expand Up @@ -98,6 +100,5 @@ ENV ARROW_ACERO=ON \
ARROW_WITH_ZSTD=ON \
AWSSDK_SOURCE=BUNDLED \
google_cloud_cpp_storage_SOURCE=BUNDLED \
ORC_SOURCE=BUNDLED \
PATH=/usr/lib/ccache/:$PATH \
xsimd_SOURCE=BUNDLED
MUSL_LOCPATH=/usr/share/i18n/locales/musl \
PATH=/usr/lib/ccache/bin:$PATH
3 changes: 2 additions & 1 deletion ci/docker/conda-integration.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ RUN mamba install -q -y \
maven=${maven} \
nodejs=${node} \
yarn=${yarn} \
openjdk=${jdk} && \
openjdk=${jdk} \
zstd && \
mamba clean --all --force-pkgs-dirs

# Install Rust with only the needed components
Expand Down
6 changes: 3 additions & 3 deletions ci/docker/conda-python-emscripten.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ ARG required_python_min="(3,12)"
# fail if python version < 3.12
RUN echo "check PYTHON>=${required_python_min}" && python -c "import sys;sys.exit(0 if sys.version_info>=${required_python_min} else 1)"

# install selenium and pyodide-build and recent python
# install selenium and recent pyodide-build and recent python

# needs to be a login shell so ~/.profile is read
SHELL ["/bin/bash", "--login", "-c", "-o", "pipefail"]

# The requirement specifier must be quoted: unquoted, bash treats ">=..." as
# an output redirection (stdout goes to a file named "=<version>") and pip
# never sees the minimum-version constraint.
RUN python -m pip install --no-cache-dir selenium==${selenium_version} && \
python -m pip install --no-cache-dir --upgrade pyodide-build==${pyodide_version}
python -m pip install --no-cache-dir --upgrade "pyodide-build>=${pyodide_version}"

# install pyodide dist directory to /pyodide
RUN pyodide_dist_url="https://github.com/pyodide/pyodide/releases/download/${pyodide_version}/pyodide-${pyodide_version}.tar.bz2" && \
wget -q "${pyodide_dist_url}" -O- | tar -xj -C /
Expand Down
143 changes: 143 additions & 0 deletions ci/docker/debian-experimental-cpp.dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Base image: Debian experimental for the requested CPU architecture
# (defaults to amd64).
ARG arch=amd64
FROM ${arch}/debian:experimental
# Re-declare arch after FROM so it stays visible inside this build stage.
ARG arch

# key=value form; the space-separated "ENV key value" syntax is legacy.
# Prevents apt/debconf from prompting during package installation.
ENV DEBIAN_FRONTEND=noninteractive

# Optional toolchain versions. When a value is given, the matching versioned
# packages (gcc-<gcc>, g++-<gcc>, clang-<llvm>, llvm-<llvm>-dev) are requested
# from the experimental suite; when empty, the unversioned names are used.
ARG gcc
ARG llvm
# ${var:+-${var}} yields "-<version>" only when var is set and non-empty,
# otherwise the empty string.
RUN gcc_package_suffix="${gcc:+-${gcc}}" && \
    llvm_package_suffix="${llvm:+-${llvm}}" && \
    apt-get update -y -q && \
    apt-get install -y -q --no-install-recommends \
        autoconf \
        ccache \
        cmake \
        curl \
        g++ \
        gcc \
        gdb \
        git \
        libbenchmark-dev \
        libboost-filesystem-dev \
        libboost-system-dev \
        libbrotli-dev \
        libbz2-dev \
        libc-ares-dev \
        libcurl4-openssl-dev \
        libgflags-dev \
        libgmock-dev \
        libgoogle-glog-dev \
        libgrpc++-dev \
        libidn2-dev \
        libkrb5-dev \
        libldap-dev \
        liblz4-dev \
        libnghttp2-dev \
        libprotobuf-dev \
        libprotoc-dev \
        libpsl-dev \
        libre2-dev \
        librtmp-dev \
        libsnappy-dev \
        libsqlite3-dev \
        libssh-dev \
        libssh2-1-dev \
        libssl-dev \
        libthrift-dev \
        libutf8proc-dev \
        libxml2-dev \
        libxsimd-dev \
        libzstd-dev \
        make \
        ninja-build \
        nlohmann-json3-dev \
        npm \
        opentelemetry-cpp-dev \
        pkg-config \
        protobuf-compiler-grpc \
        python3-dev \
        python3-pip \
        python3-venv \
        rapidjson-dev \
        rsync \
        tzdata \
        zlib1g-dev && \
    # Compilers and LLVM come from the experimental suite specifically.
    apt-get install -y -q --no-install-recommends -t experimental \
        clang${llvm_package_suffix} \
        g++${gcc_package_suffix} \
        gcc${gcc_package_suffix} \
        llvm${llvm_package_suffix}-dev && \
    # Drop apt caches in the same layer so they do not persist in the image.
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Each helper script is copied and executed in its own COPY/RUN pair so that a
# change to one script only invalidates the layers from that point onward.

# MinIO installer; arguments are presumably <version> <prefix> — confirm
# against ci/scripts/install_minio.sh.
COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_minio.sh latest /usr/local

# GCS testbench installer; "default" is presumably the version selector —
# confirm against the script.
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

# Azurite installer (invoked without arguments).
COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_azurite.sh

# sccache installer; arguments look like <build/target suffix> <install dir> —
# confirm against ci/scripts/install_sccache.sh.
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

# Prioritize system packages and local installation.
# The *_SOURCE=BUNDLED entries select bundled builds for those specific
# dependencies. CC/CXX resolve to gcc-<gcc>/g++-<gcc> when the "gcc" build
# argument is non-empty and to plain gcc/g++ otherwise.
# Each variable is listed exactly once (ARROW_DATASET was previously repeated).
ENV ARROW_ACERO=ON \
    ARROW_AZURE=ON \
    ARROW_BUILD_TESTS=ON \
    ARROW_DATASET=ON \
    ARROW_DEPENDENCY_SOURCE=SYSTEM \
    ARROW_FLIGHT=ON \
    ARROW_FLIGHT_SQL=ON \
    ARROW_GANDIVA=ON \
    ARROW_GCS=ON \
    ARROW_HOME=/usr/local \
    ARROW_JEMALLOC=ON \
    ARROW_ORC=ON \
    ARROW_PARQUET=ON \
    ARROW_S3=ON \
    ARROW_SUBSTRAIT=ON \
    ARROW_USE_CCACHE=ON \
    ARROW_WITH_BROTLI=ON \
    ARROW_WITH_BZ2=ON \
    ARROW_WITH_LZ4=ON \
    ARROW_WITH_OPENTELEMETRY=ON \
    ARROW_WITH_SNAPPY=ON \
    ARROW_WITH_ZLIB=ON \
    ARROW_WITH_ZSTD=ON \
    AWSSDK_SOURCE=BUNDLED \
    Azure_SOURCE=BUNDLED \
    CC=gcc${gcc:+-${gcc}} \
    CXX=g++${gcc:+-${gcc}} \
    google_cloud_cpp_storage_SOURCE=BUNDLED \
    ORC_SOURCE=BUNDLED \
    PATH=/usr/lib/ccache/:$PATH \
    PYTHON=python3
8 changes: 4 additions & 4 deletions ci/docker/linux-apt-r.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,10 @@ RUN apt-get update -y && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

ARG gcc_version=""
RUN if [ "${gcc_version}" != "" ]; then \
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${gcc_version} 100 && \
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${gcc_version} 100 && \
ARG gcc=""
RUN if [ "${gcc}" != "" ]; then \
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-${gcc} 100 && \
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-${gcc} 100 && \
update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 30 && \
update-alternatives --set cc /usr/bin/gcc && \
update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 30 && \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# NOTE: You must update PYTHON_WHEEL_WINDOWS_TEST_IMAGE_REVISION in .env
# when you update this file.

# Base image reference. It has no default, so the caller must supply it as a
# build argument.
ARG base
# https://github.com/hadolint/hadolint/wiki/DL3006
# (Hadolint does not expand variables and thinks '${base}' is an untagged image)
# hadolint ignore=DL3006
FROM ${base}

# Python minor version; used below to select the free-threaded interpreter
# through the Python launcher.
ARG python=3.13

SHELL ["powershell", "-NoProfile", "-Command", "$ErrorActionPreference = 'Stop'; $ProgressPreference = 'SilentlyContinue';"]
# Download the CPython installer and run it silently with the free-threaded
# build enabled. The patch version lives in one variable so the file name and
# the download URL cannot drift apart, and the installer is deleted in the
# same layer so it does not remain in the image.
RUN $version = '3.13.1'; \
    $filename = 'python-' + $version + '-amd64.exe'; \
    $url = 'https://www.python.org/ftp/python/' + $version + '/' + $filename; \
    Invoke-WebRequest -Uri $url -OutFile $filename; \
    Start-Process -FilePath $filename -ArgumentList '/quiet', 'Include_freethreaded=1' -Wait; \
    Remove-Item -Path $filename

# "py -<version>t" asks the Python launcher for the free-threaded build.
ENV PYTHON_CMD="py -${python}t"

SHELL ["cmd", "/S", "/C"]
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN %PYTHON_CMD% -m pip install --no-cache-dir -U pip setuptools

COPY python/requirements-wheel-test.txt C:/arrow/python/
# Cython and Pandas wheels for 3.13 free-threaded are not released yet
RUN %PYTHON_CMD% -m pip install \
    --no-cache-dir \
    --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \
    --pre \
    --prefer-binary \
    -r C:/arrow/python/requirements-wheel-test.txt
# cffi-based tests would crash when importing cffi.
# hadolint ignore=DL3059
RUN %PYTHON_CMD% -m pip uninstall -y cffi

# Runtime environment for the test container:
#   PYTHON     - the selected Python version with the free-threaded "t" suffix
#   PYTHON_GIL - 0 keeps the GIL disabled in the free-threaded interpreter
ENV PYTHON="${python}t" \
    PYTHON_GIL=0
Loading

0 comments on commit 7aee7dd

Please sign in to comment.