Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into dict_fsst_compression
Browse files Browse the repository at this point in the history
  • Loading branch information
Tishj committed Jan 7, 2025
2 parents 48b84bb + adc6f60 commit 16bd3a0
Show file tree
Hide file tree
Showing 724 changed files with 28,766 additions and 17,783 deletions.
85 changes: 6 additions & 79 deletions .github/actions/build_extensions_dockerized/action.yml
Original file line number Diff line number Diff line change
@@ -1,89 +1,13 @@
name: "Build Extensions"
description: "Build, test and deploy the DuckDB extensions"
name: "Build Extensions using Docker"
description: "Build and test the DuckDB extensions"
inputs:
# Test config
run_tests:
description: 'Run extension tests after build'
default: 1
run_autoload_tests:
description: 'Runs the autoloading tests'
default: 1

# Deploy config
deploy_as:
description: 'Binary architecture name for deploy step'
default: ''
deploy_version:
description: 'Version tag or commit short hash for deploy step'
default: ''
s3_id:
description: 'S3 key ID'
default: ''
s3_key:
description: 'S3 key secret'
default: ''
signing_pk:
description: 'Extension signing RSA private key'
default: ''

# Build config
duckdb_arch:
description: 'Provide DUCKDB_PLATFORM to build system for cross compilation'
default: ''
static_link_build:
description: 'Links DuckDB statically to the loadable extensions'
default: 1
no_static_linking:
description: 'Disables linking extensions into DuckDB for testing'
default: 0
vcpkg_build:
description: 'Installs vcpkg and pass its toolchain to CMakes'
default: 1
build_dir:
description: 'DuckDB source directory to run the build in'
default: '.'
ninja:
description: 'Use ninja for building'
default: 0
openssl_path:
description: 'Directory of OpenSSL installation'
default: ''
post_install:
description: 'Post-install scripts to run'
default: ''
treat_warn_as_error:
description: 'Treat compilation warnings as errors'
default: 1
build_in_tree_extensions:
description: 'Build in-tree extensions'
default: 1
build_out_of_tree_extensions:
description: 'Build out-of-tree extensions'
default: 1
osx_universal:
description: 'Build Universal Binary for OSX'
default: 0
osx_arch:
description: 'Build specific architecture for OSX'
default: ''
aarch64_cross_compile:
description: 'Enable Linux aarch64 cross-compiling'
default: 0
vcpkg_target_triplet:
description: 'Target triplet for installing vcpkg dependencies'
default: ''
override_cc:
description: 'Override C Compiler'
default: ''
override_cxx:
description: 'Override CXX Compiler'
default: ''
unittest_script:
description: 'Script/program to execute the unittests'
default: 'python3 scripts/run_tests_one_by_one.py ./build/release/test/unittest'
cmake_flags:
description: 'Flags to be passed to cmake'
default: ''

runs:
using: "composite"
Expand Down Expand Up @@ -131,7 +55,9 @@ runs:
echo "DUCKDB_PLATFORM=${{ inputs.duckdb_arch }}" >> docker_env.txt
echo "DUCKDB_GIT_VERSION=${{ inputs.override_git_describe }}" >> docker_env.txt
echo "LINUX_CI_IN_DOCKER=1" >> docker_env.txt
echo "TOOLCHAIN_FLAGS=''" >> docker_env.txt
echo "TOOLCHAIN_FLAGS=${{ inputs.duckdb_arch == 'linux_arm64' && '-DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ -DCMAKE_Fortran_COMPILER=aarch64-linux-gnu-gfortran' || '' }}" >> docker_env.txt
echo "CC=${{ inputs.duckdb_arch == 'linux_arm64' && 'aarch64-linux-gnu-gcc' || '' }}" >> docker_env.txt
echo "CXX=${{ inputs.duckdb_arch == 'linux_arm64' && 'aarch64-linux-gnu-g++' || '' }}" >> docker_env.txt
- name: Generate timestamp for Ccache entry
shell: cmake -P {0}
Expand Down Expand Up @@ -165,5 +91,6 @@ runs:
- name: Test extension (inside docker)
shell: bash
if: inputs.duckdb_arch != 'linux_arm64'
run: |
docker run --env-file=docker_env.txt -v `pwd`:/duckdb_build_dir -v `pwd`/ccache_dir:/ccache_dir duckdb/${{ inputs.duckdb_arch }} make test_release
40 changes: 16 additions & 24 deletions .github/config/out_of_tree_extensions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,14 @@ if (NOT MINGW AND NOT ${WASM_ENABLED})
)
endif()

### Currently the repository hosting libxml2, an azure dependency, returns 503.
### Re-enable AZURE when the problem goes away. Until then, AZURE needs to be
### built on the side.
if (NO)
################# AZURE
if (NOT MINGW AND NOT ${WASM_ENABLED})
duckdb_extension_load(azure
LOAD_TESTS
GIT_URL https://github.com/duckdb/duckdb_azure
GIT_TAG a40ecb7bc9036eb8ecc5bf30db935a31b78011f5
APPLY_PATCHES
GIT_TAG 88011ee6ef66f223badc9beb04d4723651ac6623
)
endif()
endif()

################# DELTA
# MinGW build is not available, and our current manylinux ci does not have enough storage space to run the rust build
Expand All @@ -80,20 +74,20 @@ duckdb_extension_load(excel

################# ICEBERG
# Windows tests for iceberg are currently not working
#if (NOT WIN32)
# set(LOAD_ICEBERG_TESTS "LOAD_TESTS")
#else ()
# set(LOAD_ICEBERG_TESTS "")
#endif()
#
#if (NOT MINGW AND NOT ${WASM_ENABLED})
# duckdb_extension_load(iceberg
# ${LOAD_ICEBERG_TESTS}
# GIT_URL https://github.com/duckdb/duckdb_iceberg
# GIT_TAG d62d91d8a089371c4d1862a88f2e62a97bc2af3a
# APPLY_PATCHES
# )
#endif()
# Pass the LOAD_TESTS keyword to the iceberg extension everywhere except on
# Windows; there the variable stays empty and expands to no argument at all.
set(LOAD_ICEBERG_TESTS "")
if (NOT WIN32)
    set(LOAD_ICEBERG_TESTS "LOAD_TESTS")
endif()

# The iceberg extension is not loaded for MinGW or WASM builds.
if (NOT MINGW AND NOT ${WASM_ENABLED})
    duckdb_extension_load(
        iceberg ${LOAD_ICEBERG_TESTS}
        GIT_URL https://github.com/duckdb/duckdb_iceberg
        GIT_TAG d62d91d8a089371c4d1862a88f2e62a97bc2af3a
        APPLY_PATCHES
    )
endif()

################# INET
duckdb_extension_load(inet
Expand Down Expand Up @@ -145,13 +139,11 @@ duckdb_extension_load(sqlite_scanner
APPLY_PATCHES
)

if (NOT ${WASM_ENABLED})
duckdb_extension_load(sqlsmith
DONT_LINK LOAD_TESTS
GIT_URL https://github.com/duckdb/duckdb_sqlsmith
GIT_TAG d6d62c1cba6b1369ba79db4bff3c67f24aaa95c2
GIT_TAG b13723fe701f1e38d2cd65b3b6eb587c6553a251
)
endif()

################# VSS
duckdb_extension_load(vss
Expand Down
53 changes: 0 additions & 53 deletions .github/patches/extensions/azure/reformat_string_functions.patch

This file was deleted.

19 changes: 0 additions & 19 deletions .github/workflows/LinuxRelease.yml
Original file line number Diff line number Diff line change
Expand Up @@ -206,16 +206,7 @@ jobs:
- uses: ./duckdb/.github/actions/build_extensions_dockerized
with:
vcpkg_target_triplet: x64-linux
deploy_as: linux_amd64
duckdb_arch: linux_amd64
treat_warn_as_error: 0
s3_id: ${{ secrets.S3_ID }}
s3_key: ${{ secrets.S3_KEY }}
signing_pk: ${{ secrets.DUCKDB_EXTENSION_SIGNING_PK }}
aarch64_cross_compile: 1
run_tests: 0 # Cannot run tests here due to cross-compiling
run_autoload_tests: 0
ninja: 1

- uses: actions/upload-artifact@v4
with:
Expand Down Expand Up @@ -248,7 +239,6 @@ jobs:
name: Linux Extensions (aarch64)
runs-on: ubuntu-latest
needs: linux-extensions-64
if: ${{ github.ref == 'refs/heads/main' || github.ref == 'refs/heads/feature' }}

steps:
- uses: actions/checkout@v3
Expand All @@ -260,16 +250,7 @@ jobs:
- uses: ./duckdb/.github/actions/build_extensions_dockerized
with:
vcpkg_target_triplet: arm64-linux
deploy_as: linux_arm64
duckdb_arch: linux_arm64
treat_warn_as_error: 0
s3_id: ${{ secrets.S3_ID }}
s3_key: ${{ secrets.S3_KEY }}
signing_pk: ${{ secrets.DUCKDB_EXTENSION_SIGNING_PK }}
aarch64_cross_compile: 1
run_tests: 0 # Cannot run tests here due to cross-compiling
run_autoload_tests: 0
ninja: 1

- uses: actions/upload-artifact@v4
with:
Expand Down
2 changes: 0 additions & 2 deletions .github/workflows/OSX.yml
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,6 @@ jobs:
run_autoload_tests: ${{ inputs.skip_tests != 'true' && 1 || 0 }}

env:
DUCKDB_EXTENSION_SIGNING_PK: ${{ secrets.DUCKDB_EXTENSION_SIGNING_PK }}
AWS_DEFAULT_REGION: us-east-1
VCPKG_TARGET_TRIPLET: ${{ matrix.vcpkg_triplet }}
GEN: ninja
DUCKDB_PLATFORM: ${{ matrix.duckdb_arch }}
Expand Down
9 changes: 0 additions & 9 deletions .github/workflows/Python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -141,16 +141,7 @@ jobs:
- uses: ./duckdb/.github/actions/build_extensions_dockerized
with:
vcpkg_target_triplet: x64-linux
deploy_as: linux_amd64_gcc4
duckdb_arch: linux_amd64_gcc4
treat_warn_as_error: 0
s3_id: ${{ secrets.S3_ID }}
s3_key: ${{ secrets.S3_KEY }}
signing_pk: ${{ secrets.DUCKDB_EXTENSION_SIGNING_PK }}
aarch64_cross_compile: 1
run_tests: 0 # Cannot run tests here due to cross-compiling
run_autoload_tests: 0
ninja: 1

- uses: actions/upload-artifact@v4
with:
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/_sign_deploy_extensions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ jobs:
AWS_ACCESS_KEY_ID: ${{secrets.S3_ID}}
AWS_SECRET_ACCESS_KEY: ${{secrets.S3_KEY}}
DUCKDB_DEPLOY_SCRIPT_MODE: for_real
DUCKDB_EXTENSION_SIGNING_PK: ${{ secrets.DUCKDB_EXTENSION_SIGNING_PK }}
AWS_DEFAULT_REGION: us-east-1
run: |
pip install awscli
TARGET_SHA=${{ inputs.duckdb_sha }}
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Copyright 2018-2024 Stichting DuckDB Foundation
Copyright 2018-2025 Stichting DuckDB Foundation

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

DuckDB is a high-performance analytical database system. It is designed to be fast, reliable, portable, and easy to use. DuckDB provides a rich SQL dialect, with support far beyond basic SQL. DuckDB supports arbitrary and nested correlated subqueries, window functions, collations, complex types (arrays, structs, maps), and [several extensions designed to make SQL easier to use](https://duckdb.org/docs/guides/sql_features/friendly_sql).

DuckDB is available as a [standalone CLI application](https://duckdb.org/docs/api/cli/overview) and has clients for [Python](https://duckdb.org/docs/api/python/overview), [R](https://duckdb.org/docs/api/r), [Java](https://duckdb.org/docs/api/java), [Wasm](https://duckdb.org/docs/api/wasm/overview), etc., with deep integrations with packages such as [pandas](https://duckdb.org/docs/guides/python/sql_on_pandas) and [dplyr](https://duckdblabs.github.io/duckplyr/).
DuckDB is available as a [standalone CLI application](https://duckdb.org/docs/api/cli/overview) and has clients for [Python](https://duckdb.org/docs/api/python/overview), [R](https://duckdb.org/docs/api/r), [Java](https://duckdb.org/docs/api/java), [Wasm](https://duckdb.org/docs/api/wasm/overview), etc., with deep integrations with packages such as [pandas](https://duckdb.org/docs/guides/python/sql_on_pandas) and [dplyr](https://duckdb.org/docs/api/r#duckplyr-dplyr-api).

For more information on using DuckDB, please refer to the [DuckDB documentation](https://duckdb.org/docs/).

Expand Down
2 changes: 1 addition & 1 deletion benchmark/micro/join/asof_join_small_probe.benchmark
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ ASOF JOIN "build"
;

result I
4555765974
4560929499
23 changes: 23 additions & 0 deletions benchmark/micro/join/external_join_partition_order.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# name: benchmark/micro/join/external_join_partition_order.benchmark
# description: Test that the external hash join partition selection selects even partitions sequentially
# group: [join]

name External Join Partition Order
group join

cache external_join_partition_order.duckdb

# Both tables are generated from range(1000e5), i.e. 100 million rows each
# with the same set of distinct keys in column c.
load
create table build as select range c from range(1000e5::bigint);
create table probe as select range c from range(1000e5::bigint);

# Fixed thread count, a dedicated temp directory and a 1000mb memory cap.
# NOTE(review): presumably the cap is what forces the join to go external
# (spill to the temp directory) - confirm against the actual data size.
init
set threads=4;
set temp_directory='${BENCHMARK_DIR}/external_join_partition_order.duckdb.tmp';
set memory_limit='1000mb';

run
select count(*) from probe join build using (c)

# Every probe key matches exactly one build key, so the count equals the
# number of rows in either table.
result I
100000000
24 changes: 24 additions & 0 deletions benchmark/micro/join/external_join_partition_selection.benchmark
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# name: benchmark/micro/join/external_join_partition_selection.benchmark
# description: Test that the external hash join partition selection selects the large partition last
# group: [join]

name External Join Partition Selection
group join

cache external_join_partition_selection.duckdb

# build: 10 million distinct even keys (range * 2) plus another 10 million
# rows that all carry the key 42, which skews the build side heavily.
# probe: 100 million consecutive keys.
load
create table build as select range * 2 c from range(100e5::bigint);
insert into build select 42 c from range(100e5::bigint);
create table probe as select range c from range(1000e5::bigint);

# Fixed thread count, a dedicated temp directory and a 500mb memory cap.
# NOTE(review): presumably the cap is what forces the join to go external
# (spill to the temp directory) - confirm against the actual data size.
init
set threads=4;
set temp_directory='${BENCHMARK_DIR}/external_join_partition_selection.duckdb.tmp';
set memory_limit='500mb';

run
select count(*) from probe join build using (c)

# 10 million matches come from the distinct even keys and another 10 million
# from the probe row with key 42 matching every duplicated build row.
result I
20000000
2 changes: 1 addition & 1 deletion benchmark/micro/join/left_outer_join_right_big.benchmark
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ run
SELECT count(*) FROM small_table d LEFT OUTER JOIN big_table f ON ( d.pkey=f.c_10k);

result I
99995004
99995018
2 changes: 1 addition & 1 deletion benchmark/micro/join/positional_join.benchmark
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ SELECT COUNT(*), SUM(df1_0), SUM(df2_0),
FROM df1 POSITIONAL JOIN df2;

result III
10000000 499988274 499879479
10000000 499831718 499971590
Loading

0 comments on commit 16bd3a0

Please sign in to comment.