From a689220a5d7f84f85d36311bb0cf017061eb72d9 Mon Sep 17 00:00:00 2001 From: Steve Purcell Date: Sun, 1 Oct 2023 14:27:51 +0200 Subject: [PATCH] duckdb: Incorporate changes from #257452 --- pkgs/development/libraries/duckdb/default.nix | 66 +++++++------------ .../libraries/duckdb/version.patch | 58 ---------------- .../python-modules/duckdb-engine/default.nix | 9 ++- .../duckdb-engine/remote_data.patch | 24 +++++++ .../python-modules/duckdb/default.nix | 19 ++++-- .../python-modules/duckdb/setup.patch | 22 +++++++ .../python-modules/ibis-framework/default.nix | 11 ++++ 7 files changed, 100 insertions(+), 109 deletions(-) delete mode 100644 pkgs/development/libraries/duckdb/version.patch create mode 100644 pkgs/development/python-modules/duckdb-engine/remote_data.patch create mode 100644 pkgs/development/python-modules/duckdb/setup.patch diff --git a/pkgs/development/libraries/duckdb/default.nix b/pkgs/development/libraries/duckdb/default.nix index 28f287b945e40..201a8533674e8 100644 --- a/pkgs/development/libraries/duckdb/default.nix +++ b/pkgs/development/libraries/duckdb/default.nix @@ -2,6 +2,7 @@ , stdenv , fetchFromGitHub , cmake +, git , ninja , openssl , openjdk11 @@ -20,35 +21,24 @@ stdenv.mkDerivation rec { src = fetchFromGitHub { owner = pname; repo = pname; + deepClone = true; rev = "v${version}"; hash = "sha256-EKvDH7RwOC4Gu/lturrfnGpzXnJ9azIwAFeuVoa6L/Y="; }; - patches = [ ./version.patch ]; + nativeBuildInputs = [ cmake git ninja ]; - postPatch = '' - substituteInPlace CMakeLists.txt --subst-var-by DUCKDB_VERSION "v${version}" - ''; - - nativeBuildInputs = [ cmake ninja ]; buildInputs = [ openssl ] ++ lib.optionals withJdbc [ openjdk11 ] ++ lib.optionals withOdbc [ unixODBC ]; cmakeFlags = [ - "-DBUILD_AUTOCOMPLETE_EXTENSION=ON" + # use similar flags to what is defined in ${src}/.github/workflow/{LinuxRelease,OSX}.yml "-DBUILD_ICU_EXTENSION=ON" + "-DDEBUG_STACKTRACE=1" "-DBUILD_PARQUET_EXTENSION=ON" + "-DDUCKDB_EXTENSION_CONFIGS=${src}/.github/config/bundled_extensions.cmake" "-DBUILD_TPCH_EXTENSION=ON" - "-DBUILD_TPCDS_EXTENSION=ON" - "-DBUILD_FTS_EXTENSION=ON" - "-DBUILD_HTTPFS_EXTENSION=ON" - "-DBUILD_VISUALIZER_EXTENSION=ON" - "-DBUILD_JSON_EXTENSION=ON" - "-DBUILD_JEMALLOC_EXTENSION=ON" - "-DBUILD_EXCEL_EXTENSION=ON" - "-DBUILD_INET_EXTENSION=ON" - "-DBUILD_TPCE=ON" "-DBUILD_ODBC_DRIVER=${enableFeature withOdbc}" "-DJDBC_DRIVER=${enableFeature withJdbc}" ] ++ lib.optionals doInstallCheck [ @@ -66,44 +56,34 @@ stdenv.mkDerivation rec { installCheckPhase = let - excludes = map (pattern: "exclude:'${pattern}'") [ - "[s3]" + excludes = map (pattern: "exclude:'${pattern}'") ([ + "Test using a remote optimizer pass in case thats important to someone" "Test closing database during long running query" - "test/common/test_cast_hugeint.test" - "test/sql/copy/csv/test_csv_remote.test" - "test/sql/copy/parquet/test_parquet_remote.test" - "test/sql/copy/parquet/test_parquet_remote_foreign_files.test" - "test/sql/storage/compression/chimp/chimp_read.test" - "test/sql/storage/compression/chimp/chimp_read_float.test" - "test/sql/storage/compression/patas/patas_compression_ratio.test_coverage" - "test/sql/storage/compression/patas/patas_read.test" - "test/sql/json/read_json_objects.test" - "test/sql/json/read_json.test" - "test/sql/copy/parquet/parquet_5968.test" "test/fuzzer/pedro/buffer_manager_out_of_memory.test" - "test/sql/storage/compression/bitpacking/bitpacking_size_calculation.test" + "test/sql/copy/csv/parallel/csv_parallel_httpfs.test" + "test/sql/copy/csv/parallel/test_parallel_csv.test" + "test/sql/copy/csv/test_csv_httpfs_prepared.test" + "test/sql/copy/csv/test_csv_httpfs.test" + "test/sql/copy/csv/test_csv_remote.test" "test/sql/copy/parquet/delta_byte_array_length_mismatch.test" - "test/sql/function/timestamp/test_icu_strptime.test" - "test/sql/timezone/test_icu_timezone.test" + "test/sql/copy/parquet/delta_byte_array_multiple_pages.test" + "test/sql/copy/parquet/parquet_5968.test" "test/sql/copy/parquet/snowflake_lineitem.test" "test/sql/copy/parquet/test_parquet_force_download.test" - "test/sql/copy/parquet/delta_byte_array_multiple_pages.test" - "test/sql/copy/csv/test_csv_httpfs_prepared.test" - "test/sql/copy/csv/test_csv_httpfs.test" - "test/sql/copy/csv/parallel/test_parallel_csv.test" - "test/sql/copy/csv/parallel/csv_parallel_httpfs.test" - "test/common/test_cast_struct.test" - # test is order sensitive - "test/sql/copy/parquet/parquet_glob.test" + "test/sql/copy/parquet/test_parquet_remote_foreign_files.test" + "test/sql/copy/parquet/test_parquet_remote.test" + "test/sql/json/table/read_json_objects.test" + "test/sql/json/table/read_json.test" + "test/sql/settings/test_disabled_file_system_httpfs.test" + "test/sql/storage/compression/bitpacking/bitpacking_size_calculation.test" + # these are only hidden if no filters are passed in "[!hide]" - # this test apparently never terminates - "test/sql/copy/csv/auto/test_csv_auto.test" ] ++ lib.optionals stdenv.isAarch64 [ "test/sql/aggregate/aggregates/test_kurtosis.test" "test/sql/aggregate/aggregates/test_skewness.test" "test/sql/function/list/aggregates/skewness.test" - ]; + ]); in '' runHook preInstallCheck diff --git a/pkgs/development/libraries/duckdb/version.patch b/pkgs/development/libraries/duckdb/version.patch deleted file mode 100644 index 1f52fdb3b9351..0000000000000 --- a/pkgs/development/libraries/duckdb/version.patch +++ /dev/null @@ -1,58 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 349af6acf7..7ffec0b4cb 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -196,52 +196,7 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "SunOS") - set(SUN TRUE) - endif() - --find_package(Git) --if(Git_FOUND) -- if (NOT DEFINED GIT_COMMIT_HASH) -- execute_process( -- COMMAND ${GIT_EXECUTABLE} log -1 --format=%h -- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} -- RESULT_VARIABLE GIT_RESULT -- OUTPUT_VARIABLE GIT_COMMIT_HASH -- OUTPUT_STRIP_TRAILING_WHITESPACE) -- endif() -- execute_process( -- COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 -- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} -- OUTPUT_VARIABLE GIT_LAST_TAG -- OUTPUT_STRIP_TRAILING_WHITESPACE) -- execute_process( -- COMMAND ${GIT_EXECUTABLE} describe --tags --long -- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} -- OUTPUT_VARIABLE GIT_ITERATION -- OUTPUT_STRIP_TRAILING_WHITESPACE) --else() -- message("Git NOT FOUND") --endif() -- --if(GIT_RESULT EQUAL "0") -- string(REGEX REPLACE "v([0-9]+).[0-9]+.[0-9]+" "\\1" DUCKDB_MAJOR_VERSION "${GIT_LAST_TAG}") -- string(REGEX REPLACE "v[0-9]+.([0-9]+).[0-9]+" "\\1" DUCKDB_MINOR_VERSION "${GIT_LAST_TAG}") -- string(REGEX REPLACE "v[0-9]+.[0-9]+.([0-9]+)" "\\1" DUCKDB_PATCH_VERSION "${GIT_LAST_TAG}") -- string(REGEX REPLACE ".*-([0-9]+)-.*" "\\1" DUCKDB_DEV_ITERATION "${GIT_ITERATION}") -- -- if(DUCKDB_DEV_ITERATION EQUAL 0) -- # on a tag; directly use the version -- set(DUCKDB_VERSION "${GIT_LAST_TAG}") -- else() -- # not on a tag, increment the patch version by one and add a -devX suffix -- math(EXPR DUCKDB_PATCH_VERSION "${DUCKDB_PATCH_VERSION}+1") -- set(DUCKDB_VERSION "v${DUCKDB_MAJOR_VERSION}.${DUCKDB_MINOR_VERSION}.${DUCKDB_PATCH_VERSION}-dev${DUCKDB_DEV_ITERATION}") -- endif() --else() -- # fallback for when building from tarball -- set(DUCKDB_MAJOR_VERSION 0) -- set(DUCKDB_MINOR_VERSION 0) -- set(DUCKDB_PATCH_VERSION 1) -- set(DUCKDB_DEV_ITERATION 0) -- set(DUCKDB_VERSION "v${DUCKDB_MAJOR_VERSION}.${DUCKDB_MINOR_VERSION}.${DUCKDB_PATCH_VERSION}-dev${DUCKDB_DEV_ITERATION}") --endif() -+set(DUCKDB_VERSION "@DUCKDB_VERSION@") - - message(STATUS "git hash ${GIT_COMMIT_HASH}, version ${DUCKDB_VERSION}") - diff --git a/pkgs/development/python-modules/duckdb-engine/default.nix b/pkgs/development/python-modules/duckdb-engine/default.nix index f2b49cd0ea7be..9634c8fa128c8 100644 --- a/pkgs/development/python-modules/duckdb-engine/default.nix +++ b/pkgs/development/python-modules/duckdb-engine/default.nix @@ -14,7 +14,7 @@ buildPythonPackage rec { pname = "duckdb-engine"; - version = "0.7.3"; + version = "0.9.2"; format = "pyproject"; disabled = pythonOlder "3.7"; @@ -23,9 +23,11 @@ buildPythonPackage rec { repo = "duckdb_engine"; owner = "Mause"; rev = "refs/tags/v${version}"; - hash = "sha256-Z9m1+Bc/csWKdPDuwf82xX0qOiD1Y5LBgJjUlLntAO8="; + hash = "sha256-T02nGF+YlughRQPinb0I3NC6xsarh4+qRhG8YfhTvhI="; }; + patches = [ ./remote_data.patch ]; + nativeBuildInputs = [ poetry-core ]; @@ -42,8 +44,11 @@ buildPythonPackage rec { disabledTests = [ # this test tries to download the httpfs extension "test_preload_extension" + "test_motherduck" # test should be skipped based on sqlalchemy version but isn't and fails "test_commit" + # rowcount no longer generates an attribute error. + "test_rowcount" ]; nativeCheckInputs = [ diff --git a/pkgs/development/python-modules/duckdb-engine/remote_data.patch b/pkgs/development/python-modules/duckdb-engine/remote_data.patch new file mode 100644 index 0000000000000..dd53c00f65097 --- /dev/null +++ b/pkgs/development/python-modules/duckdb-engine/remote_data.patch @@ -0,0 +1,24 @@ +diff --git a/duckdb_engine/tests/test_basic.py b/duckdb_engine/tests/test_basic.py +index 302636f..ed20f12 100644 +--- a/duckdb_engine/tests/test_basic.py ++++ b/duckdb_engine/tests/test_basic.py +@@ -183,7 +183,6 @@ def test_get_views(engine: Engine) -> None: + + + @mark.skipif(os.uname().machine == "aarch64", reason="not supported on aarch64") +-@mark.remote_data + def test_preload_extension() -> None: + duckdb.default_connection.execute("INSTALL httpfs") + engine = create_engine( +diff --git a/duckdb_engine/tests/test_integration.py b/duckdb_engine/tests/test_integration.py +index 349c976..bf3dbaa 100644 +--- a/duckdb_engine/tests/test_integration.py ++++ b/duckdb_engine/tests/test_integration.py +@@ -24,7 +24,6 @@ def test_integration(engine: Engine) -> None: + conn.execute(text("select * from test_df")) + + +-@mark.remote_data + @mark.skipif( + "dev" in duckdb.__version__, reason="md extension not available for dev builds" # type: ignore[attr-defined] + ) \ No newline at end of file diff --git a/pkgs/development/python-modules/duckdb/default.nix b/pkgs/development/python-modules/duckdb/default.nix index 8989bd5d46c16..0d313f1a12064 100644 --- a/pkgs/development/python-modules/duckdb/default.nix +++ b/pkgs/development/python-modules/duckdb/default.nix @@ -2,8 +2,10 @@ , buildPythonPackage , duckdb , fsspec +, git , google-cloud-storage , numpy +, openssl , pandas , psutil , pybind11 @@ -12,9 +14,12 @@ }: buildPythonPackage rec { - inherit (duckdb) pname version src patches; + inherit (duckdb) pname version src; format = "setuptools"; + BUILD_HTTPFS = 1; + patches = [ ./setup.patch ]; + postPatch = '' # we can't use sourceRoot otherwise patches don't apply, because the patches apply to the C++ library cd tools/pythonpkg @@ -22,30 +27,32 @@ buildPythonPackage rec { # 1. let nix control build cores # 2. unconstrain setuptools_scm version substituteInPlace setup.py \ - --replace "multiprocessing.cpu_count()" "$NIX_BUILD_CORES" \ - --replace "setuptools_scm<7.0.0" "setuptools_scm" + --replace "multiprocessing.cpu_count()" "$NIX_BUILD_CORES" - # avoid dependency on mypy - rm tests/stubs/test_stubs.py + # avoid dependency on mypy + rm tests/stubs/test_stubs.py ''; SETUPTOOLS_SCM_PRETEND_VERSION = version; nativeBuildInputs = [ + git pybind11 setuptools-scm ]; + buildInputs = [ openssl ]; + propagatedBuildInputs = [ numpy pandas ]; nativeCheckInputs = [ + fsspec google-cloud-storage psutil pytestCheckHook - fsspec ]; disabledTests = [ diff --git a/pkgs/development/python-modules/duckdb/setup.patch b/pkgs/development/python-modules/duckdb/setup.patch new file mode 100644 index 0000000000000..9649f18117602 --- /dev/null +++ b/pkgs/development/python-modules/duckdb/setup.patch @@ -0,0 +1,22 @@ +diff --git a/tools/pythonpkg/setup.py b/tools/pythonpkg/setup.py +index fdf2911019..c363cc518a 100644 +--- a/tools/pythonpkg/setup.py ++++ b/tools/pythonpkg/setup.py +@@ -163,8 +163,6 @@ if 'BUILD_HTTPFS' in os.environ: + for ext in extensions: + toolchain_args.extend(['-DDUCKDB_EXTENSION_{}_LINKED'.format(ext.upper())]) + +-toolchain_args.extend(['-DDUCKDB_EXTENSION_AUTOLOAD_DEFAULT=1', '-DDUCKDB_EXTENSION_AUTOINSTALL_DEFAULT=1']) +- + + class get_pybind_include(object): + def __init__(self, user=False): +@@ -343,7 +341,7 @@ setup( + packages=packages, + include_package_data=True, + python_requires='>=3.7.0', +- setup_requires=setup_requires + ["setuptools_scm<7.0.0", 'pybind11>=2.6.0'], ++ setup_requires=setup_requires + ["setuptools_scm", 'pybind11>=2.6.0'], + use_scm_version=setuptools_scm_conf, + tests_require=['google-cloud-storage', 'mypy', 'pytest'], + classifiers=[ \ No newline at end of file diff --git a/pkgs/development/python-modules/ibis-framework/default.nix b/pkgs/development/python-modules/ibis-framework/default.nix index 16274bc3f00df..9b77a6be51d0d 100644 --- a/pkgs/development/python-modules/ibis-framework/default.nix +++ b/pkgs/development/python-modules/ibis-framework/default.nix @@ -130,6 +130,17 @@ buildPythonPackage rec { # tries to download duckdb extensions "--deselect=ibis/backends/duckdb/tests/test_register.py::test_register_sqlite" "--deselect=ibis/backends/duckdb/tests/test_register.py::test_read_sqlite" + + # duckdb does not respect sample_size=2 (reads 3 lines of csv). + "--deselect=ibis/backends/tests/test_register.py::test_csv_reregister_schema" + + # duckdb fails with: + # "This function can not be called with an active transaction!, commit or abort the existing one first" + "--deselect=ibis/backends/tests/test_udf.py::test_vectorized_udf" + "--deselect=ibis/backends/tests/test_udf.py::test_map_merge_udf" + "--deselect=ibis/backends/tests/test_udf.py::test_udf" + "--deselect=ibis/backends/tests/test_udf.py::test_map_udf" + # pyarrow13 is not supported yet. "--deselect=ibis/backends/tests/test_temporal.py::test_date_truncate" "--deselect=ibis/backends/tests/test_temporal.py::test_integer_to_interval_timestamp"