diff --git a/.github/config/out_of_tree_extensions.cmake b/.github/config/out_of_tree_extensions.cmake
index 858c897af81d..8cebc0bdc7c4 100644
--- a/.github/config/out_of_tree_extensions.cmake
+++ b/.github/config/out_of_tree_extensions.cmake
@@ -58,6 +58,7 @@ duckdb_extension_load(spatial
     GIT_TAG 36e5a126976ac3b66716893360ef7e6295707082
     INCLUDE_DIR spatial/include
     TEST_DIR test/sql
+    APPLY_PATCHES
     )
 
 ################# SQLITE_SCANNER
diff --git a/.github/patches/extensions/spatial/const_copy_param.patch b/.github/patches/extensions/spatial/const_copy_param.patch
new file mode 100644
index 000000000000..1b50a8e5e5f4
--- /dev/null
+++ b/.github/patches/extensions/spatial/const_copy_param.patch
@@ -0,0 +1,15 @@
+diff --git a/spatial/src/spatial/gdal/functions/st_write.cpp b/spatial/src/spatial/gdal/functions/st_write.cpp
+index 36a71da..15ebcf4 100644
+--- a/spatial/src/spatial/gdal/functions/st_write.cpp
++++ b/spatial/src/spatial/gdal/functions/st_write.cpp
+@@ -55,8 +55,8 @@ struct GlobalState : public GlobalFunctionData {
+ //===--------------------------------------------------------------------===//
+ // Bind
+ //===--------------------------------------------------------------------===//
+-static unique_ptr<FunctionData> Bind(ClientContext &context, CopyInfo &info, vector<string> &names,
+-                                     vector<LogicalType> &sql_types) {
++static unique_ptr<FunctionData> Bind(ClientContext &context, const CopyInfo &info, const vector<string> &names,
++                                     const vector<LogicalType> &sql_types) {
+ 
+ 	GdalFileHandler::SetLocalClientContext(context);
+ 
diff --git a/.github/workflows/InternalIssuesCreateMirror.yml b/.github/workflows/InternalIssuesCreateMirror.yml
index 9c56ff546a67..94dcf4dd9d2b 100644
--- a/.github/workflows/InternalIssuesCreateMirror.yml
+++ b/.github/workflows/InternalIssuesCreateMirror.yml
@@ -26,7 +26,7 @@ jobs:
 
       - name: Get mirror issue number
         run: |
-          gh issue list --repo duckdblabs/duckdb-internal --json title,number --jq ".[] | select(.title | startswith(\"$TITLE_PREFIX\")).number" > mirror_issue_number.txt
+          gh issue list --repo duckdblabs/duckdb-internal --search "${TITLE_PREFIX}" --json title,number --jq ".[] | select(.title | startswith(\"$TITLE_PREFIX\")).number" > mirror_issue_number.txt
           echo "MIRROR_ISSUE_NUMBER=$(cat mirror_issue_number.txt)" >> $GITHUB_ENV
 
       - name: Print whether mirror issue exists
diff --git a/.github/workflows/InternalIssuesUpdateMirror.yml b/.github/workflows/InternalIssuesUpdateMirror.yml
index ba50e59c8d9f..58c53b117275 100644
--- a/.github/workflows/InternalIssuesUpdateMirror.yml
+++ b/.github/workflows/InternalIssuesUpdateMirror.yml
@@ -15,7 +15,7 @@ jobs:
     steps:
       - name: Get mirror issue number
         run: |
-          gh issue list --repo duckdblabs/duckdb-internal --json title,number --jq ".[] | select(.title | startswith(\"$TITLE_PREFIX\")).number" > mirror_issue_number.txt
+          gh issue list --repo duckdblabs/duckdb-internal --search "${TITLE_PREFIX}" --json title,number --jq ".[] | select(.title | startswith(\"$TITLE_PREFIX\")).number" > mirror_issue_number.txt
          echo "MIRROR_ISSUE_NUMBER=$(cat mirror_issue_number.txt)" >> $GITHUB_ENV
 
       - name: Print whether mirror issue exists
diff --git a/.github/workflows/NeedsDocumentation.yml b/.github/workflows/NeedsDocumentation.yml
new file mode 100644
index 000000000000..e9fcaf9c82c1
--- /dev/null
+++ b/.github/workflows/NeedsDocumentation.yml
@@ -0,0 +1,37 @@
+name: Create Documentation issue for the Needs Documentation label
+on:
+  issues:
+    types:
+      - labeled
+  pull_request:
+    types:
+      - labeled
+
+env:
+  GH_TOKEN: ${{ secrets.DUCKDBLABS_BOT_TOKEN }}
+  TITLE_PREFIX: "[duckdb/#${{ github.event.issue.number }}]"
+  PUBLIC_ISSUE_TITLE: ${{ github.event.issue.title }}
+
+jobs:
+  create_documentation_issue:
+    if: github.event.label.name == 'Needs Documentation'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Get mirror issue number
+        run: |
+          gh issue list --repo duckdb/duckdb-web --json title,number --jq ".[] | select(.title | startswith(\"${TITLE_PREFIX}\")).number" > mirror_issue_number.txt
+          echo "MIRROR_ISSUE_NUMBER=$(cat mirror_issue_number.txt)" >> ${GITHUB_ENV}
+
+      - name: Print whether mirror issue exists
+        run: |
+          if [ "${MIRROR_ISSUE_NUMBER}" == "" ]; then
+            echo "Mirror issue with title prefix '${TITLE_PREFIX}' does not exist yet"
+          else
+            echo "Mirror issue with title prefix '${TITLE_PREFIX}' exists with number ${MIRROR_ISSUE_NUMBER}"
+          fi
+
+      - name: Create mirror issue if it does not yet exist
+        run: |
+          if [ "${MIRROR_ISSUE_NUMBER}" == "" ]; then
+            gh issue create --repo duckdb/duckdb-web --title "${TITLE_PREFIX} - ${PUBLIC_ISSUE_TITLE} needs documentation" --body "See https://github.com/duckdb/duckdb/issues/${{ github.event.issue.number }}"
+          fi
diff --git a/.github/workflows/R_CMD_CHECK.yml b/.github/workflows/R_CMD_CHECK.yml
index 2c0fb5794d09..6e0a873f2033 100644
--- a/.github/workflows/R_CMD_CHECK.yml
+++ b/.github/workflows/R_CMD_CHECK.yml
@@ -79,6 +79,15 @@ jobs:
           extra-packages: any::rcmdcheck
           needs: check
 
+      - name: Apply duckdb-r patches
+        shell: bash
+        working-directory: ${{ env.DUCKDB_SRC }}
+        run: |
+          shopt -s nullglob
+          for filename in .github/patches/duckdb-r/*.patch; do
+            git apply $filename
+          done
+
       # needed so we can run git commit in vendor.sh
       - name: setup github and create parallel builds
         shell: bash
diff --git a/.github/workflows/Regression.yml b/.github/workflows/Regression.yml
index bb2c81b1a253..9979c090df3e 100644
--- a/.github/workflows/Regression.yml
+++ b/.github/workflows/Regression.yml
@@ -224,7 +224,7 @@ jobs:
 
       - uses: actions/setup-python@v4
         with:
-          python-version: '3.7'
+          python-version: '3.11'
 
       - name: Install
         shell: bash
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 82ee76200419..359f70d49c41 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -661,7 +661,7 @@ function(build_loadable_extension_directory NAME OUTPUT_DIRECTORY PARAMETERS)
   if(WASM_LOADABLE_EXTENSIONS)
     set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -sSIDE_MODULE=1 -DWASM_LOADABLE_EXTENSIONS")
   elseif (EXTENSION_STATIC_BUILD)
-    if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
+    if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
       if (APPLE)
         set_target_properties(${TARGET_NAME} PROPERTIES CXX_VISIBILITY_PRESET hidden)
         # Note that on MacOS we need to use the -exported_symbol whitelist feature due to a lack of -exclude-libs flag in mac's ld variant
diff --git a/extension/parquet/parquet_extension.cpp b/extension/parquet/parquet_extension.cpp
index e897645e0c4e..d61ddafa7e9b 100644
--- a/extension/parquet/parquet_extension.cpp
+++ b/extension/parquet/parquet_extension.cpp
@@ -740,8 +740,8 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
 	}
 }
 
-unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
-                                          vector<LogicalType> &sql_types) {
+unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, const CopyInfo &info, const vector<string> &names,
+                                          const vector<LogicalType> &sql_types) {
 	D_ASSERT(names.size() == sql_types.size());
 	bool row_group_size_bytes_set = false;
 	auto bind_data = make_uniq<ParquetWriteBindData>();
diff --git a/src/catalog/catalog_entry/view_catalog_entry.cpp b/src/catalog/catalog_entry/view_catalog_entry.cpp
index 1f41f740bc0c..9c3a12cabdd7 100644
--- a/src/catalog/catalog_entry/view_catalog_entry.cpp
+++ b/src/catalog/catalog_entry/view_catalog_entry.cpp
@@ -32,6 +32,7 @@ unique_ptr<CreateInfo> ViewCatalogEntry::GetInfo() const {
 	result->query = unique_ptr_cast<SQLStatement, SelectStatement>(query->Copy());
 	result->aliases = aliases;
 	result->types = types;
+	result->temporary = temporary;
 	return std::move(result);
 }
 
@@ -58,23 +59,16 @@ string ViewCatalogEntry::ToSQL() const {
 		//! Return empty sql with view name so pragma view_tables don't complain
 		return sql;
 	}
-	return sql + "\n;";
+	auto info = GetInfo();
+	auto result = info->ToString();
+	return result + ";\n";
 }
 
 unique_ptr<CatalogEntry> ViewCatalogEntry::Copy(ClientContext &context) const {
 	D_ASSERT(!internal);
-	CreateViewInfo create_info(schema, name);
-	create_info.query = unique_ptr_cast<SQLStatement, SelectStatement>(query->Copy());
-	for (idx_t i = 0; i < aliases.size(); i++) {
-		create_info.aliases.push_back(aliases[i]);
-	}
-	for (idx_t i = 0; i < types.size(); i++) {
-		create_info.types.push_back(types[i]);
-	}
-	create_info.temporary = temporary;
-	create_info.sql = sql;
+	auto create_info = GetInfo();
 
-	return make_uniq<ViewCatalogEntry>(catalog, schema, create_info);
+	return make_uniq<ViewCatalogEntry>(catalog, schema, create_info->Cast<CreateViewInfo>());
 }
 
 } // namespace duckdb
diff --git a/src/common/arrow/appender/union_data.cpp b/src/common/arrow/appender/union_data.cpp
index cfe54f89b418..3adb8d05da54 100644
--- a/src/common/arrow/appender/union_data.cpp
+++ b/src/common/arrow/appender/union_data.cpp
@@ -24,7 +24,7 @@ void ArrowUnionData::Append(ArrowAppendData &append_data, Vector &input, idx_t f
 
 	duckdb::vector<Vector> child_vectors;
 	for (const auto &child : UnionType::CopyMemberTypes(input.GetType())) {
-		child_vectors.emplace_back(child.second);
+		child_vectors.emplace_back(child.second, size);
 	}
 
 	for (idx_t input_idx = from; input_idx < to; input_idx++) {
diff --git a/src/common/enum_util.cpp b/src/common/enum_util.cpp
index 2d39793634f5..567182a4de1b 100644
--- a/src/common/enum_util.cpp
+++ b/src/common/enum_util.cpp
@@ -6010,6 +6010,8 @@ const char* EnumUtil::ToChars<UnionInvalidReason>(UnionInvalidReason value) {
 		return "VALIDITY_OVERLAP";
 	case UnionInvalidReason::TAG_MISMATCH:
 		return "TAG_MISMATCH";
+	case UnionInvalidReason::NULL_TAG:
+		return "NULL_TAG";
 	default:
 		throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
 	}
@@ -6032,6 +6034,9 @@ UnionInvalidReason EnumUtil::FromString<UnionInvalidReason>(const char *value) {
 	if (StringUtil::Equals(value, "TAG_MISMATCH")) {
 		return UnionInvalidReason::TAG_MISMATCH;
 	}
+	if (StringUtil::Equals(value, "NULL_TAG")) {
+		return UnionInvalidReason::NULL_TAG;
+	}
 	throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
 }
 
diff --git a/src/common/types/list_segment.cpp b/src/common/types/list_segment.cpp
index de350b605cb1..2a14718bf382 100644
--- a/src/common/types/list_segment.cpp
+++ b/src/common/types/list_segment.cpp
@@ -462,6 +462,10 @@ void SegmentPrimitiveFunction(ListSegmentFunctions &functions) {
 
 void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType &type) {
 
+	if (type.id() == LogicalTypeId::UNKNOWN) {
+		throw ParameterNotResolvedException();
+	}
+
 	auto physical_type = type.InternalType();
 	switch (physical_type) {
 	case PhysicalType::BIT:
diff --git a/src/common/types/vector.cpp b/src/common/types/vector.cpp
index 58b9f1626518..03af4fb904cf 100644
--- a/src/common/types/vector.cpp
+++ b/src/common/types/vector.cpp
@@ -1131,9 +1131,12 @@ void Vector::VerifyMap(Vector &vector_p, const SelectionVector &sel_p, idx_t cou
 
 void Vector::VerifyUnion(Vector &vector_p, const SelectionVector &sel_p, idx_t count) {
 #ifdef DEBUG
+	D_ASSERT(vector_p.GetType().id() == LogicalTypeId::UNION);
 	auto valid_check = UnionVector::CheckUnionValidity(vector_p, count, sel_p);
-	D_ASSERT(valid_check == UnionInvalidReason::VALID);
+	if (valid_check != UnionInvalidReason::VALID) {
+		throw InternalException("Union not valid, reason: %s", EnumUtil::ToString(valid_check));
+	}
 #endif // DEBUG
 }
 
@@ -1250,7 +1253,8 @@ void Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count)
 	}
 
 	if (vector->GetType().id() == LogicalTypeId::UNION) {
-		VerifyUnion(*vector, *sel, count);
+		// Pass in raw vector
+		VerifyUnion(vector_p, sel_p, count);
 	}
 }
 
@@ -1911,7 +1915,13 @@ void UnionVector::SetToMember(Vector &union_vector, union_tag_t tag, Vector &mem
 		// if the member vector is constant, we can set the union to constant as well
 		union_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
 		ConstantVector::GetData<union_tag_t>(tag_vector)[0] = tag;
-		ConstantVector::SetNull(union_vector, ConstantVector::IsNull(member_vector));
+		if (keep_tags_for_null) {
+			ConstantVector::SetNull(union_vector, false);
+			ConstantVector::SetNull(tag_vector, false);
+		} else {
+			ConstantVector::SetNull(union_vector, ConstantVector::IsNull(member_vector));
+			ConstantVector::SetNull(tag_vector, ConstantVector::IsNull(member_vector));
+		}
 	} else {
 		// otherwise flatten and set to flatvector
@@ -1962,53 +1972,75 @@ union_tag_t UnionVector::GetTag(const Vector &vector, idx_t index) {
 	return FlatVector::GetData<union_tag_t>(tag_vector)[index];
 }
 
-UnionInvalidReason UnionVector::CheckUnionValidity(Vector &vector, idx_t count, const SelectionVector &sel) {
-	D_ASSERT(vector.GetType().id() == LogicalTypeId::UNION);
-	auto member_count = UnionType::GetMemberCount(vector.GetType());
+//! Raw selection vector passed in (not merged with any other selection vectors)
+UnionInvalidReason UnionVector::CheckUnionValidity(Vector &vector_p, idx_t count, const SelectionVector &sel_p) {
+	D_ASSERT(vector_p.GetType().id() == LogicalTypeId::UNION);
+
+	// Will contain the (possibly) merged selection vector
+	const SelectionVector *sel = &sel_p;
+	SelectionVector owned_sel;
+	Vector *vector = &vector_p;
+	if (vector->GetVectorType() == VectorType::DICTIONARY_VECTOR) {
+		// In the case of a dictionary vector, unwrap the Vector, and merge the selection vectors.
+		auto &child = DictionaryVector::Child(*vector);
+		D_ASSERT(child.GetVectorType() != VectorType::DICTIONARY_VECTOR);
+		auto &dict_sel = DictionaryVector::SelVector(*vector);
+		// merge the selection vectors and verify the child
+		auto new_buffer = dict_sel.Slice(*sel, count);
+		owned_sel.Initialize(new_buffer);
+		sel = &owned_sel;
+		vector = &child;
+	} else if (vector->GetVectorType() == VectorType::CONSTANT_VECTOR) {
+		sel = ConstantVector::ZeroSelectionVector(count, owned_sel);
+	}
+
+	auto member_count = UnionType::GetMemberCount(vector_p.GetType());
 	if (member_count == 0) {
 		return UnionInvalidReason::NO_MEMBERS;
 	}
 
-	UnifiedVectorFormat union_vdata;
-	vector.ToUnifiedFormat(count, union_vdata);
+	UnifiedVectorFormat vector_vdata;
+	vector_p.ToUnifiedFormat(count, vector_vdata);
 
-	UnifiedVectorFormat tags_vdata;
-	auto &tag_vector = UnionVector::GetTags(vector);
-	tag_vector.ToUnifiedFormat(count, tags_vdata);
+	auto &entries = StructVector::GetEntries(vector_p);
+	duckdb::vector<UnifiedVectorFormat> child_vdata(entries.size());
+	for (idx_t entry_idx = 0; entry_idx < entries.size(); entry_idx++) {
+		auto &child = *entries[entry_idx];
+		child.ToUnifiedFormat(count, child_vdata[entry_idx]);
+	}
+
+	auto &tag_vdata = child_vdata[0];
 
-	// check that only one member is valid at a time
 	for (idx_t row_idx = 0; row_idx < count; row_idx++) {
-		auto union_mapped_row_idx = sel.get_index(row_idx);
-		if (!union_vdata.validity.RowIsValid(union_mapped_row_idx)) {
-			continue;
-		}
+		auto mapped_idx = sel->get_index(row_idx);
 
-		auto tag_mapped_row_idx = tags_vdata.sel->get_index(row_idx);
-		if (!tags_vdata.validity.RowIsValid(tag_mapped_row_idx)) {
+		if (!vector_vdata.validity.RowIsValid(mapped_idx)) {
 			continue;
 		}
 
-		auto tag = (UnifiedVectorFormat::GetData<union_tag_t>(tags_vdata))[tag_mapped_row_idx];
+		auto tag_idx = tag_vdata.sel->get_index(sel_p.get_index(row_idx));
+		if (!tag_vdata.validity.RowIsValid(tag_idx)) {
+			// we can't have NULL tags!
+			return UnionInvalidReason::NULL_TAG;
+		}
+		auto tag = UnifiedVectorFormat::GetData<union_tag_t>(tag_vdata)[tag_idx];
 		if (tag >= member_count) {
 			return UnionInvalidReason::TAG_OUT_OF_RANGE;
 		}
 
 		bool found_valid = false;
-		for (idx_t member_idx = 0; member_idx < member_count; member_idx++) {
-
-			UnifiedVectorFormat member_vdata;
-			auto &member = UnionVector::GetMember(vector, member_idx);
-			member.ToUnifiedFormat(count, member_vdata);
-
-			auto mapped_row_idx = member_vdata.sel->get_index(row_idx);
-			if (member_vdata.validity.RowIsValid(mapped_row_idx)) {
-				if (found_valid) {
-					return UnionInvalidReason::VALIDITY_OVERLAP;
-				}
-				found_valid = true;
-				if (tag != static_cast<union_tag_t>(member_idx)) {
-					return UnionInvalidReason::TAG_MISMATCH;
-				}
+		for (idx_t i = 0; i < member_count; i++) {
+			auto &member_vdata = child_vdata[1 + i]; // skip the tag
+			idx_t member_idx = member_vdata.sel->get_index(sel_p.get_index(row_idx));
+			if (!member_vdata.validity.RowIsValid(member_idx)) {
+				continue;
+			}
+			if (found_valid) {
+				return UnionInvalidReason::VALIDITY_OVERLAP;
+			}
+			found_valid = true;
+			if (tag != static_cast<union_tag_t>(i)) {
+				return UnionInvalidReason::TAG_MISMATCH;
 			}
 		}
 	}
diff --git a/src/core_functions/scalar/date/strftime.cpp b/src/core_functions/scalar/date/strftime.cpp
index a764c97eefb3..708ff2c3df2f 100644
--- a/src/core_functions/scalar/date/strftime.cpp
+++ b/src/core_functions/scalar/date/strftime.cpp
@@ -183,7 +183,14 @@ struct StrpTimeFunction {
 
 		auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
 		auto &info = func_expr.bind_info->Cast<StrpTimeBindData>();
 
-		if (args.data[1].GetVectorType() == VectorType::CONSTANT_VECTOR && ConstantVector::IsNull(args.data[1])) {
+		// There is a bizarre situation where the format column is foldable but not constant
+		// (i.e., the statistics tell us it has only one value)
+		// We have to check whether that value is NULL
+		const auto count = args.size();
+		UnifiedVectorFormat format_unified;
+		args.data[1].ToUnifiedFormat(count, format_unified);
+
+		if (!format_unified.validity.RowIsValid(0)) {
 			result.SetVectorType(VectorType::CONSTANT_VECTOR);
 			ConstantVector::SetNull(result, true);
 			return;
diff --git a/src/execution/index/art/art_key.cpp b/src/execution/index/art/art_key.cpp
index 9cc26be2b54c..5f50b4e11ef2 100644
--- a/src/execution/index/art/art_key.cpp
+++ b/src/execution/index/art/art_key.cpp
@@ -20,10 +20,10 @@ ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type,
 
 	// FIXME: rethink this
 	if (type == LogicalType::BLOB || type == LogicalType::VARCHAR) {
-		// indexes cannot contain BLOBs (or BLOBs cast to VARCHARs) that contain null-terminated bytes
+		// indexes cannot contain BLOBs (or BLOBs cast to VARCHARs) that contain zero bytes
 		for (uint32_t i = 0; i < len - 1; i++) {
 			if (data[i] == '\0') {
-				throw NotImplementedException("Indexes cannot contain BLOBs that contain null-terminated bytes.");
+				throw NotImplementedException("ART indexes cannot contain BLOBs with zero bytes.");
 			}
 		}
 	}
@@ -45,10 +45,10 @@ void ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, AR
 
 	// FIXME: rethink this
 	if (type == LogicalType::BLOB || type == LogicalType::VARCHAR) {
-		// indexes cannot contain BLOBs (or BLOBs cast to VARCHARs) that contain null-terminated bytes
+		// indexes cannot contain BLOBs (or BLOBs cast to VARCHARs) that contain zero bytes
 		for (uint32_t i = 0; i < key.len - 1; i++) {
 			if (key.data[i] == '\0') {
-				throw NotImplementedException("Indexes cannot contain BLOBs that contain null-terminated bytes.");
+				throw NotImplementedException("ART indexes cannot contain BLOBs with zero bytes.");
 			}
 		}
 	}
diff --git a/src/execution/window_executor.cpp b/src/execution/window_executor.cpp
index fb094e8da00b..91d39183cfd9 100644
--- a/src/execution/window_executor.cpp
+++ b/src/execution/window_executor.cpp
@@ -293,6 +293,18 @@ struct WindowBoundariesState {
 		}
 	}
 
+	static inline bool ExpressionNeedsPeer(const ExpressionType &type) {
+		switch (type) {
+		case ExpressionType::WINDOW_RANK:
+		case ExpressionType::WINDOW_RANK_DENSE:
+		case ExpressionType::WINDOW_PERCENT_RANK:
+		case ExpressionType::WINDOW_CUME_DIST:
+			return true;
+		default:
+			return false;
+		}
+	}
+
 	WindowBoundariesState(BoundWindowExpression &wexpr, const idx_t input_size);
 
 	void Update(const idx_t row_idx, const WindowInputColumn &range_collection, const idx_t chunk_idx,
@@ -532,7 +544,7 @@ WindowBoundariesState::WindowBoundariesState(BoundWindowExpression &wexpr, const
       partition_count(wexpr.partitions.size()), order_count(wexpr.orders.size()),
       range_sense(wexpr.orders.empty() ? OrderType::INVALID : wexpr.orders[0].type),
       has_preceding_range(HasPrecedingRange(wexpr)), has_following_range(HasFollowingRange(wexpr)),
-      needs_peer(BoundaryNeedsPeer(wexpr.end) || wexpr.type == ExpressionType::WINDOW_CUME_DIST) {
+      needs_peer(BoundaryNeedsPeer(wexpr.end) || ExpressionNeedsPeer(wexpr.type)) {
 }
 
 void WindowBoundariesState::Bounds(DataChunk &bounds, idx_t row_idx, const WindowInputColumn &range, const idx_t count,
diff --git a/src/function/cast/union/from_struct.cpp b/src/function/cast/union/from_struct.cpp
index 7e3a0ae6e31e..559803b86740 100644
--- a/src/function/cast/union/from_struct.cpp
+++ b/src/function/cast/union/from_struct.cpp
@@ -59,6 +59,28 @@ bool StructToUnionCast::Cast(Vector &source, Vector &result, idx_t count, CastPa
 		D_ASSERT(converted);
 	}
 
+	if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) {
+		result.SetVectorType(VectorType::CONSTANT_VECTOR);
+		ConstantVector::SetNull(result, ConstantVector::IsNull(source));
+
+		// if the tag is NULL, the union should be NULL
+		auto &tag_vec = *target_children[0];
+		ConstantVector::SetNull(result, ConstantVector::IsNull(tag_vec));
+	} else {
+		source.Flatten(count);
+		FlatVector::Validity(result) = FlatVector::Validity(source);
+
+		// if the tag is NULL, the union should be NULL
+		auto &tag_vec = *target_children[0];
+		UnifiedVectorFormat tag_data;
+		tag_vec.ToUnifiedFormat(count, tag_data);
+		for (idx_t i = 0; i < count; i++) {
+			if (!tag_data.validity.RowIsValid(tag_data.sel->get_index(i))) {
+				FlatVector::SetNull(result, i, true);
+			}
+		}
+	}
+
 	auto check_tags = UnionVector::CheckUnionValidity(result, count);
 	switch (check_tags) {
 	case UnionInvalidReason::TAG_OUT_OF_RANGE:
@@ -68,19 +90,14 @@ bool StructToUnionCast::Cast(Vector &source, Vector &result, idx_t count, CastPa
 	case UnionInvalidReason::TAG_MISMATCH:
 		throw ConversionException(
 		    "One or more rows in the produced UNION have tags that don't point to the valid member");
+	case UnionInvalidReason::NULL_TAG:
+		throw ConversionException("One or more rows in the produced UNION have a NULL tag");
 	case UnionInvalidReason::VALID:
 		break;
 	default:
 		throw InternalException("Struct to union cast failed for unknown reason");
 	}
 
-	if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) {
-		result.SetVectorType(VectorType::CONSTANT_VECTOR);
-		ConstantVector::SetNull(result, ConstantVector::IsNull(source));
-	} else {
-		source.Flatten(count);
-		FlatVector::Validity(result) = FlatVector::Validity(source);
-	}
 	result.Verify(count);
 	return true;
 }
diff --git a/src/function/cast/vector_cast_helpers.cpp b/src/function/cast/vector_cast_helpers.cpp
index 876f3841b674..98417a88320a 100644
--- a/src/function/cast/vector_cast_helpers.cpp
+++ b/src/function/cast/vector_cast_helpers.cpp
@@ -66,7 +66,7 @@ static bool SkipToClose(idx_t &idx, const char *buf, idx_t &len, idx_t &lvl, cha
 
 static idx_t StringTrim(const char *buf, idx_t &start_pos, idx_t pos) {
 	idx_t trailing_whitespace = 0;
-	while (StringUtil::CharacterIsSpace(buf[pos - trailing_whitespace - 1])) {
+	while (pos > start_pos && StringUtil::CharacterIsSpace(buf[pos - trailing_whitespace - 1])) {
 		trailing_whitespace++;
 	}
 	if ((buf[start_pos] == '"' && buf[pos - trailing_whitespace - 1] == '"') ||
diff --git a/src/function/scalar/string/concat.cpp b/src/function/scalar/string/concat.cpp
index fc3b41142f41..f5b04a4b8c0e 100644
--- a/src/function/scalar/string/concat.cpp
+++ b/src/function/scalar/string/concat.cpp
@@ -118,7 +118,10 @@ static void TemplatedConcatWS(DataChunk &args, const string_t *sep_data, const S
                               const SelectionVector &rsel, idx_t count, Vector &result) {
 	vector<idx_t> result_lengths(args.size(), 0);
 	vector<bool> has_results(args.size(), false);
-	auto orrified_data = make_unsafe_uniq_array<UnifiedVectorFormat>(args.ColumnCount() - 1);
+
+	// we overallocate here, but this is important for static analysis
+	auto orrified_data = make_unsafe_uniq_array<UnifiedVectorFormat>(args.ColumnCount());
+
 	for (idx_t col_idx = 1; col_idx < args.ColumnCount(); col_idx++) {
 		args.data[col_idx].ToUnifiedFormat(args.size(), orrified_data[col_idx - 1]);
 	}
diff --git a/src/function/table/arrow.cpp b/src/function/table/arrow.cpp
index 306e57d30dcc..f62449960166 100644
--- a/src/function/table/arrow.cpp
+++ b/src/function/table/arrow.cpp
@@ -14,7 +14,7 @@
 
 namespace duckdb {
 
-unique_ptr<ArrowType> ArrowTableFunction::GetArrowLogicalType(ArrowSchema &schema) {
+static unique_ptr<ArrowType> GetArrowLogicalTypeNoDictionary(ArrowSchema &schema) {
 	auto format = string(schema.format);
 	if (format == "n") {
 		return make_uniq<ArrowType>(LogicalType::SQLNULL);
@@ -87,13 +87,13 @@ unique_ptr<ArrowType> ArrowTableFunction::GetArrowLogicalType(ArrowSchema &schem
 	} else if (format == "tin") {
 		return make_uniq<ArrowType>(LogicalType::INTERVAL, ArrowDateTimeType::MONTH_DAY_NANO);
 	} else if (format == "+l") {
-		auto child_type = GetArrowLogicalType(*schema.children[0]);
+		auto child_type = ArrowTableFunction::GetArrowLogicalType(*schema.children[0]);
 		auto list_type = make_uniq<ArrowType>(LogicalType::LIST(child_type->GetDuckType()), ArrowVariableSizeType::NORMAL);
 		list_type->AddChild(std::move(child_type));
 		return list_type;
 	} else if (format == "+L") {
-		auto child_type = GetArrowLogicalType(*schema.children[0]);
+		auto child_type = ArrowTableFunction::GetArrowLogicalType(*schema.children[0]);
 		auto list_type = make_uniq<ArrowType>(LogicalType::LIST(child_type->GetDuckType()), ArrowVariableSizeType::SUPER_SIZE);
 		list_type->AddChild(std::move(child_type));
@@ -101,7 +101,7 @@ unique_ptr<ArrowType> ArrowTableFunction::GetArrowLogicalType(ArrowSchema &schem
 	} else if (format[0] == '+' && format[1] == 'w') {
 		std::string parameters = format.substr(format.find(':') + 1);
 		idx_t fixed_size = std::stoi(parameters);
-		auto child_type = GetArrowLogicalType(*schema.children[0]);
+		auto child_type = ArrowTableFunction::GetArrowLogicalType(*schema.children[0]);
 		auto list_type = make_uniq<ArrowType>(LogicalType::LIST(child_type->GetDuckType()), fixed_size);
 		list_type->AddChild(std::move(child_type));
 		return list_type;
@@ -109,7 +109,7 @@ unique_ptr<ArrowType> ArrowTableFunction::GetArrowLogicalType(ArrowSchema &schem
 		child_list_t<LogicalType> child_types;
 		vector<unique_ptr<ArrowType>> children;
 		for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) {
-			children.emplace_back(GetArrowLogicalType(*schema.children[type_idx]));
+			children.emplace_back(ArrowTableFunction::GetArrowLogicalType(*schema.children[type_idx]));
 			child_types.emplace_back(schema.children[type_idx]->name, children.back()->GetDuckType());
 		}
 		auto struct_type = make_uniq<ArrowType>(LogicalType::STRUCT(std::move(child_types)));
@@ -130,7 +130,7 @@ unique_ptr<ArrowType> ArrowTableFunction::GetArrowLogicalType(ArrowSchema &schem
 
 		for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) {
 			auto type = schema.children[type_idx];
-			children.emplace_back(GetArrowLogicalType(*type));
+			children.emplace_back(ArrowTableFunction::GetArrowLogicalType(*type));
 			members.emplace_back(type->name, children.back()->GetDuckType());
 		}
 
@@ -140,8 +140,8 @@ unique_ptr<ArrowType> ArrowTableFunction::GetArrowLogicalType(ArrowSchema &schem
 	} else if (format == "+m") {
 		auto &arrow_struct_type = *schema.children[0];
 		D_ASSERT(arrow_struct_type.n_children == 2);
-		auto key_type = GetArrowLogicalType(*arrow_struct_type.children[0]);
-		auto value_type = GetArrowLogicalType(*arrow_struct_type.children[1]);
+		auto key_type = ArrowTableFunction::GetArrowLogicalType(*arrow_struct_type.children[0]);
+		auto value_type = ArrowTableFunction::GetArrowLogicalType(*arrow_struct_type.children[1]);
 		auto map_type = make_uniq<ArrowType>(LogicalType::MAP(key_type->GetDuckType(), value_type->GetDuckType()),
 		                                     ArrowVariableSizeType::NORMAL);
 		child_list_t<LogicalType> key_value;
@@ -184,6 +184,15 @@ unique_ptr<ArrowType> ArrowTableFunction::GetArrowLogicalType(ArrowSchema &schem
 	}
 }
 
+unique_ptr<ArrowType> ArrowTableFunction::GetArrowLogicalType(ArrowSchema &schema) {
+	auto arrow_type = GetArrowLogicalTypeNoDictionary(schema);
+	if (schema.dictionary) {
+		auto dictionary = GetArrowLogicalType(*schema.dictionary);
+		arrow_type->SetDictionary(std::move(dictionary));
+	}
+	return arrow_type;
+}
+
 void ArrowTableFunction::RenameArrowColumns(vector<string> &names) {
 	unordered_map<string, idx_t> name_map;
 	for (auto &column_name : names) {
@@ -216,15 +225,7 @@ void ArrowTableFunction::PopulateArrowTableType(ArrowTableType &arrow_table, Arr
 		throw InvalidInputException("arrow_scan: released schema passed");
 	}
 	auto arrow_type = GetArrowLogicalType(schema);
-	if (schema.dictionary) {
-		auto logical_type = arrow_type->GetDuckType();
-		auto dictionary = GetArrowLogicalType(*schema.dictionary);
-		return_types.emplace_back(dictionary->GetDuckType());
-		// The dictionary might have different attributes (size type, datetime precision, etc..)
-		arrow_type->SetDictionary(std::move(dictionary));
-	} else {
-		return_types.emplace_back(arrow_type->GetDuckType());
-	}
+	return_types.emplace_back(arrow_type->GetDuckType(true));
 	arrow_table.AddColumn(col_idx, std::move(arrow_type));
 	auto format = string(schema.format);
 	auto name = string(schema.name);
diff --git a/src/function/table/arrow/CMakeLists.txt b/src/function/table/arrow/CMakeLists.txt
index 94eac4ffbecf..badf731a842e 100644
--- a/src/function/table/arrow/CMakeLists.txt
+++ b/src/function/table/arrow/CMakeLists.txt
@@ -1,4 +1,5 @@
-add_library_unity(duckdb_arrow_conversion OBJECT arrow_duck_schema.cpp)
+add_library_unity(duckdb_arrow_conversion OBJECT arrow_duck_schema.cpp
+                  arrow_array_scan_state.cpp)
 set(ALL_OBJECT_FILES
     ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:duckdb_arrow_conversion>
     PARENT_SCOPE)
diff --git a/src/function/table/arrow/arrow_array_scan_state.cpp b/src/function/table/arrow/arrow_array_scan_state.cpp
new file mode 100644
index 000000000000..88aa49bfa1db
--- /dev/null
+++ b/src/function/table/arrow/arrow_array_scan_state.cpp
@@ -0,0 +1,32 @@
+#include "duckdb/function/table/arrow.hpp"
+
+namespace duckdb {
+
+ArrowArrayScanState::ArrowArrayScanState(ArrowScanLocalState &state) : state(state) {
+}
+
+ArrowArrayScanState &ArrowArrayScanState::GetChild(idx_t child_idx) {
+	auto it = children.find(child_idx);
+	if (it == children.end()) {
+		auto child_p = make_uniq<ArrowArrayScanState>(state);
+		auto &child = *child_p;
+		children.emplace(std::make_pair(child_idx, std::move(child_p)));
+		return child;
+	}
+	return *it->second;
+}
+
+void ArrowArrayScanState::AddDictionary(unique_ptr<Vector> dictionary_p) {
+	this->dictionary = std::move(dictionary_p);
+}
+
+bool ArrowArrayScanState::HasDictionary() const {
+	return dictionary != nullptr;
+}
+
+Vector &ArrowArrayScanState::GetDictionary() {
+	D_ASSERT(HasDictionary());
+	return *dictionary;
+}
+
+} // namespace duckdb
diff --git a/src/function/table/arrow/arrow_duck_schema.cpp b/src/function/table/arrow/arrow_duck_schema.cpp
index 42d04d2ebaf7..933c4da41ef9 100644
--- a/src/function/table/arrow/arrow_duck_schema.cpp
+++ b/src/function/table/arrow/arrow_duck_schema.cpp
@@ -27,13 +27,57 @@ void ArrowType::SetDictionary(unique_ptr<ArrowType> dictionary) {
 	dictionary_type = std::move(dictionary);
 }
 
+bool ArrowType::HasDictionary() const {
+	return dictionary_type != nullptr;
+}
+
 const ArrowType &ArrowType::GetDictionary() const {
 	D_ASSERT(dictionary_type);
 	return *dictionary_type;
 }
 
-const LogicalType &ArrowType::GetDuckType() const {
-	return type;
+LogicalType ArrowType::GetDuckType(bool use_dictionary) const {
+	if (use_dictionary && dictionary_type) {
+		return dictionary_type->GetDuckType();
+	}
+	if (!use_dictionary) {
+		return type;
+	}
+	// Dictionaries can exist in arbitrarily nested schemas
+	// have to reconstruct the type
+	auto id = type.id();
+	switch (id) {
+	case LogicalTypeId::STRUCT: {
+		child_list_t<LogicalType> new_children;
+		for (idx_t i = 0; i < children.size(); i++) {
+			auto &child = children[i];
+			auto &child_name = StructType::GetChildName(type, i);
+			new_children.emplace_back(std::make_pair(child_name, child->GetDuckType(true)));
+		}
+		return LogicalType::STRUCT(std::move(new_children));
+	}
+	case LogicalTypeId::LIST: {
+		auto &child = children[0];
+		return LogicalType::LIST(child->GetDuckType(true));
+	}
+	case LogicalTypeId::MAP: {
+		auto &struct_child = children[0];
+		auto struct_type = struct_child->GetDuckType(true);
+		return LogicalType::MAP(StructType::GetChildType(struct_type, 0), StructType::GetChildType(struct_type, 1));
+	}
+	case LogicalTypeId::UNION: {
+		child_list_t<LogicalType> new_children;
+		for (idx_t i = 0; i < children.size(); i++) {
+			auto &child = children[i];
+			auto &child_name = UnionType::GetMemberName(type, i);
+			new_children.emplace_back(std::make_pair(child_name, child->GetDuckType(true)));
+		}
+		return LogicalType::UNION(std::move(new_children));
+	}
+	default: {
+		return type;
+	}
+	}
 }
 
 ArrowVariableSizeType ArrowType::GetSizeType() const {
diff --git a/src/function/table/arrow_conversion.cpp b/src/function/table/arrow_conversion.cpp
index ba7d011ad3d9..204078357e43 100644
--- a/src/function/table/arrow_conversion.cpp
+++ b/src/function/table/arrow_conversion.cpp
@@ -80,14 +80,20 @@ static void SetValidityMask(Vector &vector, ArrowArray &array, ArrowScanLocalSta
 	GetValidityMask(mask, array, scan_state, size, nested_offset, add_null);
 }
 
-static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
+static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state, idx_t size,
                                 const ArrowType &arrow_type, int64_t nested_offset = -1,
                                 ValidityMask *parent_mask = nullptr, uint64_t parent_offset = 0);
 
-static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
+static void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state,
+                                          idx_t size, const ArrowType &arrow_type, int64_t nested_offset = -1,
+                                          ValidityMask *parent_mask = nullptr, uint64_t parent_offset = 0);
+
+static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state, idx_t size,
                               const ArrowType &arrow_type, int64_t nested_offset, ValidityMask *parent_mask) {
 	auto size_type = arrow_type.GetSizeType();
 	idx_t list_size = 0;
+	auto &scan_state = array_state.state;
+
 	SetValidityMask(vector, array, scan_state, size, nested_offset);
 	idx_t start_offset = 0;
 	idx_t cur_offset = 0;
@@ -152,10 +158,19 @@ static void ArrowToDuckDBList(Vector &vector, ArrowArray &array, ArrowScanLocalS
 			}
 		}
 	}
+	auto &child_state = array_state.GetChild(0);
+	auto &child_array = *array.children[0];
+	auto &child_type = arrow_type[0];
 	if (list_size == 0 && start_offset == 0) {
-		ColumnArrowToDuckDB(child_vector, *array.children[0], scan_state, list_size, arrow_type[0], -1);
+		D_ASSERT(!child_array.dictionary);
+		ColumnArrowToDuckDB(child_vector, child_array, child_state, list_size, child_type, -1);
 	} else {
-		ColumnArrowToDuckDB(child_vector, *array.children[0], scan_state, list_size, arrow_type[0], start_offset);
+		if (child_array.dictionary) {
+			// TODO: add support for offsets
+			ColumnArrowToDuckDBDictionary(child_vector, child_array, child_state, list_size, child_type, start_offset);
+		} else {
+			ColumnArrowToDuckDB(child_vector, child_array, child_state, list_size, child_type, start_offset);
+		}
 	}
 }
@@ -343,9 +358,11 @@ static void IntervalConversionMonthDayNanos(Vector &vector, ArrowArray &array, A
 	}
 }
 
-static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state, idx_t size,
+static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state, idx_t size,
                                 const ArrowType &arrow_type, int64_t nested_offset, ValidityMask *parent_mask,
                                 uint64_t parent_offset) {
+	auto &scan_state = array_state.state;
+	D_ASSERT(!array.dictionary);
 	switch (vector.GetType().id()) {
 	case LogicalTypeId::SQLNULL:
 		vector.Reference(Value());
@@ -601,11 +618,11 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
 		break;
 	}
 	case LogicalTypeId::LIST: {
-		ArrowToDuckDBList(vector, array, scan_state, size, arrow_type, nested_offset, parent_mask);
+		ArrowToDuckDBList(vector, array, array_state, size, arrow_type, nested_offset, parent_mask);
 		break;
 	}
 	case LogicalTypeId::MAP: {
-		ArrowToDuckDBList(vector, array, scan_state, size, arrow_type, nested_offset, parent_mask);
+		ArrowToDuckDBList(vector, array, array_state, size, arrow_type, nested_offset, parent_mask);
 		ArrowToDuckDBMapVerify(vector, size);
 		break;
 	}
@@ -613,18 +630,29 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
 		//! Fill the children
 		auto &child_entries = StructVector::GetEntries(vector);
 		auto &struct_validity_mask = FlatVector::Validity(vector);
-		for (idx_t type_idx = 0; type_idx < static_cast<idx_t>(array.n_children); type_idx++) {
-			SetValidityMask(*child_entries[type_idx], *array.children[type_idx], scan_state, size, nested_offset);
+		for (int64_t child_idx = 0; child_idx < array.n_children; child_idx++) {
+			auto &child_entry = *child_entries[child_idx];
+			auto &child_array = *array.children[child_idx];
+			auto &child_type = arrow_type[child_idx];
+			auto &child_state = array_state.GetChild(child_idx);
+
+			SetValidityMask(child_entry, child_array, scan_state, size, nested_offset);
 			if (!struct_validity_mask.AllValid()) {
-				auto &child_validity_mark = FlatVector::Validity(*child_entries[type_idx]);
+				auto &child_validity_mark = FlatVector::Validity(child_entry);
 				for (idx_t i = 0; i < size; i++) {
 					if (!struct_validity_mask.RowIsValid(i)) {
 						child_validity_mark.SetInvalid(i);
 					}
 				}
 			}
-			ColumnArrowToDuckDB(*child_entries[type_idx], *array.children[type_idx], scan_state, size,
-			                    arrow_type[type_idx], nested_offset, &struct_validity_mask, array.offset);
+			if (child_array.dictionary) {
+				// TODO: add support for offsets
+				ColumnArrowToDuckDBDictionary(child_entry, child_array, child_state, size, child_type, nested_offset,
+				                              &struct_validity_mask, array.offset);
+			} else {
+				ColumnArrowToDuckDB(child_entry, child_array, child_state, size, child_type, nested_offset,
+				                    &struct_validity_mask, array.offset);
+			}
 		}
 		break;
 	}
@@ -636,14 +664,19 @@ static void ColumnArrowToDuckDB(Vector &vector, ArrowArray &array, ArrowScanLoca
 		auto &validity_mask = FlatVector::Validity(vector);
 
 		duckdb::vector<Vector> children;
-		for (idx_t type_idx = 0; type_idx < static_cast<idx_t>(array.n_children); type_idx++) {
-			Vector child(members[type_idx].second);
-			auto arrow_array = array.children[type_idx];
-			auto &child_type = arrow_type[type_idx];
+		for (int64_t child_idx = 0; child_idx < array.n_children; child_idx++) {
+			Vector child(members[child_idx].second, size);
+			auto &child_array = *array.children[child_idx];
+			auto &child_state = array_state.GetChild(child_idx);
+			auto &child_type = arrow_type[child_idx];
 
-			SetValidityMask(child, *arrow_array, scan_state, size, nested_offset);
+			SetValidityMask(child, child_array, scan_state, size, nested_offset);
 
-			ColumnArrowToDuckDB(child, *arrow_array, scan_state, size, child_type, nested_offset, &validity_mask);
+			if (child_array.dictionary) {
+				ColumnArrowToDuckDBDictionary(child, child_array, child_state, size, child_type);
+			} else {
+				ColumnArrowToDuckDB(child, child_array, child_state, size, child_type, nested_offset, &validity_mask);
+			}
 
 			children.push_back(std::move(child));
 		}
@@ -790,30 +823,31 @@ static void SetSelectionVector(SelectionVector &sel, data_ptr_t indices_p, Logic
 	}
 }
 
-static void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowScanLocalState &scan_state,
-                                          idx_t size, const ArrowType &arrow_type, idx_t col_idx) {
+static void ColumnArrowToDuckDBDictionary(Vector &vector, ArrowArray &array, ArrowArrayScanState &array_state,
+                                          idx_t size, const ArrowType &arrow_type, int64_t nested_offset,
+                                          ValidityMask *parent_mask, uint64_t parent_offset) {
 	SelectionVector sel;
-	auto &dict_vectors = scan_state.arrow_dictionary_vectors;
-	if (!dict_vectors.count(col_idx)) {
+	auto &scan_state = array_state.state;
+	if (!array_state.HasDictionary()) {
 		//! We need to set the dictionary data for this column
 		auto base_vector = make_uniq<Vector>(vector.GetType(), array.dictionary->length);
 		SetValidityMask(*base_vector, *array.dictionary, scan_state, array.dictionary->length, 0,
 		                array.null_count > 0);
-		ColumnArrowToDuckDB(*base_vector, *array.dictionary, scan_state, array.dictionary->length,
+		ColumnArrowToDuckDB(*base_vector, *array.dictionary, array_state, array.dictionary->length,
 		                    arrow_type.GetDictionary());
-		dict_vectors[col_idx] = std::move(base_vector);
+		array_state.AddDictionary(std::move(base_vector));
 	}
-	auto dictionary_type = arrow_type.GetDuckType();
+	auto offset_type = arrow_type.GetDuckType();
 	//! Get Pointer to Indices of Dictionary
 	auto indices = ArrowBufferData<data_t>(array, 1) +
-	               GetTypeIdSize(dictionary_type.InternalType()) * (scan_state.chunk_offset + array.offset);
+	               GetTypeIdSize(offset_type.InternalType()) * (scan_state.chunk_offset + array.offset);
 	if (array.null_count > 0) {
 		ValidityMask indices_validity;
 		GetValidityMask(indices_validity, array, scan_state, size);
-		SetSelectionVector(sel, indices, dictionary_type, size, &indices_validity, array.dictionary->length);
+		SetSelectionVector(sel, indices, offset_type, size, &indices_validity, array.dictionary->length);
 	} else {
-		SetSelectionVector(sel, indices, dictionary_type, size);
+		SetSelectionVector(sel, indices, offset_type, size);
 	}
-	vector.Slice(*dict_vectors[col_idx], sel, size);
+	vector.Slice(array_state.GetDictionary(), sel, size);
 }
 
 void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state, const arrow_column_map_t &arrow_convert_data,
@@ -849,11 +883,13 @@ void ArrowTableFunction::ArrowToDuckDB(ArrowScanLocalState &scan_state, const ar
 		D_ASSERT(arrow_convert_data.find(col_idx) != arrow_convert_data.end());
 		auto &arrow_type = *arrow_convert_data.at(col_idx);
+		auto &array_state = scan_state.GetState(col_idx);
+
 		if (array.dictionary) {
-			ColumnArrowToDuckDBDictionary(output.data[idx], array, scan_state, output.size(), arrow_type, col_idx);
+			ColumnArrowToDuckDBDictionary(output.data[idx], array, array_state, output.size(), arrow_type);
 		} else {
 			SetValidityMask(output.data[idx], array, scan_state, output.size(), -1);
-			ColumnArrowToDuckDB(output.data[idx], array, scan_state, output.size(), arrow_type);
+			ColumnArrowToDuckDB(output.data[idx], array, array_state, output.size(), arrow_type);
 		}
 	}
 }
diff --git a/src/function/table/copy_csv.cpp b/src/function/table/copy_csv.cpp
index b1f1b9f1e967..e721fe465d6d 100644
--- a/src/function/table/copy_csv.cpp
+++ b/src/function/table/copy_csv.cpp
@@ -91,15 +91,15 @@ void BaseCSVData::Finalize() {
 	}
 }
 
-static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyInfo &info, vector<string> &names,
-                                             vector<LogicalType> &sql_types) {
+static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, const CopyInfo &info, const vector<string> &names,
+                                             const vector<LogicalType> &sql_types) {
 	auto bind_data = make_uniq<WriteCSVData>(info.file_path, sql_types, names);
 
 	// check all the options in the copy info
 	for (auto &option : info.options) {
 		auto loption = StringUtil::Lower(option.first);
 		auto &set = option.second;
-		bind_data->options.SetWriteOption(loption, ConvertVectorToValue(std::move(set)));
+		bind_data->options.SetWriteOption(loption, ConvertVectorToValue(set));
 	}
 	// verify the parsed options
 	if (bind_data->options.force_quote.empty()) {
diff --git a/src/function/table/version/CMakeLists.txt b/src/function/table/version/CMakeLists.txt
index 2c54094c4503..54f2c3768760 100644
--- a/src/function/table/version/CMakeLists.txt
+++ b/src/function/table/version/CMakeLists.txt
@@ -1,5 +1,8 @@
 add_definitions(-DDUCKDB_SOURCE_ID="\""${GIT_COMMIT_HASH}"\"")
 add_definitions(-DDUCKDB_VERSION="\""${DUCKDB_VERSION}"\"")
+add_definitions(-DDUCKDB_MAJOR_VERSION=${DUCKDB_MAJOR_VERSION})
+add_definitions(-DDUCKDB_MINOR_VERSION=${DUCKDB_MINOR_VERSION})
+add_definitions(-DDUCKDB_PATCH_VERSION=${DUCKDB_PATCH_VERSION})
 
 add_library_unity(duckdb_func_table_version OBJECT pragma_version.cpp)
diff --git a/src/include/duckdb/common/filename_pattern.hpp b/src/include/duckdb/common/filename_pattern.hpp
index 3795fc364857..98899fcd2cc4 100644
--- a/src/include/duckdb/common/filename_pattern.hpp
+++ b/src/include/duckdb/common/filename_pattern.hpp
@@ -13,7 +13,11 @@
 
 namespace duckdb {
 
+class Serializer;
+class Deserializer;
+
 class FilenamePattern {
+	friend Deserializer;
 
 public:
 	FilenamePattern() : _base("data_"), _pos(_base.length()), _uuid(false) {
@@ -25,6 +29,9 @@ class FilenamePattern {
 	void SetFilenamePattern(const string &pattern);
 	string CreateFilename(FileSystem &fs, const string &path, const string &extension, idx_t offset) const;
 
+	void Serialize(Serializer &serializer) const;
+	static FilenamePattern Deserialize(Deserializer &deserializer);
+
 private:
 	string _base;
 	idx_t _pos;
diff --git a/src/include/duckdb/common/types/vector.hpp b/src/include/duckdb/common/types/vector.hpp
index 885aabee228a..2ef4de986d3c 100644
--- a/src/include/duckdb/common/types/vector.hpp
+++ b/src/include/duckdb/common/types/vector.hpp
@@ -447,7 +447,14 @@ struct StructVector {
 	DUCKDB_API static vector<unique_ptr<Vector>> &GetEntries(Vector &vector);
 };
 
-enum class UnionInvalidReason : uint8_t { VALID, TAG_OUT_OF_RANGE, NO_MEMBERS, VALIDITY_OVERLAP, TAG_MISMATCH };
+enum class UnionInvalidReason : uint8_t {
+	VALID,
+	TAG_OUT_OF_RANGE,
+	NO_MEMBERS,
+	VALIDITY_OVERLAP,
+	TAG_MISMATCH,
+	NULL_TAG
+};
 
 struct UnionVector {
 	// Unions are stored as structs, but the first child is always the "tag"
@@ -460,7 +467,12 @@ struct UnionVector {
 	// 2. The validity of the tag vector always matches the validity of the
 	// union vector itself.
 	//
-	// 3. For each tag in the tag vector, 0 <= tag < |members|
+	// 3. A valid union cannot have a NULL tag, but the selected member can
+	// be NULL. therefore, there is a difference between a union that "is"
+	// NULL and a union that "holds" a NULL. The latter still has a valid
+	// tag.
+	//
+	// 4. For each tag in the tag vector, 0 <= tag < |members|
 
 	//! Get the tag vector of a union vector
 	DUCKDB_API static const Vector &GetTags(const Vector &v);
diff --git a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp
index 10f57b11875b..12c9bc61345e 100644
--- a/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp
+++ b/src/include/duckdb/execution/operator/persistent/csv_rejects_table.hpp
@@ -1,13 +1,16 @@
 #pragma once
 
-#include "duckdb.hpp"
-#ifndef DUCKDB_AMALGAMATION
 #include "duckdb/storage/object_cache.hpp"
-#endif
+#include "duckdb/common/mutex.hpp"
+#include "duckdb/common/typedefs.hpp"
+#include "duckdb/common/shared_ptr.hpp"
+#include "duckdb/common/string.hpp"
 
 namespace duckdb {
 
 struct ReadCSVData;
+class TableCatalogEntry;
+class ClientContext;
 
 class CSVRejectsTable : public ObjectCacheEntry {
 public:
diff --git a/src/include/duckdb/function/copy_function.hpp b/src/include/duckdb/function/copy_function.hpp
index 1dd25eb026b5..8f0e5f758c6a 100644
--- a/src/include/duckdb/function/copy_function.hpp
+++ b/src/include/duckdb/function/copy_function.hpp
@@ -71,8 +71,8 @@ struct PreparedBatchData {
 enum class CopyFunctionExecutionMode { REGULAR_COPY_TO_FILE, PARALLEL_COPY_TO_FILE, BATCH_COPY_TO_FILE };
 
 typedef BoundStatement (*copy_to_plan_t)(Binder &binder, CopyStatement &stmt);
-typedef unique_ptr<FunctionData> (*copy_to_bind_t)(ClientContext &context, CopyInfo &info, vector<string> &names,
-                                                   vector<LogicalType> &sql_types);
+typedef unique_ptr<FunctionData> (*copy_to_bind_t)(ClientContext &context, const CopyInfo &info,
+                                                   const vector<string> &names, const vector<LogicalType> &sql_types);
 typedef unique_ptr<LocalFunctionData> (*copy_to_initialize_local_t)(ExecutionContext &context, FunctionData &bind_data);
 typedef unique_ptr<GlobalFunctionData> (*copy_to_initialize_global_t)(ClientContext &context, FunctionData &bind_data,
                                                                       const string &file_path);
diff --git a/src/include/duckdb/function/table/arrow.hpp b/src/include/duckdb/function/table/arrow.hpp
index df6e49953835..86caf5889ee3 100644
--- a/src/include/duckdb/function/table/arrow.hpp
+++ b/src/include/duckdb/function/table/arrow.hpp
@@ -63,10 +63,30 @@ struct ArrowScanFunctionData : public PyTableFunctionData {
 	ArrowTableType arrow_table;
 };
 
+struct ArrowScanLocalState;
+struct ArrowArrayScanState {
+public:
+	ArrowArrayScanState(ArrowScanLocalState &state);
+
+public:
+	ArrowScanLocalState &state;
+	unordered_map<idx_t, unique_ptr<ArrowArrayScanState>> children;
+	// Cache the (optional) dictionary of this array
+	unique_ptr<Vector> dictionary;
+
+public:
+	ArrowArrayScanState &GetChild(idx_t child_idx);
+	void AddDictionary(unique_ptr<Vector> dictionary_p);
+	bool HasDictionary() const;
+	Vector &GetDictionary();
+};
+
 struct ArrowScanLocalState : public LocalTableFunctionState {
+public:
 	explicit ArrowScanLocalState(unique_ptr<ArrowArrayWrapper> current_chunk) : chunk(current_chunk.release()) {
 	}
 
+public:
 	unique_ptr<ArrowArrayStreamWrapper> stream;
 	shared_ptr<ArrowArrayWrapper> chunk;
 	// This vector hold the Arrow Vectors owned by DuckDB to allow for zero-copy
 	idx_t chunk_offset = 0;
 	idx_t batch_index = 0;
 	vector<column_t> column_ids;
-	//! Store child vectors for Arrow Dictionary Vectors (col-idx,vector)
-	unordered_map<idx_t, unique_ptr<Vector>> arrow_dictionary_vectors;
+	unordered_map<idx_t, unique_ptr<ArrowArrayScanState>> array_states;
 	TableFilterSet *filters = nullptr;
 	//! The DataChunk containing all read columns (even filter columns that are immediately removed)
 	DataChunk all_columns;
+
+public:
+	ArrowArrayScanState &GetState(idx_t child_idx) {
+		auto it = array_states.find(child_idx);
+		if (it == array_states.end()) {
+			auto child_p = make_uniq<ArrowArrayScanState>(*this);
+			auto &child = *child_p;
+			array_states.emplace(std::make_pair(child_idx, std::move(child_p)));
+			return child;
+		}
+		return *it->second;
+	}
 };
 
 struct ArrowScanGlobalState : public GlobalTableFunctionState {
@@ -150,6 +181,8 @@ struct ArrowTableFunction {
 	                          const GlobalTableFunctionState *global_state);
 	//! Renames repeated columns and case sensitive columns
 	static void RenameArrowColumns(vector<string> &names);
+
+public:
 	//! Helper function to get the DuckDB logical type
 	static unique_ptr<ArrowType> GetArrowLogicalType(ArrowSchema &schema);
 };
diff --git a/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp b/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp
index bd15f89dfaea..d475875f0db2 100644
--- a/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp
+++ b/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp
@@ -57,7 +57,7 @@ class ArrowType {
 
 	void AssignChildren(vector<unique_ptr<ArrowType>> children);
 
-	const LogicalType &GetDuckType() const;
+	LogicalType GetDuckType(bool use_dictionary = false) const;
 
 	ArrowVariableSizeType GetSizeType() const;
 
@@ -65,6 +65,8 @@ class ArrowType {
 
 	void SetDictionary(unique_ptr<ArrowType> dictionary);
 
+	bool HasDictionary() const;
+
 	ArrowDateTimeType GetDateTimeType() const;
 
 	const ArrowType &GetDictionary() const;
diff --git a/src/include/duckdb/optimizer/filter_combiner.hpp b/src/include/duckdb/optimizer/filter_combiner.hpp
index 3764915d7f3d..f759165731b3 100644
--- a/src/include/duckdb/optimizer/filter_combiner.hpp
+++ b/src/include/duckdb/optimizer/filter_combiner.hpp
@@ -54,7 +54,7 @@ class FilterCombiner {
 private:
 	FilterResult AddFilter(Expression &expr);
 	FilterResult AddBoundComparisonFilter(Expression &expr);
-	FilterResult AddTransitiveFilters(BoundComparisonExpression &comparison);
+	FilterResult AddTransitiveFilters(BoundComparisonExpression &comparison, bool is_root = true);
	unique_ptr<Expression> FindTransitiveFilter(Expression &expr);
	// unordered_map<idx_t, vector<ExpressionValueInformation>>
	// FindZonemapChecks(vector<idx_t> &column_ids, unordered_set<idx_t> &not_constants, Expression *filter);
diff --git a/src/include/duckdb/parser/parsed_data/create_info.hpp b/src/include/duckdb/parser/parsed_data/create_info.hpp
index 3fd941288236..70e047286d42 100644
--- a/src/include/duckdb/parser/parsed_data/create_info.hpp
+++ b/src/include/duckdb/parser/parsed_data/create_info.hpp
@@ -10,6 +10,7 @@
 
 #include "duckdb/common/enums/catalog_type.hpp"
 #include "duckdb/parser/parsed_data/parse_info.hpp"
+#include "duckdb/common/enum_util.hpp"
 
 namespace duckdb {
 struct AlterInfo;
@@ -61,6 +62,10 @@ struct CreateInfo : public ParseInfo {
 	DUCKDB_API void CopyProperties(CreateInfo &other) const;
 	//! Generates an alter statement from the create statement - used for OnCreateConflict::ALTER_ON_CONFLICT
 	DUCKDB_API virtual unique_ptr<AlterInfo> GetAlterInfo() const;
+	virtual string ToString() const {
+		throw InternalException("ToString not supported for this type of CreateInfo: '%s'",
+		                        EnumUtil::ToString(info_type));
+	}
 };
 
 } // namespace duckdb
diff --git a/src/include/duckdb/parser/parsed_data/create_view_info.hpp b/src/include/duckdb/parser/parsed_data/create_view_info.hpp
index 4f9ff34ba5b7..c70a273c092c 100644
--- a/src/include/duckdb/parser/parsed_data/create_view_info.hpp
+++ b/src/include/duckdb/parser/parsed_data/create_view_info.hpp
@@ -15,11 +15,13 @@ namespace duckdb {
 class SchemaCatalogEntry;
 
 struct CreateViewInfo : public CreateInfo {
+public:
 	CreateViewInfo();
 	CreateViewInfo(SchemaCatalogEntry &schema, string view_name);
 	CreateViewInfo(string catalog_p, string schema_p, string view_name);
 
-	//! Table name to insert to
+public:
+	//! View name
 	string view_name;
 	//! Aliases of the view
 	vector<string> aliases;
@@ -38,6 +40,7 @@ struct CreateViewInfo : public CreateInfo {
 
 	DUCKDB_API void Serialize(Serializer &serializer) const override;
 	DUCKDB_API static unique_ptr<CreateInfo> Deserialize(Deserializer &deserializer);
+	string ToString() const override;
 };
 
 } // namespace duckdb
diff --git a/src/include/duckdb/parser/statement/create_statement.hpp b/src/include/duckdb/parser/statement/create_statement.hpp
index 362af2642b9f..74177d873113 100644
--- a/src/include/duckdb/parser/statement/create_statement.hpp
+++ b/src/include/duckdb/parser/statement/create_statement.hpp
@@ -27,6 +27,7 @@ class CreateStatement : public SQLStatement {
 
 public:
 	unique_ptr<SQLStatement> Copy() const override;
+	string ToString() const override;
 };
 
 } // namespace duckdb
diff --git a/src/include/duckdb/planner/operator/logical_copy_to_file.hpp b/src/include/duckdb/planner/operator/logical_copy_to_file.hpp
index 8bac275713c5..0e1a7789d92b 100644
--- a/src/include/duckdb/planner/operator/logical_copy_to_file.hpp
+++ b/src/include/duckdb/planner/operator/logical_copy_to_file.hpp
@@ -20,12 +20,14 @@ class LogicalCopyToFile : public LogicalOperator {
 	static constexpr const LogicalOperatorType TYPE = LogicalOperatorType::LOGICAL_COPY_TO_FILE;
 
 public:
-	LogicalCopyToFile(CopyFunction function, unique_ptr<FunctionData> bind_data)
-	    : LogicalOperator(LogicalOperatorType::LOGICAL_COPY_TO_FILE), function(function),
-	      bind_data(std::move(bind_data)) {
+	LogicalCopyToFile(CopyFunction function, unique_ptr<FunctionData> bind_data, unique_ptr<CopyInfo> copy_info)
+	    : LogicalOperator(LogicalOperatorType::LOGICAL_COPY_TO_FILE), function(std::move(function)),
+	      bind_data(std::move(bind_data)), copy_info(std::move(copy_info)) {
 	}
 	CopyFunction function;
 	unique_ptr<FunctionData> bind_data;
+	unique_ptr<CopyInfo> copy_info;
+
 	std::string file_path;
 	bool use_tmp_file;
 	FilenamePattern filename_pattern;
@@ -39,10 +41,6 @@ class LogicalCopyToFile : public LogicalOperator {
 
 public:
 	idx_t EstimateCardinality(ClientContext &context) override;
-	//! Skips the serialization check in VerifyPlan
-	bool SupportSerialization() const override {
-		return false;
-	}
 
 	void Serialize(Serializer &serializer) const override;
 	static unique_ptr<LogicalOperator> Deserialize(Deserializer &deserializer);
diff --git a/src/include/duckdb/storage/serialization/logical_operator.json b/src/include/duckdb/storage/serialization/logical_operator.json
index 43dd03999a9f..3eee4894b6ca 100644
--- a/src/include/duckdb/storage/serialization/logical_operator.json
+++ b/src/include/duckdb/storage/serialization/logical_operator.json
@@ -906,5 +906,32 @@
     "base": "LogicalOperator",
     "enum": "LOGICAL_EXTENSION_OPERATOR",
     "custom_implementation": true
+  },
+  {
+    "class": "FilenamePattern",
+    "pointer_type": "none",
+    "members": [
+      {
+        "id": 200,
+        "name": "base",
+        "type": "string",
+        "serialize_property": "_base",
+        "deserialize_property": "_base"
+      },
+      {
+        "id": 201,
+        "name": "pos",
+        "type": "idx_t",
+        "serialize_property": "_pos",
+        "deserialize_property": "_pos"
+      },
+      {
+        "id": 202,
+        "name": "uuid",
+        "type": "bool",
+        "serialize_property": "_uuid",
+        "deserialize_property": "_uuid"
+      }
+    ]
   }
 ]
diff --git a/src/main/config.cpp b/src/main/config.cpp
index 89d5e1ba4fe2..347a8b9195a8 100644
--- a/src/main/config.cpp
+++ b/src/main/config.cpp
@@ -169,6 +169,13 @@ void DBConfig::SetOptionByName(const string &name, const Value &value) {
 	auto option = DBConfig::GetOptionByName(name);
 	if (option) {
 		SetOption(*option, value);
+		return;
+	}
+
+	auto param = extension_parameters.find(name);
+	if (param != extension_parameters.end()) {
+		Value target_value = value.DefaultCastAs(param->second.type);
+		SetOption(name, std::move(target_value));
 	} else {
 		options.unrecognized_options[name] = value;
 	}
diff --git a/src/optimizer/filter_combiner.cpp b/src/optimizer/filter_combiner.cpp
index 776cffe24fe6..9461c20006fc 100644
--- a/src/optimizer/filter_combiner.cpp
+++ b/src/optimizer/filter_combiner.cpp
@@ -782,7 +782,7 @@ FilterResult FilterCombiner::AddFilter(Expression &expr) {
  * Create and add new transitive filters from a two non-scalar filter such as j > i, j >= i, j < i, and j <= i
  * It's missing to create another method to add transitive filters from scalar filters, e.g, i > 10
  */
-FilterResult FilterCombiner::AddTransitiveFilters(BoundComparisonExpression &comparison) {
+FilterResult FilterCombiner::AddTransitiveFilters(BoundComparisonExpression &comparison, bool is_root) {
 	D_ASSERT(IsGreaterThan(comparison.type) || IsLessThan(comparison.type));
 	// get the LHS and RHS nodes
 	auto &left_node = GetNode(*comparison.left);
@@ -886,14 +886,16 @@ FilterResult FilterCombiner::AddTransitiveFilters(BoundComparisonExpression &com
 		is_successful = true;
 	}
 	if (is_successful) {
-		// now check for remaining trasitive filters from the left column
-		auto transitive_filter = FindTransitiveFilter(*comparison.left);
-		if (transitive_filter != nullptr) {
-			// try to add transitive filters
-			if (AddTransitiveFilters(transitive_filter->Cast<BoundComparisonExpression>()) ==
-			    FilterResult::UNSUPPORTED) {
-				// in case of unsuccessful re-add filter into remaining ones
-				remaining_filters.push_back(std::move(transitive_filter));
+		if (is_root) {
+			// now check for remaining transitive filters from the left column
+			auto transitive_filter = FindTransitiveFilter(*comparison.left);
+			if (transitive_filter != nullptr) {
+				// try to add transitive filters
+				auto &transitive_cast = transitive_filter->Cast<BoundComparisonExpression>();
+				if (AddTransitiveFilters(transitive_cast, false) == FilterResult::UNSUPPORTED) {
+					// in case of unsuccessful re-add filter into remaining ones
+ remaining_filters.push_back(std::move(transitive_filter)); + } } } return FilterResult::SUCCESS; diff --git a/src/optimizer/statistics/operator/propagate_join.cpp b/src/optimizer/statistics/operator/propagate_join.cpp index 38dcc278b7bf..d1d7bd5b7a11 100644 --- a/src/optimizer/statistics/operator/propagate_join.cpp +++ b/src/optimizer/statistics/operator/propagate_join.cpp @@ -67,6 +67,21 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq break; case FilterPropagateResult::FILTER_ALWAYS_TRUE: // filter is always true + // If this is the inequality for an AsOf join, + // then we must leave it in because it also flags + // the semantics of restricting to a single match + // so we can't replace it with an equi-join on the remaining conditions. + if (join.type == LogicalOperatorType::LOGICAL_ASOF_JOIN) { + switch (condition.comparison) { + case ExpressionType::COMPARE_GREATERTHAN: + case ExpressionType::COMPARE_GREATERTHANOREQUALTO: + case ExpressionType::COMPARE_LESSTHAN: + case ExpressionType::COMPARE_LESSTHANOREQUALTO: + continue; + default: + break; + } + } if (join.conditions.size() > 1) { // there are multiple conditions: erase this condition join.conditions.erase(join.conditions.begin() + i); diff --git a/src/parser/parsed_data/create_view_info.cpp b/src/parser/parsed_data/create_view_info.cpp index fde4a2c8c4bc..792f2547bb09 100644 --- a/src/parser/parsed_data/create_view_info.cpp +++ b/src/parser/parsed_data/create_view_info.cpp @@ -19,6 +19,33 @@ CreateViewInfo::CreateViewInfo(SchemaCatalogEntry &schema, string view_name) : CreateViewInfo(schema.catalog.GetName(), schema.name, std::move(view_name)) { } +string CreateViewInfo::ToString() const { + string result; + + result += "CREATE"; + if (on_conflict == OnCreateConflict::REPLACE_ON_CONFLICT) { + result += " OR REPLACE"; + } + if (temporary) { + result += " TEMPORARY"; + } + result += " VIEW "; + if (schema != DEFAULT_SCHEMA) { + result += KeywordHelper::WriteOptionallyQuoted(schema); + result += "."; + } + result += KeywordHelper::WriteOptionallyQuoted(view_name); + if (!aliases.empty()) { + result += " ("; + result += StringUtil::Join(aliases, aliases.size(), ", ", + [](const string &name) { return KeywordHelper::WriteOptionallyQuoted(name); }); + result += ")"; + } + result += " AS "; + result += query->ToString(); + return result; +} + unique_ptr CreateViewInfo::Copy() const { auto result = make_uniq(catalog, schema, view_name); CopyProperties(*result); diff --git a/src/parser/statement/create_statement.cpp b/src/parser/statement/create_statement.cpp index 514807f0de2a..fbc86c874716 100644 --- a/src/parser/statement/create_statement.cpp +++ b/src/parser/statement/create_statement.cpp @@ -12,4 +12,8 @@ unique_ptr CreateStatement::Copy() const { return unique_ptr(new CreateStatement(*this)); } +string CreateStatement::ToString() const { + return info->ToString(); +} + } // namespace duckdb diff --git a/src/planner/binder/statement/bind_copy.cpp b/src/planner/binder/statement/bind_copy.cpp index 35cc798e936f..251fb01e0474 100644 --- a/src/planner/binder/statement/bind_copy.cpp +++ b/src/planner/binder/statement/bind_copy.cpp @@ -141,12 +141,13 @@ BoundStatement Binder::BindCopyTo(CopyStatement &stmt) { } auto unique_column_names = GetUniqueNames(select_node.names); + auto file_path = stmt.info->file_path; auto function_data = copy_function.function.copy_to_bind(context, *stmt.info, unique_column_names, select_node.types); // now create the copy information - auto copy = 
make_uniq<LogicalCopyToFile>(copy_function.function, std::move(function_data));
-	copy->file_path = stmt.info->file_path;
+	auto copy = make_uniq<LogicalCopyToFile>(copy_function.function, std::move(function_data), std::move(stmt.info));
+	copy->file_path = file_path;
 	copy->use_tmp_file = use_tmp_file;
 	copy->overwrite_or_ignore = overwrite_or_ignore;
 	copy->filename_pattern = filename_pattern;
diff --git a/src/planner/operator/logical_copy_to_file.cpp b/src/planner/operator/logical_copy_to_file.cpp
index c3654b862f2d..93572634c624 100644
--- a/src/planner/operator/logical_copy_to_file.cpp
+++ b/src/planner/operator/logical_copy_to_file.cpp
@@ -2,15 +2,89 @@
 #include "duckdb/catalog/catalog_entry/copy_function_catalog_entry.hpp"
 #include "duckdb/function/copy_function.hpp"
+#include "duckdb/function/function_serialization.hpp"
+
+#include "duckdb/common/serializer/serializer.hpp"
+#include "duckdb/common/serializer/deserializer.hpp"
 
 namespace duckdb {
 
 void LogicalCopyToFile::Serialize(Serializer &serializer) const {
-	throw SerializationException("LogicalCopyToFile not implemented yet");
+	LogicalOperator::Serialize(serializer);
+	serializer.WriteProperty(200, "file_path", file_path);
+	serializer.WriteProperty(201, "use_tmp_file", use_tmp_file);
+	serializer.WriteProperty(202, "filename_pattern", filename_pattern);
+	serializer.WriteProperty(203, "overwrite_or_ignore", overwrite_or_ignore);
+	serializer.WriteProperty(204, "per_thread_output", per_thread_output);
+	serializer.WriteProperty(205, "partition_output", partition_output);
+	serializer.WriteProperty(206, "partition_columns", partition_columns);
+	serializer.WriteProperty(207, "names", names);
+	serializer.WriteProperty(208, "expected_types", expected_types);
+	serializer.WriteProperty(209, "copy_info", copy_info);
+
+	// Serialize function
+	serializer.WriteProperty(210, "function_name", function.name);
+
+	bool has_serialize = function.serialize;
+	serializer.WriteProperty(211, "function_has_serialize", has_serialize);
+	if (has_serialize) {
+		D_ASSERT(function.deserialize); // if serialize is set, deserialize should be set as well
+		serializer.WriteObject(212, "function_data",
+		                       [&](Serializer &obj) { function.serialize(obj, *bind_data, function); });
+	}
 }
 
 unique_ptr<LogicalOperator> LogicalCopyToFile::Deserialize(Deserializer &deserializer) {
-	throw SerializationException("LogicalCopyToFile not implemented yet");
+	auto file_path = deserializer.ReadProperty<string>(200, "file_path");
+	auto use_tmp_file = deserializer.ReadProperty<bool>(201, "use_tmp_file");
+	auto filename_pattern = deserializer.ReadProperty<FilenamePattern>(202, "filename_pattern");
+	auto overwrite_or_ignore = deserializer.ReadProperty<bool>(203, "overwrite_or_ignore");
+	auto per_thread_output = deserializer.ReadProperty<bool>(204, "per_thread_output");
+	auto partition_output = deserializer.ReadProperty<bool>(205, "partition_output");
+	auto partition_columns = deserializer.ReadProperty<vector<idx_t>>(206, "partition_columns");
+	auto names = deserializer.ReadProperty<vector<string>>(207, "names");
+	auto expected_types = deserializer.ReadProperty<vector<LogicalType>>(208, "expected_types");
+	auto copy_info =
+	    unique_ptr_cast<ParseInfo, CopyInfo>(deserializer.ReadProperty<unique_ptr<ParseInfo>>(209, "copy_info"));
+
+	// Deserialize function
+	auto &context = deserializer.Get<ClientContext &>();
+	auto name = deserializer.ReadProperty<string>(210, "function_name");
+
+	auto &func_catalog_entry =
+	    Catalog::GetEntry(context, CatalogType::COPY_FUNCTION_ENTRY, SYSTEM_CATALOG, DEFAULT_SCHEMA, name);
+	if (func_catalog_entry.type != CatalogType::COPY_FUNCTION_ENTRY) {
+		throw InternalException("DeserializeFunction - can't find catalog entry for function %s", name);
+	}
+	auto &function_entry = func_catalog_entry.Cast<CopyFunctionCatalogEntry>();
+	auto function = function_entry.function;
+	// Deserialize function data
+	unique_ptr<FunctionData> bind_data;
+	auto has_serialize = deserializer.ReadProperty<bool>(211, "function_has_serialize");
+	if (has_serialize) {
+		// Just deserialize the bind data
+		deserializer.ReadObject(212, "function_data",
+		                        [&](Deserializer &obj) { bind_data = function.deserialize(obj, function); });
+	} else {
+		// Otherwise, re-bind with the copy info
+		if (!function.copy_to_bind) {
+			throw InternalException("Copy function \"%s\" has neither bind nor (de)serialize", function.name);
+		}
+		bind_data = function.copy_to_bind(context, *copy_info, names, expected_types);
+	}
+
+	auto result = make_uniq<LogicalCopyToFile>(function, std::move(bind_data), std::move(copy_info));
+	result->file_path = file_path;
+	result->use_tmp_file = use_tmp_file;
+	result->filename_pattern = filename_pattern;
+	result->overwrite_or_ignore = overwrite_or_ignore;
+	result->per_thread_output = per_thread_output;
+	result->partition_output = partition_output;
+	result->partition_columns = partition_columns;
+	result->names = names;
+	result->expected_types = expected_types;
+
+	return std::move(result);
 }
 
 idx_t LogicalCopyToFile::EstimateCardinality(ClientContext &context) {
diff --git a/src/storage/serialization/serialize_logical_operator.cpp b/src/storage/serialization/serialize_logical_operator.cpp
index f64dcab5a95d..ab8dee9874f4 100644
--- a/src/storage/serialization/serialize_logical_operator.cpp
+++ b/src/storage/serialization/serialize_logical_operator.cpp
@@ -189,6 +189,20 @@ unique_ptr<LogicalOperator> LogicalOperator::Deserialize(Deserializer &deserializer) {
 	return result;
 }
 
+void FilenamePattern::Serialize(Serializer &serializer) const {
+	serializer.WritePropertyWithDefault(200, "base", _base);
+	serializer.WritePropertyWithDefault(201, "pos", _pos);
+	serializer.WritePropertyWithDefault(202, "uuid", _uuid);
+}
+
+FilenamePattern FilenamePattern::Deserialize(Deserializer &deserializer) {
+	FilenamePattern result;
+	deserializer.ReadPropertyWithDefault(200, "base", result._base);
+	deserializer.ReadPropertyWithDefault(201, "pos", result._pos);
+	deserializer.ReadPropertyWithDefault(202, "uuid", result._uuid);
+	return result;
+}
+
 void LogicalAggregate::Serialize(Serializer &serializer) const {
 	LogicalOperator::Serialize(serializer);
 	serializer.WritePropertyWithDefault<vector<unique_ptr<Expression>>>(200, "expressions", expressions);
diff --git a/src/storage/storage_info.cpp b/src/storage/storage_info.cpp
index e3f0fffc5956..0e124b5ef00f 100644
--- a/src/storage/storage_info.cpp
+++ b/src/storage/storage_info.cpp
@@ -9,7 +9,8 @@ struct StorageVersionInfo {
 	idx_t storage_version;
 };
 
-static StorageVersionInfo storage_version_info[] = {{"v0.8.0 or v0.8.1", 51},
+static StorageVersionInfo storage_version_info[] = {{"v0.9.0 or v0.9.1", 64},
+                                                    {"v0.8.0 or v0.8.1", 51},
                                                     {"v0.7.0 or v0.7.1", 43},
                                                     {"v0.6.0 or v0.6.1", 39},
                                                     {"v0.5.0 or v0.5.1", 38},
diff --git a/test/fuzzer/afl/issue_8185.test b/test/fuzzer/afl/issue_8185.test
new file mode 100644
index 000000000000..aaa4e46fe566
--- /dev/null
+++ b/test/fuzzer/afl/issue_8185.test
@@ -0,0 +1,13 @@
+# name: test/fuzzer/afl/issue_8185.test
+# description: Issue #8185 - DuckDB binary crashed at duckdb::ExpressionIterator::EnumerateChildren
+# group: [afl]
+
+statement ok
+PRAGMA enable_verification
+
+statement ok
+CREATE TABLE v0 ( v1 INTEGER , v2 INTEGER) ;
+
+query II
+SELECT * FROM v0 WHERE v2 <= 2 AND v2 <= v1 AND v1 < v2 ORDER BY v1 DESC ;
+----
diff --git a/test/fuzzer/pedro/art_prefix_error.test
b/test/fuzzer/pedro/art_prefix_error.test index 158ce59ae605..3dfaf266cb13 100644 --- a/test/fuzzer/pedro/art_prefix_error.test +++ b/test/fuzzer/pedro/art_prefix_error.test @@ -8,4 +8,4 @@ CREATE TABLE t0 (c0 BLOB PRIMARY KEY); statement error INSERT INTO t0(c0) VALUES (BLOB '\x00a'), (BLOB ''); ---- -Not implemented Error: Indexes cannot contain BLOBs that contain null-terminated bytes. \ No newline at end of file +ART indexes cannot contain BLOBs with zero bytes. \ No newline at end of file diff --git a/test/fuzzer/sqlsmith/strptime_null_statistics.test b/test/fuzzer/sqlsmith/strptime_null_statistics.test new file mode 100644 index 000000000000..279a3f0d1998 --- /dev/null +++ b/test/fuzzer/sqlsmith/strptime_null_statistics.test @@ -0,0 +1,12 @@ +# name: test/fuzzer/sqlsmith/strptime_null_statistics.test +# description: have you seen the fnords? +# group: [sqlsmith] + +statement ok +create table all_types as select * exclude(small_enum, medium_enum, large_enum) from test_all_types() limit 0; + +statement error +SELECT (COLUMNS(list_filter(*, (c6 -> strptime(c6, TRY_CAST(c3 AS BIGINT))))) BETWEEN c3 AND 6509) +FROM duckdb_databases() AS t5(c1, c2, c3, c4) +---- +Binder Error: Star expression diff --git a/test/sql/catalog/view/test_view.test b/test/sql/catalog/view/test_view.test index d1979f04afe0..0a34542e58ec 100644 --- a/test/sql/catalog/view/test_view.test +++ b/test/sql/catalog/view/test_view.test @@ -13,10 +13,14 @@ statement ok INSERT INTO t1 VALUES (41), (42), (43) statement ok -CREATE VIEW v1 AS SELECT i AS j FROM t1 WHERE i < 43 +CREATE VIEW v1 AS SELECT + i AS j +FROM t1 WHERE i < 43 statement error CREATE VIEW v1 AS SELECT 'whatever' +---- +Catalog Error: View with name "v1" already exists! query I SELECT j FROM v1 WHERE j > 41 diff --git a/test/sql/catalog/view/test_view_schema_change.test b/test/sql/catalog/view/test_view_schema_change.test index ba2bb4f7105d..07567620c7be 100644 --- a/test/sql/catalog/view/test_view_schema_change.test +++ b/test/sql/catalog/view/test_view_schema_change.test @@ -68,3 +68,12 @@ query I SELECT * FROM v1 ---- +# Changing the types of the table that the view references also makes the view unusable + +statement ok +ALTER TABLE t1 ALTER i TYPE VARCHAR; + +statement error +select * from v1; +---- +Binder Error: Contents of view were altered: types don't match! 
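(A minimal sketch of what the new CreateViewInfo::ToString surfaces through duckdb_views(), via the Python client; the view definition and the printed line mirror the test below:)

import duckdb

con = duckdb.connect()
con.sql("CREATE VIEW v1 (a) AS SELECT 'x' AS x, 'y' AS y")
# Every view column is spelled out in the stored SQL, including unaliased ones:
print(con.sql("SELECT sql FROM duckdb_views() WHERE NOT internal").fetchone()[0].strip())
# CREATE VIEW v1 (a, y) AS SELECT 'x' AS x, 'y' AS y;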
diff --git a/test/sql/catalog/view/test_view_sql.test b/test/sql/catalog/view/test_view_sql.test new file mode 100644 index 000000000000..6419dc3ee766 --- /dev/null +++ b/test/sql/catalog/view/test_view_sql.test @@ -0,0 +1,92 @@ +# name: test/sql/catalog/view/test_view_sql.test +# description: Test behavior of 'sql' on various different views +# group: [view] + +statement ok +PRAGMA enable_verification + +statement ok +create schema my_schema; + +# X contains columns `a` and `y` +statement ok +CREATE VIEW my_schema.X (a) AS SELECT 'x' as x, 'y' as y; + +query I +select trim(sql, chr(10)) from duckdb_views() where internal = false; +---- +CREATE VIEW my_schema.X (a, y) AS SELECT 'x' AS x, 'y' AS y; + +statement ok +alter view my_schema.X rename to Y; + +# Properly renamed to Y +query I +select trim(sql, chr(10)) from duckdb_views() where internal = false; +---- +CREATE VIEW my_schema.Y (a, y) AS SELECT 'x' AS x, 'y' AS y; + +statement ok +drop schema my_schema cascade; + +query I +select trim(sql, chr(10)) from duckdb_views() where internal = false; +---- + +statement ok +create table tbl ( + a integer, + b varchar +) + +statement ok +create view vw as select * from tbl; + +# sql is not affected by the column names of the table +query I +select trim(sql, chr(10)) from duckdb_views() where internal = false; +---- +CREATE VIEW vw (a, b) AS SELECT * FROM tbl; + +statement ok +alter table tbl rename column b to x; + +# sql is not affected by the column names of the table +query I +select trim(sql, chr(10)) from duckdb_views() where internal = false; +---- +CREATE VIEW vw (a, b) AS SELECT * FROM tbl; + +statement ok +create or replace view vw (c1, c2) as select * from tbl; + +statement ok +create or replace table "table name" ( + "column name 1" integer, + "column name 2" varchar +) + +statement ok +create or replace view "view name" as select * from "table name"; + +statement ok +drop view vw; + +query I +select trim(sql, chr(10)) from duckdb_views() where internal = false; +---- +CREATE VIEW "view name" ("column name 1", "column name 2") AS SELECT * FROM "table name"; + +statement ok +drop view "view name" + +statement ok +create schema "schema name"; + +statement ok +CREATE VIEW "schema name"."view name" ("other name 1", "column name 2") AS SELECT * FROM "table name"; + +query I +select trim(sql, chr(10)) from duckdb_views() where internal = false; +---- +CREATE VIEW "schema name"."view name" ("other name 1", "column name 2") AS SELECT * FROM "table name"; diff --git a/test/sql/copy/csv/data/test/struct_padding.csv b/test/sql/copy/csv/data/test/struct_padding.csv new file mode 100644 index 000000000000..97ccc1f883ee --- /dev/null +++ b/test/sql/copy/csv/data/test/struct_padding.csv @@ -0,0 +1,15 @@ +"{'val':x}" +"{'val':x }" +"{'val': x}" +"{'val': x }" +"{'val':'y'}" +"{'val':'y' }" +"{'val': 'y'}" +"{'val': 'y' }" +"{'val':''}" +"{'val':'' }" +"{'val': ''}" +"{'val': '' }" +"{'val':}" +"{'val': }" +"{'val': }" diff --git a/test/sql/copy/csv/struct_padding.test b/test/sql/copy/csv/struct_padding.test new file mode 100644 index 000000000000..89e1859bfed6 --- /dev/null +++ b/test/sql/copy/csv/struct_padding.test @@ -0,0 +1,25 @@ +# name: test/sql/copy/csv/struct_padding.test +# description: Verify that whitespace padding in struct VARCHAR fields are properly removed +# group: [csv] + +statement ok +PRAGMA enable_verification + +query I +SELECT * FROM read_csv('test/sql/copy/csv/data/test/struct_padding.csv', columns={'col': 'STRUCT(val VARCHAR)'}) ORDER BY 1; +---- +{'val': } +{'val': } 
+{'val': } +{'val': } +{'val': } +{'val': } +{'val': } +{'val': x} +{'val': x} +{'val': x} +{'val': x} +{'val': y} +{'val': y} +{'val': y} +{'val': y} diff --git a/test/sql/index/art/issues/test_art_fuzzer.test b/test/sql/index/art/issues/test_art_fuzzer.test index 73a40a0cc0d0..80b550db7c3e 100644 --- a/test/sql/index/art/issues/test_art_fuzzer.test +++ b/test/sql/index/art/issues/test_art_fuzzer.test @@ -30,7 +30,7 @@ CREATE INDEX i2 ON t2 (c1); statement error INSERT INTO t2 VALUES (decode('g\x00'::BLOB)::VARCHAR),('g'); ---- -Not implemented Error: Indexes cannot contain BLOBs that contain null-terminated bytes. +ART indexes cannot contain BLOBs with zero bytes. statement ok INSERT INTO t2 VALUES ('\0'); @@ -92,7 +92,7 @@ CREATE INDEX i21 ON t21 (c1, "decode"('\x00'::BLOB)); statement error INSERT INTO t21 VALUES (1); ---- -Not implemented Error: Indexes cannot contain BLOBs that contain null-terminated bytes. +ART indexes cannot contain BLOBs with zero bytes. statement error CREATE INDEX i21 ON t21 (c1); \ No newline at end of file diff --git a/test/sql/join/asof/test_asof_join.test b/test/sql/join/asof/test_asof_join.test index 82651457f383..762d57936dbc 100644 --- a/test/sql/join/asof/test_asof_join.test +++ b/test/sql/join/asof/test_asof_join.test @@ -17,6 +17,26 @@ INSERT INTO events0 VALUES (8, 3) ; +# Prevent optimiser from removing true inequalities +statement ok +create table prices("when" timestamp, symbol int, price int); + +statement ok +insert into prices values ('2020-01-01 00:00:00', 1, 42); + +statement ok +create table trades("when" timestamp, symbol int); + +statement ok +insert into trades values ('2020-01-01 00:00:03', 1); + +query III +SELECT t.*, p.price +FROM trades t ASOF JOIN prices p + ON t.symbol = p.symbol AND t.when >= p.when; +---- +2020-01-01 00:00:03 1 42 + # Use an ASOF join inside of a correlated subquery diff --git a/test/sql/pg_catalog/sqlalchemy.test b/test/sql/pg_catalog/sqlalchemy.test index 8d910cc6589d..c1775bd65ff5 100644 --- a/test/sql/pg_catalog/sqlalchemy.test +++ b/test/sql/pg_catalog/sqlalchemy.test @@ -158,7 +158,7 @@ JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'myschema' AND c.relname = 'v1' AND c.relkind IN ('v', 'm') ---- -CREATE VIEW myschema.v1 AS SELECT 42; +CREATE VIEW myschema.v1 ("42") AS SELECT 42; # get_columns query IIII diff --git a/test/sql/storage/null_byte_storage.test b/test/sql/storage/null_byte_storage.test index c4f753e4f8ab..0ac0643c07df 100644 --- a/test/sql/storage/null_byte_storage.test +++ b/test/sql/storage/null_byte_storage.test @@ -30,7 +30,7 @@ goo\042 statement error CREATE INDEX i_index ON null_byte(v) ---- -Not implemented Error: Indexes cannot contain BLOBs that contain null-terminated bytes. +ART indexes cannot contain BLOBs with zero bytes. 
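# (The constraint is on the key bytes themselves: any VARCHAR or BLOB value
# containing chr(0), like the 'goo\0...' rows in this table, is rejected at
# ART index creation, while regular scans over those values keep working.)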
query I SELECT * FROM null_byte WHERE v=concat('goo', chr(0), 42) diff --git a/test/sql/table_function/duckdb_columns.test b/test/sql/table_function/duckdb_columns.test index 6b03d66f1db3..f9dc3c3a18f5 100644 --- a/test/sql/table_function/duckdb_columns.test +++ b/test/sql/table_function/duckdb_columns.test @@ -26,3 +26,59 @@ memory integers i INTEGER NULL True 32 2 0 memory test i INTEGER NULL False 32 2 0 memory test j DECIMAL(18,3) NULL True 18 10 3 memory test k VARCHAR 'hello' True NULL NULL NULL + +statement ok +create view v1 as select * from test + +query II +select table_name, column_name from duckdb_columns where table_name = 'v1' +---- +v1 i +v1 j +v1 k + +statement ok +alter table test rename column j to renamed + +# Rename of the base table is not reflected in the view's info +query II +select table_name, column_name from duckdb_columns where table_name = 'v1' +---- +v1 i +v1 j +v1 k + +statement ok +alter table test rename column renamed to j + +statement ok +create or replace view v1 (a, b) as select * from test; + +query II +select table_name, column_name from duckdb_columns where table_name = 'v1' +---- +v1 a +v1 b +v1 k + +statement ok +alter table test rename column j to renamed + +# The rename of 'j' is not reflected in the view's info because it was aliased to 'b' +query II +select table_name, column_name from duckdb_columns where table_name = 'v1' +---- +v1 a +v1 b +v1 k + +statement ok +alter table test rename column k to not_k + +# The rename of 'k' is also not reflected in the view's info even though it was not aliased +query II +select table_name, column_name from duckdb_columns where table_name = 'v1' +---- +v1 a +v1 b +v1 k diff --git a/test/sql/types/nested/list/list_aggregates.test b/test/sql/types/nested/list/list_aggregates.test index bc5e4c3f25c7..76f3b0196aa9 100644 --- a/test/sql/types/nested/list/list_aggregates.test +++ b/test/sql/types/nested/list/list_aggregates.test @@ -122,3 +122,11 @@ select i, i % 2, list(i) over(partition by i % 2 order by i rows between 1 prece 5 1 [3, 5, 7] 7 1 [5, 7, 9] 9 1 [7, 9] + +# parameter not resolved issue (#484) + +statement ok +PREPARE rebind_stmt AS SELECT list(list_value({'foo': [?]})); + +statement ok +EXECUTE rebind_stmt(10); diff --git a/test/sql/types/union/struct_to_union.test b/test/sql/types/union/struct_to_union.test index df6be2fd3422..d634f027bd67 100644 --- a/test/sql/types/union/struct_to_union.test +++ b/test/sql/types/union/struct_to_union.test @@ -53,7 +53,7 @@ statement ok INSERT INTO struct_tbl VALUES (ROW(0, True, NULL, NULL)), (ROW(1, NULL, 23423, NULL)), - (ROW(0, True, NULL, NULL)) + (ROW(0, True, NULL, NULL)) # Verify case-insensitive statement ok diff --git a/test/sql/types/union/union_cast.test b/test/sql/types/union/union_cast.test index e86b8ec66d52..b3697b48b839 100644 --- a/test/sql/types/union/union_cast.test +++ b/test/sql/types/union/union_cast.test @@ -176,8 +176,29 @@ query III SELECT union_tag(u), union_tag(u::UNION(i SMALLINT, b INT)), u::UNION(i SMALLINT, b INT) FROM tbl4 ORDER BY ALL;; ---- NULL NULL NULL -NULL NULL NULL i i 1 i i 3 b b NULL b b NULL +b b NULL + +# DuckDB internal issue #477 +query I +select [ + true::UNION( + a integer, + c bool + ) +] as a from range(10); +---- +[true] +[true] +[true] +[true] +[true] +[true] +[true] +[true] +[true] +[true] + diff --git a/test/sql/types/union/union_validity.test b/test/sql/types/union/union_validity.test new file mode 100644 index 000000000000..bedc8943a651 --- /dev/null +++ b/test/sql/types/union/union_validity.test @@ -0,0 
+1,24 @@ +# name: test/sql/types/union/union_validity.test +# group: [union] + +statement ok +CREATE TABLE tbl (u UNION(a INT, b VARCHAR)); + +statement ok +INSERT INTO tbl VALUES (1), (NULL), (NULL::VARCHAR), (NULL::INT); + +statement ok +DELETE FROM tbl + +statement ok +INSERT INTO tbl VALUES (1), (NULL), (NULL::VARCHAR), (NULL::INT); + +query II rowsort +SELECT union_tag(u) as tag, u as val FROM tbl; +---- +NULL NULL +a 1 +a NULL +b NULL + + diff --git a/test/sql/window/test_dense_rank.test b/test/sql/window/test_dense_rank.test index 2454d43f3204..48da63759046 100644 --- a/test/sql/window/test_dense_rank.test +++ b/test/sql/window/test_dense_rank.test @@ -34,3 +34,180 @@ SELECT COUNT(*), MIN(d), MAX(d), MIN(c), MAX(c) FROM w ---- 50 1 50 180 180 + +statement ok +CREATE TABLE issue9416(idx VARCHAR, source VARCHAR, project VARCHAR, specimen VARCHAR, sample_id VARCHAR); + +statement ok +INSERT INTO issue9416 VALUES + ('197bc9528efbc76a523d796b749a69f6','json','0bf0b46fb9c01829c55e','e4de2878',NULL), + ('0f62e5fa923495012f3863e7ea05f566','json','d98171d6fe06b3','440ce2bf','9fc93ee404d6bccb69'), + ('9b15a709814582ecbec00d8397852865','json','24ed1657','c3d1f46c','06c234e260a7484'), + ('8569d67b0ccbbf316b360be3bb4fe418','json','d98171d6fe06b3','14875a37','3416100f300c7bd'), + ('d2f02b24d59696079e3d649b403fbb22','json','82e092e750a','e7deeb7f','6d8dded6f044'), + ('60a2b8898440b2f4531268e27b7d3495','json','f657d34b6','46afa8e7','7bb186ce013b'), + ('5aa1982136f3991ad5ed537793d40a0f','json','d58e2b351518','a98b2b0c','ac594941b5d9'), + ('cc70cc91af828c833b5bf20785b91918','json','4953ff4b','8450467c','d1509d0abde0'), + ('7cf99d6372183aab6a9a22205e1b0e96','json','14b5b262c52400','e7deeb7f','6d8dded6f044'), + ('3cf6f7ec6609c6456f6587c441310ace','json','0c155a1ba5','e984dd5b','d374014b756d'), + ('e1223eb2cc51161d32495ff0ad8b34ae','json','f05964c5c4','4f3354c3','10eebe991cf9'), + ('7b8b64bac7c7dc692d1fe76f6eeff2bb','json','6bd9ce7f1d8','492f260c','314d3e061be7'), + ('7b8b64bac7c7dc692d1fe76f6eeff2bb','json','249f40c5d97','492f260c','314d3e061be7'), + ('cd29186ef73b658079d848fca1ebe839','json','6bd9ce7f1d8','492f260c','314d3e061be7'), + ('d67a74eb29392e06b97161d9f3121705','json','0bf0b46fb9c01829c55e','2d28e9ee','0deb6a6b189d309'), + ('9dcc686429408e3319161c39b008f705','json','24ed1657','8450467c','d1509d0abde0'), + ('11788bb5a0477c1bfb31a9f33af80c40','json','4ea4e97b39c4b','4f3354c3','10eebe991cf9'), + ('59bceab687b4004dbeed6291f07af37d','json','6d00cb7409','4f3354c3','10eebe991cf9'), + ('76b23210847e653b6060104da0e86d5b','json','24ed1657','22f4528f','7632cf8f4849404'), + ('2605143ff05ae1ce66b1cc70f80fe53d','json','249f40c5d97','b93c50ce','be70d8b88fff'), + ('1a1864b5f4ed27acfbbf6e5069348a5a','json','6bd9ce7f1d8','6c082f61','ee109745d498'), + ('92200c3306e18e53a41550c6306a3ee4','json','8271fea91bc236c','d6f24fd5',NULL), + ('dd39e08b282cf4a6429bcfefaa2af071','json','249f40c5d97','1396d8b6','3826343264acc9'), + ('3afcae2318313f112b62536fa160678d','json','24ed1657','5311f290','129c0a0fd3e82a8'), + ('3afcae2318313f112b62536fa160678d','json','24ed1657','08b8de7c543d','129c0a0fd3e82a8'), + ('3afcae2318313f112b62536fa160678d','json','24ed1657','ce001fa3a2a4','129c0a0fd3e82a8'), + ('79acd6669071e95a5b2fe5456216ab32','json','24ed1657','5311f290','129c0a0fd3e82a8'), + ('79acd6669071e95a5b2fe5456216ab32','json','24ed1657','08b8de7c543d','129c0a0fd3e82a8'), + ('79acd6669071e95a5b2fe5456216ab32','json','24ed1657','ce001fa3a2a4','129c0a0fd3e82a8'), + 
('d0a7e9d3eda115120021a895a81db8be','json','24ed1657','5311f290','129c0a0fd3e82a8'), + ('d0a7e9d3eda115120021a895a81db8be','json','24ed1657','08b8de7c543d','129c0a0fd3e82a8'), + ('d0a7e9d3eda115120021a895a81db8be','json','24ed1657','ce001fa3a2a4','129c0a0fd3e82a8'), + ('a59a3a4ad8d2ab867c9b830974588645','json','24ed1657','5311f290','129c0a0fd3e82a8'), + ('a59a3a4ad8d2ab867c9b830974588645','json','24ed1657','08b8de7c543d','129c0a0fd3e82a8'), + ('a59a3a4ad8d2ab867c9b830974588645','json','24ed1657','ce001fa3a2a4','129c0a0fd3e82a8'), + ('6193ffd18b0da96e80e2a38baac9a7e4','json','14b5b262c52400','3c03d64c34','1b5cfdd6a5de'), + ('6193ffd18b0da96e80e2a38baac9a7e4','rprt','1a5cf3833',NULL,'1b5cfdd6a5de'), + ('ecf1739fed72151784dab88dbe2f2aa9','json','14b5b262c52400','3c03d64c34','1b5cfdd6a5de'), + ('ecf1739fed72151784dab88dbe2f2aa9','rprt','1a5cf3833',NULL,'1b5cfdd6a5de'), + ('204cd9b011e2cab64bcdf1b3d668a9ef','json','7d9a79908fcc','8274fbb94a','5a928f187ed19b2'), + ('c8360bd0e28ea5bbffd66e76886bbccb','json','d6b3921920','a63d8','0e06e1f9f6580fb'), + ('c8360bd0e28ea5bbffd66e76886bbccb','rprt','d6b3921920',NULL,'0e06e1f9f6580fb'), + ('c6eb00fb5a023557439a9b898c7cc3ea','json','d6b3921920','a63d8','f891b965f2561d9'), + ('c6eb00fb5a023557439a9b898c7cc3ea','rprt','d6b3921920',NULL,'f891b965f2561d9'), + ('f2dacff642ad3f805229d7e976810f1d','rprt','d6b3921920',NULL,'6817ec9d3b7b726'), + ('8def2cd0450b56c3e0c9bb6da67b842b','rprt','d6b3921920',NULL,'6817ec9d3b7b726'), + ('6db7ef8b4a9e41bb41971dced546131b','rprt','d6b3921920',NULL,'bc32d9059dde8ba'), + ('4524efca2bf1aa0565f03a9aaf9771d2','json','14b5b262c52400','cf3b1945e2','5c0157ef5367'), + ('6f63a84401944c32b9a329af08d6473c','json','8b736466c7adc6','d0acb13cd9','d734a9d755ef6276'), + ('8ef4bc6ac39585b2ec45218ad1d06464','rprt','67b7fd541ae7e','c117f7db3b','cf94993616ef'), + ('01899ea72c60bd5e614132c99fffd48e','json','14b5b262c52400','2a50feb98b','eead79cf6ef0'), + ('b1407bdda20fad91cb9072c08c5c23a8','json','3608008ba4c9','e4840a8e75','139e04ae890beb8'), + ('2091d4939af33d3911b057ed446367f1','rprt','6522e2c00f5b87d5b','f2b8d4d02d','23de2ff19778'), + ('81f36975a777a353b0861874e03d0f95','rprt','14b5b262c52400','e05f1a1ec2','acf577df3840'), + ('5b3961bf4255e83ee1e7e795e14c8119','rprt','b9cbf09f3366297','1109e52066','47afce7dacb5'), + ('0b53312f91b22db1bf7c18251a199d36','json','14b5b262c52400','c8561fe22f','6e30638eaaf6'), + ('e277322f26cd477bae52240c46678286','json','14b5b262c52400','d185c22b68','42a062d827c'), + ('71150d87b4e7852448a524e03817efc4','json','bb87c32c765d1','3e60145162','8e072527a7cf82d'), + ('1039fc7de3c12dad1e7d3bd9e73827de','json','d6b3921920','65c0a3e2a9','9ff56f55c850390'), + ('3c67c976516f8a5a1044ad9a8935cf02','rprt','bdc5a7fd6ca','dfc9fd824b','bfbef96674e73829'), + ('56ab3e25a40913b6e961cff217a83750','json','24ed1657','724e7df1','fc81c8a39465'), + ('5f726fff8b638d0ac1ba9dcb9a4037be','json','14b5b262c52400','b1bddeb160','1e2b4afd36f'), + ('4448f84ff7496b6d1a0d311838be238d','json','14b5b262c52400','50a45c4db0','83ef23a7f827'), + ('216414a29307f00aecfc9e5cba7ac821','json','c05bced980e6381','949ae57ce4','05f77bf546f'), + ('5327f9ec2dc334bde0222b52de4d65ef','json','67b7fd541ae7e','8aea85ada0','c0048c2b539e'), + ('d3c9b836ce61a53daf39d813c97a36b9','json','249f40c5d97','35d05d68','db4853c8a41'), + ('6d4affc7041c65d0f56551f4d4370a7d','rprt','d6b3921920',NULL,'84624aa9753a681'), + ('caab5b21770a321067fb2052c2eea792','rprt','14b5b262c52400','8b8da80085','d427763bd611'), + 
('eecdf1e7e87c04c56328b0d37fb06349','json','14b5b262c52400','3bfabeb9d9','7c613b2d73cc'), + ('b533aa0c674433a09cee8555b35b7ca6','json','9c3b3335f959','f9d8c52aa2','d082926c94a8a60'), + ('6dfe749835d6a895a3a116580dc4217a','rprt','14b5b262c52400','6dd9b2d650','b3d88f29e3e5'), + ('6150133032c53a35ce28c6308931137d','rprt','b04a2a75f0c4a9','9f3026e2','a1bcb7232a50'), + ('47e77fd2d027114df5ac9daa17237934','json','b04a2a75f0c4a9','6e07291b','564347d748e0'), + ('0d66c06fd2a29247b4bc798591f15cbc','rprt','093a316f6c9c0856b','ebcca53e20','4c767b833785b25b072e'), + ('997371252646aed7ac3fa43da1f69ef2','rprt','d6b3921920','5be2b052','612fc8691ec7852'), + ('e963d96d34e35ba06cea05ff78e84e41','json','33debfe262d7','114a0c85','f6d1ea3976b0a03'), + ('f50959f1079cd24b7dcb6370d8e63344','rprt','1a5cf3833',NULL,'a1b77be48d05'), + ('4e44d4c96d3d26290d13e5f9bc14d8dd','rprt','67b7fd541ae7e','c117f7db3b','18d653ec3c0'), + ('797c887ce1edab55fefaa7a690065843','rprt','14b5b262c52400','22efccc05e','7a9348e1538f'), + ('ffbc9337bb6f6c7d43ab32a9398474da','rprt','b04a2a75f0c4a9','6e07291b','564347d748e0'), + ('3ac840afe9d088e5c490ed4cd48d2269','json','67b7fd541ae7e','ffaa35275c','c58867f82d10'), + ('72cedda51ecfb6678f4e3a3956066311','json','402423768220bca1f','9a28c664','eed0f9697609'), + ('92488464899a3b31ea1bc61a2ebc2013','json','14b5b262c52400','1a10cdadd7','ee4cacc7ce10'), + ('c85e95cfec9f42fff138d498101cd7ea','json','14b5b262c52400','f1b4cf931e','3b4f71a3ddde'), + ('399edac903f69ac760fa36a8b68cdfb0','rprt','67b7fd541ae7e','a539fb31c3','4c920da298bd'), + ('a223c0e6017570f5a1039003e767e692','json','67b7fd541ae7e','7bfb6b3721','5ae5c617d126'), + ('1503860c3c6391385807ab9b6cdd1934','json','67b7fd541ae7e','4936ad40b0','94fbcb7cd167'), + ('6f269d7f6cf850a9cd0d4d804eef24a0','rprt','14b5b262c52400','943c04e54b','cc79fc503d80'), + ('732a12aa44489aeef05b614a1e8dbd2d','rprt','14b5b262c52400','a2335b4159','45e7e30aa621'), + ('b876617f4b7bdb3abc694da31b09d838','rprt','14b5b262c52400','93a91bf863','b824ed7a5f67'), + ('fd63b4bf7ee546b2c0c55200ae968872','rprt','67b7fd541ae7e','62cd05887b','9c1940a4032a'), + ('50a00a903778fb65ef92a736bd9fe262','json','67b7fd541ae7e','7e81c8b2f3','00eb98252668113'), + ('053891bc9d52d48986302c5e13adf276','json','67b7fd541ae7e','a1762f3d79','e06b767a6ed2'), + ('f537b4d753bb441436ff8d73af423691','json','efdfcef7da0','98c6db64','4c9b34c566ae'), + ('8dc4f5e5bb2663f09218b369be5bf524','rprt','03b000865c98e','f31af55c63','a02983ae108ced0449cb4'), + ('d69d899aba162c4f14593f9c6a062bdd','rprt','67b7fd541ae7e','7bfb6b3721','c32aa62b7207'), + ('88b784ce065a5cf2360e7616c4b3f7f6','json','62769691cd4e','ebdf919e','37e16f2e5319832f'), + ('cdda71f56ad05dae20b1e22ee19b227b','rprt','67b7fd541ae7e','17d7c8f29a','21c1f8fadde3'), + ('5e1d22685085f0d85553eb2b7b4155a6','rprt','14b5b262c52400','737542af23','092dcc6fdef7'), + ('7f387dad4f9bef7c2301977590cec0f2','json','67b7fd541ae7e','09591aea45','ab584388528d'), + ('a83145a960baebcf1bff9c462f8489e5','json','14b5b262c52400','2ea6e3e6fb','4fe26b0e2203'), + ('4d7a36c58267592481297676d57c9e84','json','581d813a840d3d6391','54239e1a8e','232d99055474'), + ('31f0b71e67e64d42079098a53374e094','json','14b5b262c52400','a43ad72889','7d702f310fbe'), + ('6ddc75b9771136d9a6366aaa5d951f1a','rprt','14b5b262c52400','75263a6f0c','7309f2e8695a'), + ('4595e59a1225042680842f63736481d4','json','14b5b262c52400','44f5fdb8b1','9c2c14ec6924'), + ('66c1f24117ee34a1b3d587a22047fad1','rprt','14b5b262c52400','bfe39ca56f','b49aa5fad4d5'), + 
('37027bc152a681b87d5ffb9a37c325a5','rprt','efdfcef7da0','9f1668a8','682ff39acb86'), + ('c3b46edd87eb14842b6444c001ae6456','json','3608008ba4c9','f650844d13','5326d2a94e28825'), + ('82beaa8e1c8c482d792f601b37a40b8a','rprt','14b5b262c52400','c86e0093c2','5ae33221b17'), + ('333aa3a45ab3f01ad95b2a312870aa1e','json','14b5b262c52400','57f7ec0030','8545146eeba5'), + ('16b64ffcb514bf69c6936eaf4e86889e','rprt','14b5b262c52400','2a50feb98b','64bb80701037'), + ('01d42ee5515c3b500018e723278e27c1','json','67b7fd541ae7e','958967a48a','97453818ba51'), + ('2a031d3176c7d4f19c532e5d2e7b411e','rprt','14b5b262c52400','164c3bb214','3389fe2776be'), + ('8d3b5d415e43df82b6b560effeb6ee80','json','67b7fd541ae7e','9205577d7c','bc96b93082c6'), + ('339690825234f32fd7da02fd567d5109','rprt','b04a2a75f0c4a9','9f3026e2','a1bcb7232a50'), + ('13c6d4555db02b653d8f2b5ce06bb143','json','402423768220bca1f','49d58dba','59b0906f7fcb'), + ('39a39a7e3c48c1b3b262e8653b1a3ec4','rprt','14b5b262c52400','7b34590a85','eec88226d871'), + ('fdd9d71a087b9048b8ac7dd29186cedf','rprt','315316c7af745a97','8a7c0917d4','743680a0303171bbd'), + ('f37e684c9ec0d0690a3c6feeaf6b1301','json','14b5b262c52400','0059c84703','8426f8984729'), + ('3787d0c9ead3866324d7586044747d65','rprt','b9cbf09f3366297','3822b4212e','611f4b0f498e') +; + +# dup n_dup n_spcmn idx source project specimen sample_id +query IIIIIIII +WITH dups AS ( + SELECT ROW_NUMBER() OVER same_idx AS dup + , COUNT(*) OVER same_idx AS n_dup + , (DENSE_RANK() OVER asc_spcmn) + (DENSE_RANK() OVER desc_spcmn) - 1 AS n_spcmn + , * + FROM issue9416 + WINDOW same_idx AS ( + PARTITION BY idx + ORDER BY source, project, specimen + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + , asc_spcmn AS ( + PARTITION BY idx + ORDER BY specimen ASC NULLS FIRST + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + , desc_spcmn AS ( + PARTITION BY idx + ORDER BY specimen DESC NULLS LAST + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) +) +SELECT * +FROM dups +WHERE n_spcmn > 1 +ORDER BY idx, dup; +---- +1 3 3 3afcae2318313f112b62536fa160678d json 24ed1657 08b8de7c543d 129c0a0fd3e82a8 +2 3 3 3afcae2318313f112b62536fa160678d json 24ed1657 5311f290 129c0a0fd3e82a8 +3 3 3 3afcae2318313f112b62536fa160678d json 24ed1657 ce001fa3a2a4 129c0a0fd3e82a8 +1 2 2 6193ffd18b0da96e80e2a38baac9a7e4 json 14b5b262c52400 3c03d64c34 1b5cfdd6a5de +2 2 2 6193ffd18b0da96e80e2a38baac9a7e4 rprt 1a5cf3833 NULL 1b5cfdd6a5de +1 3 3 79acd6669071e95a5b2fe5456216ab32 json 24ed1657 08b8de7c543d 129c0a0fd3e82a8 +2 3 3 79acd6669071e95a5b2fe5456216ab32 json 24ed1657 5311f290 129c0a0fd3e82a8 +3 3 3 79acd6669071e95a5b2fe5456216ab32 json 24ed1657 ce001fa3a2a4 129c0a0fd3e82a8 +1 3 3 a59a3a4ad8d2ab867c9b830974588645 json 24ed1657 08b8de7c543d 129c0a0fd3e82a8 +2 3 3 a59a3a4ad8d2ab867c9b830974588645 json 24ed1657 5311f290 129c0a0fd3e82a8 +3 3 3 a59a3a4ad8d2ab867c9b830974588645 json 24ed1657 ce001fa3a2a4 129c0a0fd3e82a8 +1 2 2 c6eb00fb5a023557439a9b898c7cc3ea json d6b3921920 a63d8 f891b965f2561d9 +2 2 2 c6eb00fb5a023557439a9b898c7cc3ea rprt d6b3921920 NULL f891b965f2561d9 +1 2 2 c8360bd0e28ea5bbffd66e76886bbccb json d6b3921920 a63d8 0e06e1f9f6580fb +2 2 2 c8360bd0e28ea5bbffd66e76886bbccb rprt d6b3921920 NULL 0e06e1f9f6580fb +1 3 3 d0a7e9d3eda115120021a895a81db8be json 24ed1657 08b8de7c543d 129c0a0fd3e82a8 +2 3 3 d0a7e9d3eda115120021a895a81db8be json 24ed1657 5311f290 129c0a0fd3e82a8 +3 3 3 d0a7e9d3eda115120021a895a81db8be json 24ed1657 ce001fa3a2a4 129c0a0fd3e82a8 +1 2 2 ecf1739fed72151784dab88dbe2f2aa9 json 
14b5b262c52400 3c03d64c34 1b5cfdd6a5de +2 2 2 ecf1739fed72151784dab88dbe2f2aa9 rprt 1a5cf3833 NULL 1b5cfdd6a5de diff --git a/tools/jdbc/src/jni/duckdb_java.cpp b/tools/jdbc/src/jni/duckdb_java.cpp index 5f2e5856835d..1a2fede07eef 100644 --- a/tools/jdbc/src/jni/duckdb_java.cpp +++ b/tools/jdbc/src/jni/duckdb_java.cpp @@ -306,9 +306,14 @@ static Connection *get_connection(JNIEnv *env, jobject conn_ref_buf) { //! The database instance cache, used so that multiple connections to the same file point to the same database object duckdb::DBInstanceCache instance_cache; +static const char *const JDBC_STREAM_RESULTS = "jdbc_stream_results"; jobject _duckdb_jdbc_startup(JNIEnv *env, jclass, jbyteArray database_j, jboolean read_only, jobject props) { auto database = byte_array_to_string(env, database_j); DBConfig config; + config.AddExtensionOption( + JDBC_STREAM_RESULTS, + "Whether to stream results. Only one ResultSet on a connection can be open at once when true", + LogicalType::BOOLEAN); if (read_only) { config.options.access_mode = AccessMode::READ_ONLY; } @@ -555,7 +560,11 @@ jobject _duckdb_jdbc_execute(JNIEnv *env, jclass, jobject stmt_ref_buf, jobjectA } } - res_ref->res = stmt_ref->stmt->Execute(duckdb_params, false); + Value result; + bool stream_results = + stmt_ref->stmt->context->TryGetCurrentSetting(JDBC_STREAM_RESULTS, result) ? result.GetValue() : false; + + res_ref->res = stmt_ref->stmt->Execute(duckdb_params, stream_results); if (res_ref->res->HasError()) { string error_msg = string(res_ref->res->GetError()); res_ref->res = nullptr; diff --git a/tools/jdbc/src/main/java/org/duckdb/DuckDBDriver.java b/tools/jdbc/src/main/java/org/duckdb/DuckDBDriver.java index 4fb62773d6be..646d48fa0596 100644 --- a/tools/jdbc/src/main/java/org/duckdb/DuckDBDriver.java +++ b/tools/jdbc/src/main/java/org/duckdb/DuckDBDriver.java @@ -10,7 +10,8 @@ public class DuckDBDriver implements java.sql.Driver { - static final String DUCKDB_READONLY_PROPERTY = "duckdb.read_only"; + public static final String DUCKDB_READONLY_PROPERTY = "duckdb.read_only"; + public static final String JDBC_STREAM_RESULTS = "jdbc_stream_results"; static { try { diff --git a/tools/jdbc/src/test/java/org/duckdb/test/TestDuckDBJDBC.java b/tools/jdbc/src/test/java/org/duckdb/test/TestDuckDBJDBC.java index a362f8bae0cb..75231d520808 100644 --- a/tools/jdbc/src/test/java/org/duckdb/test/TestDuckDBJDBC.java +++ b/tools/jdbc/src/test/java/org/duckdb/test/TestDuckDBJDBC.java @@ -82,6 +82,7 @@ import static java.util.Collections.emptyList; import static java.util.Collections.singletonList; import static java.util.stream.Collectors.toMap; +import static org.duckdb.DuckDBDriver.JDBC_STREAM_RESULTS; public class TestDuckDBJDBC { @@ -3729,6 +3730,20 @@ public static void test_race() throws Exception { } } + public static void test_stream_multiple_open_results() throws Exception { + Properties props = new Properties(); + props.setProperty(JDBC_STREAM_RESULTS, String.valueOf(true)); + + String QUERY = "SELECT * FROM range(100000)"; + try (Connection conn = DriverManager.getConnection("jdbc:duckdb:", props); + Statement stmt1 = conn.createStatement(); Statement stmt2 = conn.createStatement()) { + + try (ResultSet rs1 = stmt1.executeQuery(QUERY); ResultSet ignored = stmt2.executeQuery(QUERY)) { + assertThrows(rs1::next, SQLException.class); + } + } + } + public static void test_offset_limit() throws Exception { try (Connection connection = DriverManager.getConnection("jdbc:duckdb:"); Statement s = connection.createStatement()) { @@ -3751,6 +3766,20 
@@ public static void test_offset_limit() throws Exception { } } + public static void test_result_streaming() throws Exception { + Properties props = new Properties(); + props.setProperty(JDBC_STREAM_RESULTS, String.valueOf(true)); + + try (Connection conn = DriverManager.getConnection("jdbc:duckdb:", props); + PreparedStatement stmt1 = conn.prepareStatement("SELECT * FROM range(100000)"); + ResultSet rs = stmt1.executeQuery()) { + while (rs.next()) { + rs.getInt(1); + } + assertFalse(rs.next()); // is exhausted + } + } + public static void main(String[] args) throws Exception { // Woo I can do reflection too, take this, JUnit! Method[] methods = TestDuckDBJDBC.class.getMethods(); diff --git a/tools/pythonpkg/duckdb/query_graph/__main__.py b/tools/pythonpkg/duckdb/query_graph/__main__.py new file mode 100644 index 000000000000..974ac89e20de --- /dev/null +++ b/tools/pythonpkg/duckdb/query_graph/__main__.py @@ -0,0 +1,321 @@ +import json +import os +import sys +import webbrowser +from functools import reduce +import argparse + +qgraph_css = """ +.styled-table { + border-collapse: collapse; + margin: 25px 0; + font-size: 0.9em; + font-family: sans-serif; + min-width: 400px; + box-shadow: 0 0 20px rgba(0, 0, 0, 0.15); +} +.styled-table thead tr { + background-color: #009879; + color: #ffffff; + text-align: left; +} +.styled-table th, +.styled-table td { + padding: 12px 15px; +} +.styled-table tbody tr { + border-bottom: 1px solid #dddddd; +} + +.styled-table tbody tr:nth-of-type(even) { + background-color: #f3f3f3; +} + +.styled-table tbody tr:last-of-type { + border-bottom: 2px solid #009879; +} + +.node-body { + font-size:15px; +} +.tf-nc { + position: relative; + width: 250px; + text-align: center; + background-color: #fff100; +} +""" + + +class NodeTiming: + + def __init__(self, phase: str, time: float) -> object: + self.phase = phase + self.time = time + # percentage is determined later. 
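+        # (filled in by calculate_percentage once the query's total time is known)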
+        self.percentage = 0
+
+    def calculate_percentage(self, total_time: float) -> None:
+        self.percentage = self.time / total_time
+
+    def combine_timing(l: object, r: object) -> object:
+        # TODO: can only add timings for same-phase nodes
+        total_time = l.time + r.time
+        return NodeTiming(l.phase, total_time)
+
+
+class AllTimings:
+
+    def __init__(self):
+        self.phase_to_timings = {}
+
+    def add_node_timing(self, node_timing: NodeTiming):
+        if node_timing.phase in self.phase_to_timings:
+            self.phase_to_timings[node_timing.phase].append(node_timing)
+            return
+        self.phase_to_timings[node_timing.phase] = [node_timing]
+
+    def get_phase_timings(self, phase: str):
+        return self.phase_to_timings[phase]
+
+    def get_summary_phase_timings(self, phase: str):
+        return reduce(NodeTiming.combine_timing, self.phase_to_timings[phase])
+
+    def get_phases(self):
+        phases = list(self.phase_to_timings.keys())
+        phases.sort(key=lambda x: (self.get_summary_phase_timings(x)).time)
+        phases.reverse()
+        return phases
+
+    def get_sum_of_all_timings(self):
+        total_timing_sum = 0
+        for phase in self.phase_to_timings.keys():
+            total_timing_sum += self.get_summary_phase_timings(phase).time
+        return total_timing_sum
+
+
+def open_utf8(fpath: str, flags: str) -> object:
+    return open(fpath, flags, encoding="utf8")
+
+
+def get_child_timings(top_node: object, query_timings: object) -> str:
+    node_timing = NodeTiming(top_node['name'], float(top_node['timing']))
+    query_timings.add_node_timing(node_timing)
+    for child in top_node['children']:
+        get_child_timings(child, query_timings)
+
+
+color_map = {
+    "HASH_JOIN": "#ffffba",
+    "PROJECTION": "#ffb3ba",
+    "SEQ_SCAN": "#baffc9",
+    "UNGROUPED_AGGREGATE": "#ffdfba",
+    "FILTER": "#bae1ff",
+    "ORDER_BY": "#facd60",
+    "PERFECT_HASH_GROUP_BY": "#ffffba",
+    "HASH_GROUP_BY": "#ffffba",
+    "NESTED_LOOP_JOIN": "#ffffba",
+    "STREAMING_LIMIT": "#facd60",
+    "COLUMN_DATA_SCAN": "#1ac0c6",
+    "TOP_N": "#ffdfba"
+}
+
+
+def get_node_body(name: str, result: str, cardinality: float, extra_info: str, timing: object) -> str:
+    node_style = ""
+    stripped_name = name.strip()
+    if stripped_name in color_map:
+        node_style = f"background-color: {color_map[stripped_name]};"
+
+    body = f"<span class=\"tf-nc\" style=\"{node_style}\">"
+    body += "<div class=\"node-body\">"
+    new_name = name.replace("_", " ")
+    body += f"<p><b>{new_name} ({result}s)</b></p>"
+    if extra_info:
+        extra_info = extra_info.replace("[INFOSEPARATOR]", "----")
+        extra_info = extra_info.replace("<br><br>", "<br>")
+        body += f"<p><b>{extra_info}</b></p>"
+    body += f"<p><b>cardinality = {cardinality}</b></p>"
+    # TODO: Expand on timing. Usually available from a detailed profiling
+    body += "</div>"
+    body += "</span>"
+    return body
+
+
+def generate_tree_recursive(json_graph: object) -> str:
+    node_prefix_html = "<li>"
+    node_suffix_html = "</li>"
+    node_body = get_node_body(json_graph["name"],
+                              json_graph["timing"],
+                              json_graph["cardinality"],
+                              json_graph["extra_info"].replace("\n", "<br>"),
+                              json_graph["timings"])
+
+    children_html = ""
+    if len(json_graph['children']) >= 1:
+        children_html += "<ul>"
+        for child in json_graph["children"]:
+            children_html += generate_tree_recursive(child)
+        children_html += "</ul>"
+    return node_prefix_html + node_body + children_html + node_suffix_html
+
+
+# For generating the table in the top left.
+def generate_timing_html(graph_json: object, query_timings: object) -> object:
+    json_graph = json.loads(graph_json)
+    gather_timing_information(json_graph, query_timings)
+    total_time = float(json_graph['timing'])
+    table_head = """
+    <table class="styled-table">
+        <thead>
+            <tr>
+                <th>Phase</th>
+                <th>Time</th>
+                <th>Percentage</th>
+            </tr>
+        </thead>
+        <tbody>
+"""
+
+    table_body = ""
+    table_end = "</tbody></table>"
+
+    execution_time = query_timings.get_sum_of_all_timings()
+
+    all_phases = query_timings.get_phases()
+    query_timings.add_node_timing(NodeTiming("TOTAL TIME", total_time))
+    query_timings.add_node_timing(NodeTiming("Execution Time", execution_time))
+    all_phases = ["TOTAL TIME", "Execution Time"] + all_phases
+    for phase in all_phases:
+        summarized_phase = query_timings.get_summary_phase_timings(phase)
+        summarized_phase.calculate_percentage(total_time)
+        phase_column = f"<b>{phase}</b>" if phase == "TOTAL TIME" or phase == "Execution Time" else phase
+        table_body += f"""
+        <tr>
+            <td>{phase_column}</td>
+            <td>{summarized_phase.time}</td>
+            <td>{str(summarized_phase.percentage * 100)[:6]}%</td>
+        </tr>
+"""
+    table_body += table_end
+    return table_head + table_body
+
+
+def generate_tree_html(graph_json: object) -> str:
+    json_graph = json.loads(graph_json)
+    tree_prefix = "<div class=\"tf-tree\">\n<ul>"
+    tree_suffix = "</ul></div>"
+    # first level of json is general overview
+    # FIXME: make sure json output first level always has only 1 level
+    tree_body = generate_tree_recursive(json_graph['children'][0])
+    return tree_prefix + tree_body + tree_suffix
+
+
+def generate_ipython(json_input: str) -> str:
+    from IPython.core.display import HTML
+
+    html_output = generate_html(json_input, False)
+
+    return HTML(("\n" + "    ${CSS}\n" + "    ${LIBRARIES}\n" + "    <div id=\"query-profile\"></div>\n" +
+                 "    ${CHART_SCRIPT}\n" + "    ").replace("${CSS}", html_output['css']).replace('${CHART_SCRIPT}',
+                                                           html_output['chart_script']).replace(
+        '${LIBRARIES}', html_output['libraries']))
+
+
+def generate_style_html(graph_json: str, include_meta_info: bool) -> None:
+    treeflex_css = "<link rel=\"stylesheet\" href=\"https://unpkg.com/treeflex/dist/css/treeflex.css\">\n"
+    css = "<style>" + qgraph_css + "</style>\n"
+    return {
+        'treeflex_css': treeflex_css,
+        'duckdb_css': css,
+        'libraries': '',
+        'chart_script': ''
+    }
+
+
+def gather_timing_information(json: str, query_timings: object) -> None:
+    # add up all of the times
+    # measure each time as a percentage of the total time.
+    # then you can return a list of [phase, time, percentage]
+    get_child_timings(json['children'][0], query_timings)
+
+
+def translate_json_to_html(input_file: str, output_file: str) -> None:
+    query_timings = AllTimings()
+    with open_utf8(input_file, 'r') as f:
+        text = f.read()
+
+    html_output = generate_style_html(text, True)
+    timing_table = generate_timing_html(text, query_timings)
+    tree_output = generate_tree_html(text)
+
+    # finally create and write the html
+    with open_utf8(output_file, "w+") as f:
+        html = """
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+    <title>Query Profile Graph for Query</title>
+    ${TREEFLEX_CSS}
+    ${DUCKDB_CSS}
+</head>
+<body>
+    <div id="meta-info">
+        ${TIMING_TABLE}
+    </div>
+    ${TREE}
+</body>
+</html>
+"""
+        html = html.replace("${TREEFLEX_CSS}", html_output['treeflex_css'])
+        html = html.replace("${DUCKDB_CSS}", html_output['duckdb_css'])
+        html = html.replace("${TIMING_TABLE}", timing_table)
+        html = html.replace('${TREE}', tree_output)
+        f.write(html)
+
+
+def main() -> None:
+    if sys.version_info[0] < 3:
+        print("Please use python3")
+        exit(1)
+    parser = argparse.ArgumentParser(
+        prog='Query Graph Generator',
+        description='Given a json profile output, generate a html file showing the query graph and timings of operators')
+    parser.add_argument('profile_input', help='profile input in json')
+    parser.add_argument('--out', required=False, default=False)
+    parser.add_argument('--open', required=False, action='store_true', default=True)
+    args = parser.parse_args()
+
+    input = args.profile_input
+    output = args.out
+    if not args.out:
+        if ".json" in input:
+            output = input.replace(".json", ".html")
+        else:
+            print("please provide profile output in json")
+            exit(1)
+    else:
+        if ".html" in args.out:
+            output = args.out
+        else:
+            print("please provide valid .html file for output name")
+            exit(1)
+
+    open_output = args.open
+
+    translate_json_to_html(input, output)
+
+    if open_output:
+        webbrowser.open('file://' + os.path.abspath(output), new=2)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/pythonpkg/setup.py b/tools/pythonpkg/setup.py
index 30f1e1ccddb3..c836548928ae 100644
--- a/tools/pythonpkg/setup.py
+++ b/tools/pythonpkg/setup.py
@@ -319,6 +319,7 @@ def setup_data_files(data_files):
 packages = [
     lib_name,
     'duckdb.typing',
+    'duckdb.query_graph',
     'duckdb.functional',
     'duckdb.value',
     'duckdb-stubs',
diff --git a/tools/pythonpkg/src/numpy/array_wrapper.cpp b/tools/pythonpkg/src/numpy/array_wrapper.cpp
index 4aedf295e063..3b1acbea84e1 100644
--- a/tools/pythonpkg/src/numpy/array_wrapper.cpp
+++ b/tools/pythonpkg/src/numpy/array_wrapper.cpp
@@ -83,7 +83,7 @@ struct TimestampConvertNano {
 struct DateConvert {
 	template <class DUCKDB_T, class NUMPY_T>
 	static int64_t ConvertValue(date_t val) {
-		return Date::EpochNanoseconds(val);
+		return Date::EpochMicroseconds(val);
 	}
 	template <class NUMPY_T>
diff --git a/tools/pythonpkg/src/numpy/raw_array_wrapper.cpp b/tools/pythonpkg/src/numpy/raw_array_wrapper.cpp
index f3166acdcd2d..8838bd1c85af 100644
--- a/tools/pythonpkg/src/numpy/raw_array_wrapper.cpp
+++ b/tools/pythonpkg/src/numpy/raw_array_wrapper.cpp
@@ -114,8 +114,7 @@ string RawArrayWrapper::DuckDBToNumpyDtype(const LogicalType &type) {
 	case LogicalTypeId::TIMESTAMP_SEC:
 		return "datetime64[s]";
 	case LogicalTypeId::DATE:
-		// FIXME: should this not be 'date64[ns]' ?
- return "datetime64[ns]"; + return "datetime64[us]"; case LogicalTypeId::INTERVAL: return "timedelta64[ns]"; case LogicalTypeId::TIME: diff --git a/tools/pythonpkg/src/pyconnection.cpp b/tools/pythonpkg/src/pyconnection.cpp index 8161276ccf5a..3c632abfdca0 100644 --- a/tools/pythonpkg/src/pyconnection.cpp +++ b/tools/pythonpkg/src/pyconnection.cpp @@ -47,6 +47,7 @@ #include "duckdb_python/pybind11/conversions/exception_handling_enum.hpp" #include "duckdb/parser/parsed_data/drop_info.hpp" #include "duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp" +#include "duckdb/main/pending_query_result.hpp" #include @@ -101,10 +102,10 @@ py::object ArrowTableFromDataframe(const py::object &df) { try { return py::module_::import("pyarrow").attr("lib").attr("Table").attr("from_pandas")(df); } catch (py::error_already_set &e) { - // We don't fetch the original python exception because it can cause a segfault + // We don't fetch the original Python exception because it can cause a segfault // The cause of this is not known yet, for now we just side-step the issue. throw InvalidInputException( - "The dataframe could not be converted to a pyarrow.lib.Table, because a python exception occurred."); + "The dataframe could not be converted to a pyarrow.lib.Table, because a Python exception occurred."); } } @@ -120,7 +121,7 @@ static void InitializeConnectionMethods(py::class_ DuckDBPyConnection::ExecuteInternal(const string &query, // if there are multiple statements, we directly execute the statements besides the last one // we only return the result of the last statement to the user, unless one of the previous statements fails for (idx_t i = 0; i + 1 < statements.size(); i++) { - // TODO: this doesn't take in any prepared parameters? + if (statements[i]->n_param != 0) { + throw NotImplementedException( + "Prepared parameters are only supported for the last statement, please split your query up into " + "separate 'execute' calls if you want to use prepared parameters"); + } auto pending_query = connection->PendingQuery(std::move(statements[i]), false); auto res = CompletePendingQuery(*pending_query); diff --git a/tools/pythonpkg/src/python_udf.cpp b/tools/pythonpkg/src/python_udf.cpp index 66ae25e01da2..54494aeb39af 100644 --- a/tools/pythonpkg/src/python_udf.cpp +++ b/tools/pythonpkg/src/python_udf.cpp @@ -279,9 +279,20 @@ struct PythonUDFData { } } - void AnalyzeSignature(const py::object &udf) { + py::object GetSignature(const py::object &udf) { + const int32_t PYTHON_3_10_HEX = 0x030a00f0; + auto python_version = PY_VERSION_HEX; + auto signature_func = py::module_::import("inspect").attr("signature"); - auto signature = signature_func(udf); + if (python_version >= PYTHON_3_10_HEX) { + return signature_func(udf, py::arg("eval_str") = true); + } else { + return signature_func(udf); + } + } + + void AnalyzeSignature(const py::object &udf) { + auto signature = GetSignature(udf); auto sig_params = signature.attr("parameters"); auto return_annotation = signature.attr("return_annotation"); if (!py::none().is(return_annotation)) { diff --git a/tools/pythonpkg/tests/fast/api/test_dbapi09.py b/tools/pythonpkg/tests/fast/api/test_dbapi09.py index 02899a3868cc..dde8ebff0b96 100644 --- a/tools/pythonpkg/tests/fast/api/test_dbapi09.py +++ b/tools/pythonpkg/tests/fast/api/test_dbapi09.py @@ -18,5 +18,5 @@ def test_fetchnumpy_date(self, duckdb_cursor): def test_fetchdf_date(self, duckdb_cursor): res = duckdb_cursor.execute("SELECT DATE '2020-01-10' as test_date").fetchdf() - ser = 
pandas.Series(numpy.array(['2020-01-10'], dtype="datetime64[ns]"), name="test_date") + ser = pandas.Series(numpy.array(['2020-01-10'], dtype="datetime64[us]"), name="test_date") pandas.testing.assert_series_equal(res['test_date'], ser) diff --git a/tools/pythonpkg/tests/fast/api/test_duckdb_execute.py b/tools/pythonpkg/tests/fast/api/test_duckdb_execute.py new file mode 100644 index 000000000000..5e365c8fee7f --- /dev/null +++ b/tools/pythonpkg/tests/fast/api/test_duckdb_execute.py @@ -0,0 +1,38 @@ +import duckdb +import pytest + + +class TestDuckDBExecute(object): + def test_execute_basic(self, duckdb_cursor): + duckdb_cursor.execute('create table t as select 5') + res = duckdb_cursor.table('t').fetchall() + assert res == [(5,)] + + def test_execute_many_basic(self, duckdb_cursor): + duckdb_cursor.execute("create table t(x int);") + + # This works because prepared parameter is only present in the last statement + duckdb_cursor.execute( + """ + delete from t where x=5; + insert into t(x) values($1); + """, + (99,), + ) + res = duckdb_cursor.table('t').fetchall() + assert res == [(99,)] + + def test_execute_many_error(self, duckdb_cursor): + duckdb_cursor.execute("create table t(x int);") + + # Prepared parameter used in a statement that is not the last + with pytest.raises( + duckdb.NotImplementedException, match='Prepared parameters are only supported for the last statement' + ): + duckdb_cursor.execute( + """ + delete from t where x=$1; + insert into t(x) values($1); + """, + (99,), + ) diff --git a/tools/pythonpkg/tests/fast/arrow/test_dictionary_arrow.py b/tools/pythonpkg/tests/fast/arrow/test_dictionary_arrow.py index f389836529fe..8b51daca339b 100644 --- a/tools/pythonpkg/tests/fast/arrow/test_dictionary_arrow.py +++ b/tools/pythonpkg/tests/fast/arrow/test_dictionary_arrow.py @@ -1,27 +1,23 @@ import duckdb -try: - import pyarrow as pa - import pyarrow.parquet - import numpy as np - from pandas import Timestamp - import datetime - import pandas as pd +import pytest - can_run = True -except: - can_run = False +pa = pytest.importorskip("pyarrow") +pq = pytest.importorskip("pyarrow.parquet") +np = pytest.importorskip("numpy") +pd = pytest.importorskip("pandas") +import datetime + +Timestamp = pd.Timestamp class TestArrowDictionary(object): def test_dictionary(self, duckdb_cursor): - if not can_run: - return indices = pa.array([0, 1, 0, 1, 2, 1, 0, 2]) dictionary = pa.array([10, 100, None]) dict_array = pa.DictionaryArray.from_arrays(indices, dictionary) arrow_table = pa.Table.from_arrays([dict_array], ['a']) - rel = duckdb.from_arrow(arrow_table) + rel = duckdb_cursor.from_arrow(arrow_table) assert rel.execute().fetchall() == [(10,), (100,), (10,), (100,), (None,), (100,), (10,), (None,)] @@ -31,25 +27,23 @@ def test_dictionary(self, duckdb_cursor): dictionary = pa.array([10, 100, None, 999999]) dict_array = pa.DictionaryArray.from_arrays(indices, dictionary) arrow_table = pa.Table.from_arrays([dict_array], ['a']) - rel = duckdb.from_arrow(arrow_table) + rel = duckdb_cursor.from_arrow(arrow_table) result = [(10,), (100,), (10,), (100,), (None,), (100,), (10,), (None,), (999999,)] * 10000 assert rel.execute().fetchall() == result # Table with dictionary and normal array arrow_table = pa.Table.from_arrays([dict_array, pa.array(indices_list)], ['a', 'b']) - rel = duckdb.from_arrow(arrow_table) + rel = duckdb_cursor.from_arrow(arrow_table) result = [(10, 0), (100, 1), (10, 0), (100, 1), (None, 2), (100, 1), (10, 0), (None, 2), (999999, 3)] * 10000 assert rel.execute().fetchall() == result 
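(For context, the ExecuteInternal guard above behaves like this from Python; a minimal sketch mirroring test_duckdb_execute.py:)

import duckdb

con = duckdb.connect()
con.execute("create table t(x int)")
# Fine: the bound parameter appears only in the final statement
con.execute("delete from t where x=5; insert into t(x) values($1);", (99,))
assert con.table('t').fetchall() == [(99,)]
# Raises duckdb.NotImplementedException, because a parameter is used in a
# statement that is not the last one:
# con.execute("delete from t where x=$1; insert into t(x) values($1);", (99,))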
     def test_dictionary_null_index(self, duckdb_cursor):
-        if not can_run:
-            return
         indices = pa.array([None, 1, 0, 1, 2, 1, 0, 2])
         dictionary = pa.array([10, 100, None])
         dict_array = pa.DictionaryArray.from_arrays(indices, dictionary)
         arrow_table = pa.Table.from_arrays([dict_array], ['a'])
-        rel = duckdb.from_arrow(arrow_table)
+        rel = duckdb_cursor.from_arrow(arrow_table)

         assert rel.execute().fetchall() == [(None,), (100,), (10,), (100,), (None,), (100,), (10,), (None,)]

@@ -57,7 +51,7 @@ def test_dictionary_null_index(self, duckdb_cursor):
         dictionary = pa.array([10, 100, 100])
         dict_array = pa.DictionaryArray.from_arrays(indices, dictionary)
         arrow_table = pa.Table.from_arrays([dict_array], ['a'])
-        rel = duckdb.from_arrow(arrow_table)
+        rel = duckdb_cursor.from_arrow(arrow_table)
         print(rel.execute().fetchall())
         assert rel.execute().fetchall() == [(None,), (100,), (None,), (100,), (100,), (100,), (10,)]

@@ -67,94 +61,147 @@ def test_dictionary_null_index(self, duckdb_cursor):
         dictionary = pa.array([10, 100, 100])
         dict_array = pa.DictionaryArray.from_arrays(indices, dictionary)
         arrow_table = pa.Table.from_arrays([dict_array], ['a'])
-        rel = duckdb.from_arrow(arrow_table)
+        rel = duckdb_cursor.from_arrow(arrow_table)
         result = [(None,), (100,), (None,), (100,), (100,), (100,), (10,)] * 1000
         assert rel.execute().fetchall() == result

         # Table with dictionary and normal array
         arrow_table = pa.Table.from_arrays([dict_array, indices], ['a', 'b'])
-        rel = duckdb.from_arrow(arrow_table)
+        rel = duckdb_cursor.from_arrow(arrow_table)
         result = [(None, None), (100, 1), (None, None), (100, 1), (100, 2), (100, 1), (10, 0)] * 1000
         assert rel.execute().fetchall() == result

-    def test_dictionary_batches(self, duckdb_cursor):
-        if not can_run:
-            return
+    @pytest.mark.parametrize(
+        'element',
+        [
+            # list
+            """
+            ['hello'::ENUM('hello', 'bye')]
+            """,
+            # struct
+            """
+            {'a': 'hello'::ENUM('hello', 'bye')}
+            """,
+            # union
+            """
+            {'a': 'hello'::ENUM('hello', 'bye')}::UNION(a integer, b bool, c struct(a enum('hello', 'bye')))
+            """,
+            # map (key)
+            """
+            map {'hello'::ENUM('hello', 'bye') : 'test'}
+            """,
+            # map (val)
+            """
+            map {'test': 'hello'::ENUM('hello', 'bye')}
+            """,
+            # list of struct(enum)
+            """
+            [{'a': 'hello'::ENUM('hello', 'bye')}]
+            """,
+            # list of union(enum)
+            """
+            [{'a': 'hello'::ENUM('hello', 'bye')}::UNION(a integer, b bool, c struct(a enum('hello', 'bye')))]
+            """,
+            # list of list
+            """
+            [['hello'::ENUM('hello', 'bye')], [], NULL, ['hello'::ENUM('hello', 'bye'), 'bye'::ENUM('hello', 'bye')]]
+            """,
+        ],
+    )
+    @pytest.mark.parametrize(
+        'count',
+        [
+            1,
+            10,
+            1024,
+            # 2048,
+            # 2047,
+            # 2049,
+            # 4000,
+            # 4096,
+            5000,
+        ],
+    )
+    @pytest.mark.parametrize('query', ["select {} as a from range({})", "select [{} for x in range({})] as a"])
+    def test_dictionary_roundtrip(self, query, element, duckdb_cursor, count):
+        query = query.format(element, count)
+        original_rel = duckdb_cursor.sql(query)
+        expected = original_rel.fetchall()
+        arrow_res = original_rel.arrow()
+
+        roundtrip_rel = duckdb_cursor.sql('select * from arrow_res')
+        actual = roundtrip_rel.fetchall()
+        assert expected == actual
+        assert original_rel.columns == roundtrip_rel.columns
+        # Note: we can't check the types, because originally these are ENUM,
+        # but because the dictionary of the ENUM cannot be known before execution we output VARCHAR instead.
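The round trip that test_dictionary_roundtrip exercises can be reproduced standalone. A minimal sketch, assuming an in-memory connection and relying on DuckDB's replacement scan resolving the local arrow_res variable by name:

import duckdb

con = duckdb.connect()
original = con.sql("select ['hello'::ENUM('hello', 'bye')] as a from range(5)")
expected = original.fetchall()

# ENUM columns are exported to Arrow as dictionary-encoded arrays.
arrow_res = original.arrow()

# The replacement scan picks up 'arrow_res' from the enclosing Python scope.
roundtrip = con.sql("select * from arrow_res")
assert roundtrip.fetchall() == expected
# Values and column names survive; the ENUM type itself comes back as VARCHAR,
# since the dictionary is not known until execution.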
+    def test_dictionary_batches(self, duckdb_cursor):
         indices_list = [None, 1, None, 1, 2, 1, 0]
         indices = pa.array(indices_list * 10000)
         dictionary = pa.array([10, 100, 100])
         dict_array = pa.DictionaryArray.from_arrays(indices, dictionary)
         arrow_table = pa.Table.from_arrays([dict_array], ['a'])
-        batch_arrow_table = pyarrow.Table.from_batches(arrow_table.to_batches(10))
-        rel = duckdb.from_arrow(batch_arrow_table)
+        batch_arrow_table = pa.Table.from_batches(arrow_table.to_batches(10))
+        rel = duckdb_cursor.from_arrow(batch_arrow_table)
         result = [(None,), (100,), (None,), (100,), (100,), (100,), (10,)] * 10000
         assert rel.execute().fetchall() == result

         # Table with dictionary and normal array
         arrow_table = pa.Table.from_arrays([dict_array, indices], ['a', 'b'])
-        batch_arrow_table = pyarrow.Table.from_batches(arrow_table.to_batches(10))
-        rel = duckdb.from_arrow(batch_arrow_table)
+        batch_arrow_table = pa.Table.from_batches(arrow_table.to_batches(10))
+        rel = duckdb_cursor.from_arrow(batch_arrow_table)
         result = [(None, None), (100, 1), (None, None), (100, 1), (100, 2), (100, 1), (10, 0)] * 10000
         assert rel.execute().fetchall() == result

     def test_dictionary_batches_parallel(self, duckdb_cursor):
-        if not can_run:
-            return
-
-        duckdb_conn = duckdb.connect()
-        duckdb_conn.execute("PRAGMA threads=4")
-        duckdb_conn.execute("PRAGMA verify_parallelism")
+        duckdb_cursor.execute("PRAGMA threads=4")
+        duckdb_cursor.execute("PRAGMA verify_parallelism")
         indices_list = [None, 1, None, 1, 2, 1, 0]
         indices = pa.array(indices_list * 10000)
         dictionary = pa.array([10, 100, 100])
         dict_array = pa.DictionaryArray.from_arrays(indices, dictionary)
         arrow_table = pa.Table.from_arrays([dict_array], ['a'])
-        batch_arrow_table = pyarrow.Table.from_batches(arrow_table.to_batches(10))
-        rel = duckdb_conn.from_arrow(batch_arrow_table)
+        batch_arrow_table = pa.Table.from_batches(arrow_table.to_batches(10))
+        rel = duckdb_cursor.from_arrow(batch_arrow_table)
         result = [(None,), (100,), (None,), (100,), (100,), (100,), (10,)] * 10000
         assert rel.execute().fetchall() == result

         # Table with dictionary and normal array
         arrow_table = pa.Table.from_arrays([dict_array, indices], ['a', 'b'])
-        batch_arrow_table = pyarrow.Table.from_batches(arrow_table.to_batches(10))
-        rel = duckdb_conn.from_arrow(batch_arrow_table)
+        batch_arrow_table = pa.Table.from_batches(arrow_table.to_batches(10))
+        rel = duckdb_cursor.from_arrow(batch_arrow_table)
         result = [(None, None), (100, 1), (None, None), (100, 1), (100, 2), (100, 1), (10, 0)] * 10000
         assert rel.execute().fetchall() == result

     def test_dictionary_index_types(self, duckdb_cursor):
-        if not can_run:
-            return
         indices_list = [None, 1, None, 1, 2, 1, 0]
-        dictionary = pa.array([10, 100, 100], type=pyarrow.uint8())
+        dictionary = pa.array([10, 100, 100], type=pa.uint8())
         index_types = []
-        index_types.append(pa.array(indices_list * 10000, type=pyarrow.uint8()))
-        index_types.append(pa.array(indices_list * 10000, type=pyarrow.uint16()))
-        index_types.append(pa.array(indices_list * 10000, type=pyarrow.uint32()))
-        index_types.append(pa.array(indices_list * 10000, type=pyarrow.uint64()))
-        index_types.append(pa.array(indices_list * 10000, type=pyarrow.int8()))
-        index_types.append(pa.array(indices_list * 10000, type=pyarrow.int16()))
-        index_types.append(pa.array(indices_list * 10000, type=pyarrow.int32()))
-        index_types.append(pa.array(indices_list * 10000, type=pyarrow.int64()))
+        index_types.append(pa.array(indices_list * 10000, type=pa.uint8()))
+        index_types.append(pa.array(indices_list * 10000, type=pa.uint16()))
+        index_types.append(pa.array(indices_list * 10000, type=pa.uint32()))
+        index_types.append(pa.array(indices_list * 10000, type=pa.uint64()))
+        index_types.append(pa.array(indices_list * 10000, type=pa.int8()))
+        index_types.append(pa.array(indices_list * 10000, type=pa.int16()))
+        index_types.append(pa.array(indices_list * 10000, type=pa.int32()))
+        index_types.append(pa.array(indices_list * 10000, type=pa.int64()))

         for index_type in index_types:
             dict_array = pa.DictionaryArray.from_arrays(index_type, dictionary)
             arrow_table = pa.Table.from_arrays([dict_array], ['a'])
-            rel = duckdb.from_arrow(arrow_table)
+            rel = duckdb_cursor.from_arrow(arrow_table)
             result = [(None,), (100,), (None,), (100,), (100,), (100,), (10,)] * 10000
             assert rel.execute().fetchall() == result

     def test_dictionary_strings(self, duckdb_cursor):
-        if not can_run:
-            return
-
         indices_list = [None, 0, 1, 2, 3, 4, None]
         indices = pa.array(indices_list * 1000)
         dictionary = pa.array(['Matt Daaaaaaaaamon', 'Alec Baldwin', 'Sean Penn', 'Tim Robbins', 'Samuel L. Jackson'])
         dict_array = pa.DictionaryArray.from_arrays(indices, dictionary)
         arrow_table = pa.Table.from_arrays([dict_array], ['a'])
-        rel = duckdb.from_arrow(arrow_table)
+        rel = duckdb_cursor.from_arrow(arrow_table)
         result = [
             (None,),
             ('Matt Daaaaaaaaamon',),
@@ -167,8 +214,6 @@ def test_dictionary_strings(self, duckdb_cursor):
         assert rel.execute().fetchall() == result

     def test_dictionary_timestamps(self, duckdb_cursor):
-        if not can_run:
-            return
         indices_list = [None, 0, 1, 2, None]
         indices = pa.array(indices_list * 1000)
         dictionary = pa.array(
@@ -181,7 +226,7 @@ def test_dictionary_timestamps(self, duckdb_cursor):
         )
         dict_array = pa.DictionaryArray.from_arrays(indices, dictionary)
         arrow_table = pa.Table.from_arrays([dict_array], ['a'])
-        rel = duckdb.from_arrow(arrow_table)
+        rel = duckdb_cursor.from_arrow(arrow_table)
         print(rel.execute().fetchall())
         expected = [
             (None,),
diff --git a/tools/pythonpkg/tests/fast/pandas/test_datetime_time.py b/tools/pythonpkg/tests/fast/pandas/test_datetime_time.py
index 12045a06c065..ccbe7003c7a8 100644
--- a/tools/pythonpkg/tests/fast/pandas/test_datetime_time.py
+++ b/tools/pythonpkg/tests/fast/pandas/test_datetime_time.py
@@ -4,6 +4,8 @@ from conftest import NumpyPandas, ArrowPandas
 from datetime import datetime, timezone, time, timedelta

+_ = pytest.importorskip("pandas", minversion="2.0.0")
+

 class TestDateTimeTime(object):
     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
@@ -23,14 +25,17 @@ def test_time_low(self, duckdb_cursor, pandas):
         pandas.testing.assert_frame_equal(df_out, duckdb_time)

     @pytest.mark.parametrize('pandas', [NumpyPandas(), ArrowPandas()])
-    def test_pandas_datetime_overflow(self, pandas):
+    @pytest.mark.parametrize('input', ['2263-02-28', '9999-01-01'])
+    def test_pandas_datetime_big(self, pandas, input):
         duckdb_con = duckdb.connect()
         duckdb_con.execute("create table test (date DATE)")
-        duckdb_con.execute("INSERT INTO TEST VALUES ('2263-02-28')")
+        duckdb_con.execute(f"INSERT INTO TEST VALUES ('{input}')")

-        with pytest.raises(duckdb.ConversionException):
-            res = duckdb_con.execute("select * from test").df()
+        res = duckdb_con.execute("select * from test").df()
+        date_value = np.array([f'{input}'], dtype='datetime64[us]')
+        df = pandas.DataFrame({'date': date_value})
+        pandas.testing.assert_frame_equal(res, df)

     def test_timezone_datetime(self):
         con = duckdb.connect()
diff --git a/tools/pythonpkg/tests/fast/test_string_annotation.py b/tools/pythonpkg/tests/fast/test_string_annotation.py
new file mode 100644
index 000000000000..c5500c663ee1
--- /dev/null
+++ b/tools/pythonpkg/tests/fast/test_string_annotation.py
@@ -0,0 +1,53 @@
+import duckdb
+import pytest
+import sys
+from typing import Union
+
+
+def make_annotated_function(type: str):
+    def test_base():
+        return None
+
+    import types
+
+    test_function = types.FunctionType(
+        test_base.__code__, test_base.__globals__, test_base.__name__, test_base.__defaults__, test_base.__closure__
+    )
+    # Add the 'type' string as return_annotation
+    test_function.__annotations__ = {'return': type}
+    return test_function
+
+
+def python_version_lower_than_3_10():
+    import sys
+
+    if sys.version_info[0] < 3:
+        return True
+    if sys.version_info[1] < 10:
+        return True
+    return False
+
+
+class TestStringAnnotation(object):
+    @pytest.mark.skipif(
+        python_version_lower_than_3_10(), reason="inspect.signature(eval_str=True) only supported since 3.10 and higher"
+    )
+    @pytest.mark.parametrize(
+        ['input', 'expected'],
+        [
+            ('str', 'VARCHAR'),
+            ('list[str]', 'VARCHAR[]'),
+            ('dict[str, str]', 'MAP(VARCHAR, VARCHAR)'),
+            ('dict[Union[str, bool], str]', 'MAP(UNION(u1 VARCHAR, u2 BOOLEAN), VARCHAR)'),
+        ],
+    )
+    def test_string_annotations(self, duckdb_cursor, input, expected):
+        from inspect import signature
+
+        func = make_annotated_function(input)
+        sig = signature(func)
+        assert sig.return_annotation.__class__ == str
+
+        duckdb_cursor.create_function("foo", func)
+        rel = duckdb_cursor.sql("select foo()")
+        assert rel.types == [expected]
diff --git a/tools/pythonpkg/tests/fast/types/test_numpy.py b/tools/pythonpkg/tests/fast/types/test_numpy.py
index fe23d1fcfa43..6c8c13243755 100644
--- a/tools/pythonpkg/tests/fast/types/test_numpy.py
+++ b/tools/pythonpkg/tests/fast/types/test_numpy.py
@@ -17,11 +17,12 @@ def test_numpy_datetime64(self, duckdb_cursor):
             "select * from tbl"
         ).fetchall()

-    def test_numpy_datetime_overflow(self):
+    def test_numpy_datetime_big(self):
         duckdb_con = duckdb.connect()
         duckdb_con.execute("create table test (date DATE)")
         duckdb_con.execute("INSERT INTO TEST VALUES ('2263-02-28')")
-        with pytest.raises(duckdb.ConversionException):
-            res1 = duckdb_con.execute("select * from test").fetchnumpy()
+        res1 = duckdb_con.execute("select * from test").fetchnumpy()
+        date_value = {'date': np.array(['2263-02-28'], dtype='datetime64[us]')}
+        assert res1 == date_value
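Taken together, the ns-to-us changes mean DATE values beyond the datetime64[ns] range (which ends at 2262-04-11) no longer raise a ConversionException when fetched into pandas or NumPy. A minimal sketch, assuming pandas 2.x (which supports non-nanosecond datetime64 columns):

import duckdb

con = duckdb.connect()
con.execute("create table test (date DATE)")
# Past the datetime64[ns] horizon; previously raised duckdb.ConversionException.
con.execute("INSERT INTO test VALUES ('9999-01-01')")

df = con.execute("select * from test").df()
assert str(df["date"].dtype) == "datetime64[us]"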