Skip to content

Commit

Permalink
Merge branch 'main' into arrow_support_large_offsets
Browse files Browse the repository at this point in the history
  • Loading branch information
Tishj committed Oct 25, 2023
2 parents e6bb791 + 10f9bd9 commit a594ecf
Show file tree
Hide file tree
Showing 81 changed files with 1,722 additions and 239 deletions.
1 change: 1 addition & 0 deletions .github/config/out_of_tree_extensions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ duckdb_extension_load(spatial
GIT_TAG 36e5a126976ac3b66716893360ef7e6295707082
INCLUDE_DIR spatial/include
TEST_DIR test/sql
APPLY_PATCHES
)

################# SQLITE_SCANNER
Expand Down
15 changes: 15 additions & 0 deletions .github/patches/extensions/spatial/const_copy_param.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
diff --git a/spatial/src/spatial/gdal/functions/st_write.cpp b/spatial/src/spatial/gdal/functions/st_write.cpp
index 36a71da..15ebcf4 100644
--- a/spatial/src/spatial/gdal/functions/st_write.cpp
+++ b/spatial/src/spatial/gdal/functions/st_write.cpp
@@ -55,8 +55,8 @@ struct GlobalState : public GlobalFunctionData {
//===--------------------------------------------------------------------===//
// Bind
//===--------------------------------------------------------------------===//
-static unique_ptr<FunctionData> Bind(ClientContext &context, CopyInfo &info, vector<string> &names,
- vector<LogicalType> &sql_types) {
+static unique_ptr<FunctionData> Bind(ClientContext &context, const CopyInfo &info, const vector<string> &names,
+ const vector<LogicalType> &sql_types) {

GdalFileHandler::SetLocalClientContext(context);

2 changes: 1 addition & 1 deletion .github/workflows/InternalIssuesCreateMirror.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
- name: Get mirror issue number
run: |
gh issue list --repo duckdblabs/duckdb-internal --json title,number --jq ".[] | select(.title | startswith(\"$TITLE_PREFIX\")).number" > mirror_issue_number.txt
gh issue list --repo duckdblabs/duckdb-internal --search "${TITLE_PREFIX}" --json title,number --jq ".[] | select(.title | startswith(\"$TITLE_PREFIX\")).number" > mirror_issue_number.txt
echo "MIRROR_ISSUE_NUMBER=$(cat mirror_issue_number.txt)" >> $GITHUB_ENV
- name: Print whether mirror issue exists
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/InternalIssuesUpdateMirror.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
steps:
- name: Get mirror issue number
run: |
gh issue list --repo duckdblabs/duckdb-internal --json title,number --jq ".[] | select(.title | startswith(\"$TITLE_PREFIX\")).number" > mirror_issue_number.txt
gh issue list --repo duckdblabs/duckdb-internal --search "${TITLE_PREFIX}" --json title,number --jq ".[] | select(.title | startswith(\"$TITLE_PREFIX\")).number" > mirror_issue_number.txt
echo "MIRROR_ISSUE_NUMBER=$(cat mirror_issue_number.txt)" >> $GITHUB_ENV
- name: Print whether mirror issue exists
Expand Down
37 changes: 37 additions & 0 deletions .github/workflows/NeedsDocumentation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Opens a tracking issue in duckdb/duckdb-web whenever an issue or pull request
# in this repository is given the "Needs Documentation" label. An existing
# tracking issue (matched by title prefix) is reused instead of duplicated.
name: Create Documentation issue for the Needs Documentation label
on:
  issues:
    types:
      - labeled
  pull_request:
    types:
      - labeled

env:
  GH_TOKEN: ${{ secrets.DUCKDBLABS_BOT_TOKEN }}
  # The triggering event is either an issue or a pull request, so only one of
  # the two payloads is populated; fall back from the issue to the pull request.
  PUBLIC_ISSUE_NUMBER: ${{ github.event.issue.number || github.event.pull_request.number }}
  TITLE_PREFIX: "[duckdb/#${{ github.event.issue.number || github.event.pull_request.number }}]"
  # Passed through the environment (never interpolated into the script text) so
  # that a user-controlled title cannot inject shell commands.
  PUBLIC_ISSUE_TITLE: ${{ github.event.issue.title || github.event.pull_request.title }}

jobs:
  create_documentation_issue:
    # "labeled" fires for every label; only react to the documentation label.
    if: github.event.label.name == 'Needs Documentation'
    runs-on: ubuntu-latest
    steps:
      - name: Get mirror issue number
        run: |
          # --search narrows the server-side result set so the match is not limited
          # to the default first page of issues; jq then enforces an exact prefix match.
          gh issue list --repo duckdb/duckdb-web --search "${TITLE_PREFIX}" --json title,number --jq ".[] | select(.title | startswith(\"${TITLE_PREFIX}\")).number" > mirror_issue_number.txt
          echo "MIRROR_ISSUE_NUMBER=$(cat mirror_issue_number.txt)" >> "${GITHUB_ENV}"
      - name: Print whether mirror issue exists
        run: |
          if [ "${MIRROR_ISSUE_NUMBER}" == "" ]; then
            echo "Mirror issue with title prefix '${TITLE_PREFIX}' does not exist yet"
          else
            echo "Mirror issue with title prefix '${TITLE_PREFIX}' exists with number ${MIRROR_ISSUE_NUMBER}"
          fi
      - name: Create mirror issue if it does not yet exist
        run: |
          if [ "${MIRROR_ISSUE_NUMBER}" == "" ]; then
            # The /issues/<n> URL is used for pull requests as well.
            gh issue create --repo duckdb/duckdb-web --title "${TITLE_PREFIX} - ${PUBLIC_ISSUE_TITLE} needs documentation" --body "See https://github.com/duckdb/duckdb/issues/${PUBLIC_ISSUE_NUMBER}"
          fi
9 changes: 9 additions & 0 deletions .github/workflows/R_CMD_CHECK.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ jobs:
extra-packages: any::rcmdcheck
needs: check

- name: Apply duckdb-r patches
shell: bash
working-directory: ${{ env.DUCKDB_SRC }}
run: |
shopt -s nullglob
for filename in .github/patches/duckdb-r/*.patch; do
git apply $filename
done
# needed so we can run git commit in vendor.sh
- name: setup github and create parallel builds
shell: bash
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/Regression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ jobs:

- uses: actions/setup-python@v4
with:
python-version: '3.7'
python-version: '3.11'

- name: Install
shell: bash
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,7 @@ function(build_loadable_extension_directory NAME OUTPUT_DIRECTORY PARAMETERS)
if(WASM_LOADABLE_EXTENSIONS)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -sSIDE_MODULE=1 -DWASM_LOADABLE_EXTENSIONS")
elseif (EXTENSION_STATIC_BUILD)
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
if (APPLE)
set_target_properties(${TARGET_NAME} PROPERTIES CXX_VISIBILITY_PRESET hidden)
# Note that on MacOS we need to use the -exported_symbol whitelist feature due to a lack of -exclude-libs flag in mac's ld variant
Expand Down
4 changes: 2 additions & 2 deletions extension/parquet/parquet_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -740,8 +740,8 @@ static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids,
}
}

unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, CopyInfo &info, vector<string> &names,
vector<LogicalType> &sql_types) {
unique_ptr<FunctionData> ParquetWriteBind(ClientContext &context, const CopyInfo &info, const vector<string> &names,
const vector<LogicalType> &sql_types) {
D_ASSERT(names.size() == sql_types.size());
bool row_group_size_bytes_set = false;
auto bind_data = make_uniq<ParquetWriteBindData>();
Expand Down
18 changes: 6 additions & 12 deletions src/catalog/catalog_entry/view_catalog_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ unique_ptr<CreateInfo> ViewCatalogEntry::GetInfo() const {
result->query = unique_ptr_cast<SQLStatement, SelectStatement>(query->Copy());
result->aliases = aliases;
result->types = types;
result->temporary = temporary;
return std::move(result);
}

Expand All @@ -58,23 +59,16 @@ string ViewCatalogEntry::ToSQL() const {
//! Return empty sql with view name so pragma view_tables don't complain
return sql;
}
return sql + "\n;";
auto info = GetInfo();
auto result = info->ToString();
return result + ";\n";
}

//! Creates a copy of this view entry from the CreateViewInfo produced by GetInfo().
//! Must not be called on internal views.
unique_ptr<CatalogEntry> ViewCatalogEntry::Copy(ClientContext &context) const {
	D_ASSERT(!internal);
	// Reuse GetInfo() rather than duplicating the field-by-field copy here, so the
	// two code paths cannot drift apart when new view properties are added.
	// NOTE(review): verify that GetInfo() also populates `sql`, the schema and the
	// view name, which a field-by-field copy would set explicitly.
	auto create_info = GetInfo();

	return make_uniq<ViewCatalogEntry>(catalog, schema, create_info->Cast<CreateViewInfo>());
}
}

} // namespace duckdb
2 changes: 1 addition & 1 deletion src/common/arrow/appender/union_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ void ArrowUnionData::Append(ArrowAppendData &append_data, Vector &input, idx_t f

duckdb::vector<Vector> child_vectors;
for (const auto &child : UnionType::CopyMemberTypes(input.GetType())) {
child_vectors.emplace_back(child.second);
child_vectors.emplace_back(child.second, size);
}

for (idx_t input_idx = from; input_idx < to; input_idx++) {
Expand Down
5 changes: 5 additions & 0 deletions src/common/enum_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6010,6 +6010,8 @@ const char* EnumUtil::ToChars<UnionInvalidReason>(UnionInvalidReason value) {
return "VALIDITY_OVERLAP";
case UnionInvalidReason::TAG_MISMATCH:
return "TAG_MISMATCH";
case UnionInvalidReason::NULL_TAG:
return "NULL_TAG";
default:
throw NotImplementedException(StringUtil::Format("Enum value: '%d' not implemented", value));
}
Expand All @@ -6032,6 +6034,9 @@ UnionInvalidReason EnumUtil::FromString<UnionInvalidReason>(const char *value) {
if (StringUtil::Equals(value, "TAG_MISMATCH")) {
return UnionInvalidReason::TAG_MISMATCH;
}
if (StringUtil::Equals(value, "NULL_TAG")) {
return UnionInvalidReason::NULL_TAG;
}
throw NotImplementedException(StringUtil::Format("Enum value: '%s' not implemented", value));
}

Expand Down
4 changes: 4 additions & 0 deletions src/common/types/list_segment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,10 @@ void SegmentPrimitiveFunction(ListSegmentFunctions &functions) {

void GetSegmentDataFunctions(ListSegmentFunctions &functions, const LogicalType &type) {

if (type.id() == LogicalTypeId::UNKNOWN) {
throw ParameterNotResolvedException();
}

auto physical_type = type.InternalType();
switch (physical_type) {
case PhysicalType::BIT:
Expand Down
100 changes: 66 additions & 34 deletions src/common/types/vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1131,9 +1131,12 @@ void Vector::VerifyMap(Vector &vector_p, const SelectionVector &sel_p, idx_t cou

//! Debug-only consistency check for UNION vectors.
//! Runs UnionVector::CheckUnionValidity on the vector and selection vector as given
//! and throws an InternalException naming the UnionInvalidReason when the result is
//! anything other than VALID. Compiles to a no-op when DEBUG is not defined.
void Vector::VerifyUnion(Vector &vector_p, const SelectionVector &sel_p, idx_t count) {
#ifdef DEBUG

	D_ASSERT(vector_p.GetType().id() == LogicalTypeId::UNION);
	auto valid_check = UnionVector::CheckUnionValidity(vector_p, count, sel_p);
	// Throw instead of asserting on the result: an assertion placed here would fire
	// first and hide which UnionInvalidReason was actually detected.
	if (valid_check != UnionInvalidReason::VALID) {
		throw InternalException("Union not valid, reason: %s", EnumUtil::ToString(valid_check));
	}
#endif // DEBUG
}

Expand Down Expand Up @@ -1250,7 +1253,8 @@ void Vector::Verify(Vector &vector_p, const SelectionVector &sel_p, idx_t count)
}

if (vector->GetType().id() == LogicalTypeId::UNION) {
VerifyUnion(*vector, *sel, count);
// Pass in raw vector
VerifyUnion(vector_p, sel_p, count);
}
}

Expand Down Expand Up @@ -1911,7 +1915,13 @@ void UnionVector::SetToMember(Vector &union_vector, union_tag_t tag, Vector &mem
// if the member vector is constant, we can set the union to constant as well
union_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
ConstantVector::GetData<union_tag_t>(tag_vector)[0] = tag;
ConstantVector::SetNull(union_vector, ConstantVector::IsNull(member_vector));
if (keep_tags_for_null) {
ConstantVector::SetNull(union_vector, false);
ConstantVector::SetNull(tag_vector, false);
} else {
ConstantVector::SetNull(union_vector, ConstantVector::IsNull(member_vector));
ConstantVector::SetNull(tag_vector, ConstantVector::IsNull(member_vector));
}

} else {
// otherwise flatten and set to flatvector
Expand Down Expand Up @@ -1962,53 +1972,75 @@ union_tag_t UnionVector::GetTag(const Vector &vector, idx_t index) {
return FlatVector::GetData<union_tag_t>(tag_vector)[index];
}

UnionInvalidReason UnionVector::CheckUnionValidity(Vector &vector, idx_t count, const SelectionVector &sel) {
D_ASSERT(vector.GetType().id() == LogicalTypeId::UNION);
auto member_count = UnionType::GetMemberCount(vector.GetType());
//! Raw selection vector passed in (not merged with any other selection vectors)
UnionInvalidReason UnionVector::CheckUnionValidity(Vector &vector_p, idx_t count, const SelectionVector &sel_p) {
D_ASSERT(vector_p.GetType().id() == LogicalTypeId::UNION);

// Will contain the (possibly) merged selection vector
const SelectionVector *sel = &sel_p;
SelectionVector owned_sel;
Vector *vector = &vector_p;
if (vector->GetVectorType() == VectorType::DICTIONARY_VECTOR) {
// In the case of a dictionary vector, unwrap the Vector, and merge the selection vectors.
auto &child = DictionaryVector::Child(*vector);
D_ASSERT(child.GetVectorType() != VectorType::DICTIONARY_VECTOR);
auto &dict_sel = DictionaryVector::SelVector(*vector);
// merge the selection vectors and verify the child
auto new_buffer = dict_sel.Slice(*sel, count);
owned_sel.Initialize(new_buffer);
sel = &owned_sel;
vector = &child;
} else if (vector->GetVectorType() == VectorType::CONSTANT_VECTOR) {
sel = ConstantVector::ZeroSelectionVector(count, owned_sel);
}

auto member_count = UnionType::GetMemberCount(vector_p.GetType());
if (member_count == 0) {
return UnionInvalidReason::NO_MEMBERS;
}

UnifiedVectorFormat union_vdata;
vector.ToUnifiedFormat(count, union_vdata);
UnifiedVectorFormat vector_vdata;
vector_p.ToUnifiedFormat(count, vector_vdata);

UnifiedVectorFormat tags_vdata;
auto &tag_vector = UnionVector::GetTags(vector);
tag_vector.ToUnifiedFormat(count, tags_vdata);
auto &entries = StructVector::GetEntries(vector_p);
duckdb::vector<UnifiedVectorFormat> child_vdata(entries.size());
for (idx_t entry_idx = 0; entry_idx < entries.size(); entry_idx++) {
auto &child = *entries[entry_idx];
child.ToUnifiedFormat(count, child_vdata[entry_idx]);
}

auto &tag_vdata = child_vdata[0];

// check that only one member is valid at a time
for (idx_t row_idx = 0; row_idx < count; row_idx++) {
auto union_mapped_row_idx = sel.get_index(row_idx);
if (!union_vdata.validity.RowIsValid(union_mapped_row_idx)) {
continue;
}
auto mapped_idx = sel->get_index(row_idx);

auto tag_mapped_row_idx = tags_vdata.sel->get_index(row_idx);
if (!tags_vdata.validity.RowIsValid(tag_mapped_row_idx)) {
if (!vector_vdata.validity.RowIsValid(mapped_idx)) {
continue;
}

auto tag = (UnifiedVectorFormat::GetData<union_tag_t>(tags_vdata))[tag_mapped_row_idx];
auto tag_idx = tag_vdata.sel->get_index(sel_p.get_index(row_idx));
if (!tag_vdata.validity.RowIsValid(tag_idx)) {
// we can't have NULL tags!
return UnionInvalidReason::NULL_TAG;
}
auto tag = UnifiedVectorFormat::GetData<union_tag_t>(tag_vdata)[tag_idx];
if (tag >= member_count) {
return UnionInvalidReason::TAG_OUT_OF_RANGE;
}

bool found_valid = false;
for (idx_t member_idx = 0; member_idx < member_count; member_idx++) {

UnifiedVectorFormat member_vdata;
auto &member = UnionVector::GetMember(vector, member_idx);
member.ToUnifiedFormat(count, member_vdata);

auto mapped_row_idx = member_vdata.sel->get_index(row_idx);
if (member_vdata.validity.RowIsValid(mapped_row_idx)) {
if (found_valid) {
return UnionInvalidReason::VALIDITY_OVERLAP;
}
found_valid = true;
if (tag != static_cast<union_tag_t>(member_idx)) {
return UnionInvalidReason::TAG_MISMATCH;
}
for (idx_t i = 0; i < member_count; i++) {
auto &member_vdata = child_vdata[1 + i]; // skip the tag
idx_t member_idx = member_vdata.sel->get_index(sel_p.get_index(row_idx));
if (!member_vdata.validity.RowIsValid(member_idx)) {
continue;
}
if (found_valid) {
return UnionInvalidReason::VALIDITY_OVERLAP;
}
found_valid = true;
if (tag != static_cast<union_tag_t>(i)) {
return UnionInvalidReason::TAG_MISMATCH;
}
}
}
Expand Down
9 changes: 8 additions & 1 deletion src/core_functions/scalar/date/strftime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,14 @@ struct StrpTimeFunction {
auto &func_expr = state.expr.Cast<BoundFunctionExpression>();
auto &info = func_expr.bind_info->Cast<StrpTimeBindData>();

if (args.data[1].GetVectorType() == VectorType::CONSTANT_VECTOR && ConstantVector::IsNull(args.data[1])) {
// There is a bizarre situation where the format column is foldable but not constant
// (i.e., the statistics tell us it has only one value)
// We have to check whether that value is NULL
const auto count = args.size();
UnifiedVectorFormat format_unified;
args.data[1].ToUnifiedFormat(count, format_unified);

if (!format_unified.validity.RowIsValid(0)) {
result.SetVectorType(VectorType::CONSTANT_VECTOR);
ConstantVector::SetNull(result, true);
return;
Expand Down
8 changes: 4 additions & 4 deletions src/execution/index/art/art_key.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ ARTKey ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type,

// FIXME: rethink this
if (type == LogicalType::BLOB || type == LogicalType::VARCHAR) {
// indexes cannot contain BLOBs (or BLOBs cast to VARCHARs) that contain null-terminated bytes
// indexes cannot contain BLOBs (or BLOBs cast to VARCHARs) that contain zero bytes
for (uint32_t i = 0; i < len - 1; i++) {
if (data[i] == '\0') {
throw NotImplementedException("Indexes cannot contain BLOBs that contain null-terminated bytes.");
throw NotImplementedException("ART indexes cannot contain BLOBs with zero bytes.");
}
}
}
Expand All @@ -45,10 +45,10 @@ void ARTKey::CreateARTKey(ArenaAllocator &allocator, const LogicalType &type, AR

// FIXME: rethink this
if (type == LogicalType::BLOB || type == LogicalType::VARCHAR) {
// indexes cannot contain BLOBs (or BLOBs cast to VARCHARs) that contain null-terminated bytes
// indexes cannot contain BLOBs (or BLOBs cast to VARCHARs) that contain zero bytes
for (uint32_t i = 0; i < key.len - 1; i++) {
if (key.data[i] == '\0') {
throw NotImplementedException("Indexes cannot contain BLOBs that contain null-terminated bytes.");
throw NotImplementedException("ART indexes cannot contain BLOBs with zero bytes.");
}
}
}
Expand Down
Loading

0 comments on commit a594ecf

Please sign in to comment.