Skip to content

Commit

Permalink
Merge branch 'feature' into python_table_support_replacement_scan
Browse files Browse the repository at this point in the history
  • Loading branch information
Tishj committed Oct 22, 2023
2 parents 7ba4638 + 7a091e5 commit df7b067
Show file tree
Hide file tree
Showing 9 changed files with 76 additions and 49 deletions.
7 changes: 7 additions & 0 deletions src/common/types/validity_mask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ string ValidityMask::ToString(idx_t count) const {

void ValidityMask::Resize(idx_t old_size, idx_t new_size) {
D_ASSERT(new_size >= old_size);
target_count = new_size;
if (validity_mask) {
auto new_size_count = EntryCount(new_size);
auto old_size_count = EntryCount(old_size);
Expand All @@ -67,10 +68,15 @@ void ValidityMask::Resize(idx_t old_size, idx_t new_size) {
validity_data = std::move(new_validity_data);
validity_mask = validity_data->owned_data.get();
} else {
// TODO: We shouldn't have to initialize here, just update the target count
Initialize(new_size);
}
}

// Returns the stored target_count of this mask. Resize() assigns it from its
// new_size argument, so after a resize this reports the resized count.
idx_t ValidityMask::TargetCount() {
return target_count;
}

void ValidityMask::Slice(const ValidityMask &other, idx_t source_offset, idx_t count) {
if (other.AllValid()) {
validity_mask = nullptr;
Expand All @@ -91,6 +97,7 @@ bool ValidityMask::IsAligned(idx_t count) {
}

void ValidityMask::SliceInPlace(const ValidityMask &other, idx_t target_offset, idx_t source_offset, idx_t count) {
EnsureWritable();
if (IsAligned(source_offset) && IsAligned(target_offset)) {
auto target_validity = GetData();
auto source_validity = other.GetData();
Expand Down
6 changes: 4 additions & 2 deletions src/common/types/vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,9 @@ void Vector::Initialize(bool zero_data, idx_t capacity) {
memset(data, 0, capacity * type_size);
}
}
if (capacity > STANDARD_VECTOR_SIZE) {
validity.Resize(STANDARD_VECTOR_SIZE, capacity);

if (capacity > validity.TargetCount()) {
validity.Resize(validity.TargetCount(), capacity);
}
}

Expand Down Expand Up @@ -1050,6 +1051,7 @@ void Vector::Serialize(Serializer &serializer, idx_t count) {
serializer.WriteProperty(100, "all_valid", all_valid);
if (all_valid) {
ValidityMask flat_mask(count);
flat_mask.Initialize();
for (idx_t i = 0; i < count; ++i) {
auto row_idx = vdata.sel->get_index(i);
flat_mask.Set(i, vdata.validity.RowIsValid(row_idx));
Expand Down
2 changes: 1 addition & 1 deletion src/common/types/vector_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class VectorCacheBuffer : public VectorBuffer {
auto &child_type = ArrayType::GetChildType(type);
auto array_size = ArrayType::GetSize(type);
child_caches.push_back(make_buffer<VectorCacheBuffer>(allocator, child_type, array_size * capacity));
auto child_vector = make_uniq<Vector>(child_type, false, false, array_size * capacity);
auto child_vector = make_uniq<Vector>(child_type, true, false, array_size * capacity);
auxiliary = make_shared<VectorArrayBuffer>(std::move(child_vector), array_size, capacity);
break;
}
Expand Down
25 changes: 14 additions & 11 deletions src/function/cast/list_casts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ static bool ListToArrayCast(Vector &source, Vector &result, idx_t count, CastPar
} else {
source.Flatten(count);
result.SetVectorType(VectorType::FLAT_VECTOR);
FlatVector::SetValidity(result, FlatVector::Validity(source));

auto child_type = ArrayType::GetChildType(result.GetType());
auto &source_cc = ListVector::GetEntry(source);
Expand All @@ -187,15 +186,13 @@ static bool ListToArrayCast(Vector &source, Vector &result, idx_t count, CastPar
for (idx_t i = 0; i < count; i++) {
// If the list is null, set the entire array to null
if (FlatVector::IsNull(source, i)) {
FlatVector::SetNull(result, i, true);
for (idx_t array_elem = 0; array_elem < array_size; array_elem++) {
FlatVector::SetNull(payload_vector, i * array_size + array_elem, true);
// just select whatever, it won't be used anyway
sel.set_index(i * array_size + array_elem, i * array_size + array_elem);
// just select the first value, it won't be used anyway
sel.set_index(i * array_size + array_elem, 0);
}
continue;
}

if (ldata[i].length != array_size) {
} else if (ldata[i].length != array_size) {
if (all_lengths_match) {
// Can't cast to array, list size mismatch
all_lengths_match = false;
Expand All @@ -204,10 +201,16 @@ static bool ListToArrayCast(Vector &source, Vector &result, idx_t count, CastPar
HandleCastError::AssignError(msg, parameters.error_message);
}
FlatVector::SetNull(result, i, true);
}
// Set the selection vector to point to the correct offsets
for (idx_t array_elem = 0; array_elem < array_size; array_elem++) {
sel.set_index(i * array_size + array_elem, ldata[i].offset + array_elem);
for (idx_t array_elem = 0; array_elem < array_size; array_elem++) {
FlatVector::SetNull(payload_vector, i * array_size + array_elem, true);
// just select the first value, it won't be used anyway
sel.set_index(i * array_size + array_elem, 0);
}
} else {
// Set the selection vector to point to the correct offsets
for (idx_t array_elem = 0; array_elem < array_size; array_elem++) {
sel.set_index(i * array_size + array_elem, ldata[i].offset + array_elem);
}
}
}

Expand Down
26 changes: 17 additions & 9 deletions src/include/duckdb/common/types/validity_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,11 @@ struct TemplatedValidityMask {
static constexpr const int STANDARD_MASK_SIZE = STANDARD_ENTRY_COUNT * sizeof(validity_t);

public:
inline TemplatedValidityMask() : validity_mask(nullptr) {
inline TemplatedValidityMask() : validity_mask(nullptr), target_count(STANDARD_VECTOR_SIZE) {
}
inline explicit TemplatedValidityMask(idx_t max_count) {
Initialize(max_count);
inline explicit TemplatedValidityMask(idx_t target_count) : validity_mask(nullptr), target_count(target_count) {
}
inline explicit TemplatedValidityMask(V *ptr) : validity_mask(ptr) {
inline explicit TemplatedValidityMask(V *ptr) : validity_mask(ptr), target_count(STANDARD_VECTOR_SIZE) {
}
inline TemplatedValidityMask(const TemplatedValidityMask &original, idx_t count) {
Copy(original, count);
Expand Down Expand Up @@ -139,6 +138,7 @@ struct TemplatedValidityMask {
//! Puts the mask back into the state the default constructor produces:
//! target count of STANDARD_VECTOR_SIZE, no buffer held, no mask pointer
inline void Reset() {
	target_count = STANDARD_VECTOR_SIZE;
	validity_data.reset();
	validity_mask = nullptr;
}

static inline idx_t EntryCount(idx_t count) {
Expand Down Expand Up @@ -229,8 +229,8 @@ struct TemplatedValidityMask {
//! Marks the entry at the specified row index as invalid (i.e. null)
inline void SetInvalid(idx_t row_idx) {
if (!validity_mask) {
D_ASSERT(row_idx <= STANDARD_VECTOR_SIZE);
Initialize(STANDARD_VECTOR_SIZE);
D_ASSERT(row_idx <= target_count);
Initialize(target_count);
}
SetInvalidUnsafe(row_idx);
}
Expand Down Expand Up @@ -295,12 +295,18 @@ struct TemplatedValidityMask {
//! Takes over the state of `other` by plain assignment: its target count,
//! its buffer handle and its raw mask pointer (nothing is allocated here)
inline void Initialize(const TemplatedValidityMask &other) {
	target_count = other.target_count;
	validity_data = other.validity_data;
	validity_mask = other.validity_mask;
}
inline void Initialize(idx_t count = STANDARD_VECTOR_SIZE) {
inline void Initialize(idx_t count) {
target_count = count;
validity_data = make_buffer<ValidityBuffer>(count);
validity_mask = validity_data->owned_data.get();
}
//! Initializes the mask using the currently stored target_count,
//! by delegating to Initialize(idx_t count)
inline void Initialize() {
Initialize(target_count);
}
inline void Copy(const TemplatedValidityMask &other, idx_t count) {
target_count = count;
if (other.AllValid()) {
validity_data = nullptr;
validity_mask = nullptr;
Expand All @@ -313,13 +319,15 @@ struct TemplatedValidityMask {
protected:
V *validity_mask;
buffer_ptr<ValidityBuffer> validity_data;
// The size to initialize the validity mask to when/if the mask is lazily initialized
idx_t target_count;
};

struct ValidityMask : public TemplatedValidityMask<validity_t> {
public:
inline ValidityMask() : TemplatedValidityMask(nullptr) {
}
inline explicit ValidityMask(idx_t max_count) : TemplatedValidityMask(max_count) {
inline explicit ValidityMask(idx_t target_count) : TemplatedValidityMask(target_count) {
}
inline explicit ValidityMask(validity_t *ptr) : TemplatedValidityMask(ptr) {
}
Expand All @@ -328,7 +336,7 @@ struct ValidityMask : public TemplatedValidityMask<validity_t> {

public:
DUCKDB_API void Resize(idx_t old_size, idx_t new_size);

DUCKDB_API idx_t TargetCount();
DUCKDB_API void SliceInPlace(const ValidityMask &other, idx_t target_offset, idx_t source_offset, idx_t count);
DUCKDB_API void Slice(const ValidityMask &other, idx_t source_offset, idx_t count);
DUCKDB_API void Combine(const ValidityMask &other, idx_t count);
Expand Down
4 changes: 2 additions & 2 deletions test/sql/types/nested/array/array_aggregate.test
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ statement ok
INSERT INTO tbl1 VALUES ([1, 2, 3]), ([4, NULL, 6]), ([7, 8, 9]), (NULL), ([10, 11, 12])

query II
SELECT FIRST(a), LAST(a) FROM tbl1;
SELECT FIRST(a ORDER BY ALL), LAST(a ORDER BY ALL) FROM tbl1;
----
[1, 2, 3] [10, 11, 12]
[1, 2, 3] NULL

query II rowsort
SELECT COUNT(*), max(a) FROM tbl1 GROUP BY list_sum(a::INT[]) % 2 == 0;
Expand Down
6 changes: 3 additions & 3 deletions test/sql/types/nested/array/array_joins.test
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,11 @@ statement ok
CREATE OR REPLACE TABLE t1 AS SELECT * FROM (VALUES (1, [1,2,3]::${COLLECTION}), (2, [4,5,6]::${COLLECTION}), (3, [1,2,3]::${COLLECTION}));

query IIII
SELECT * FROM t1 as a JOIN t1 as b ON (a.col1 != b.col1);
SELECT * FROM t1 as a JOIN t1 as b ON (a.col1 != b.col1) ORDER BY ALL;
----
2 [4, 5, 6] 1 [1, 2, 3]
1 [1, 2, 3] 2 [4, 5, 6]
3 [1, 2, 3] 2 [4, 5, 6]
2 [4, 5, 6] 1 [1, 2, 3]
2 [4, 5, 6] 3 [1, 2, 3]
3 [1, 2, 3] 2 [4, 5, 6]

endloop
21 changes: 0 additions & 21 deletions test/sql/types/nested/array/array_try_cast.test
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,6 @@ SELECT CAST(array_value(1,2) as INTEGER[3]);
----
Conversion Error: Cannot cast array of size 2 to array of size 3

# Array try cast
query I rowsort
SELECT TRY_CAST(test_vector AS INT[2]) AS a FROM test_vector_types(NULL::INTEGER[])
----
NULL
NULL
NULL
NULL
NULL
NULL
[-2147483648, 2147483647]
[-2147483648, 2147483647]
[-2147483648, 2147483647]
[-2147483648, 2147483647]
[3, 5]

statement error
SELECT CAST(test_vector AS INT[2]) AS a FROM test_vector_types(NULL::INTEGER[])
----
Cannot cast list with length 0 to array with length 2

# Nested array try cast
query I
SELECT TRY_CAST(x as INT[2][2]) FROM (VALUES ([[1,2],[3,4]]), ([[5,6],[7,8]])) AS t(x)
Expand Down
28 changes: 28 additions & 0 deletions test/sql/types/nested/array/array_try_cast_vector_types.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# name: test/sql/types/nested/array/array_try_cast_vector_types.test
# group: [array]

require vector_size 2048

statement ok
PRAGMA enable_verification

# Array try cast
query I rowsort
SELECT TRY_CAST(test_vector AS INT[2]) AS a FROM test_vector_types(NULL::INTEGER[])
----
NULL
NULL
NULL
NULL
NULL
NULL
[-2147483648, 2147483647]
[-2147483648, 2147483647]
[-2147483648, 2147483647]
[-2147483648, 2147483647]
[3, 5]

statement error
SELECT CAST(test_vector AS INT[2]) AS a FROM test_vector_types(NULL::INTEGER[])
----
Cannot cast list with length 0 to array with length 2

0 comments on commit df7b067

Please sign in to comment.