Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into dict_fsst_compressi…
Browse files Browse the repository at this point in the history
…on_unified
  • Loading branch information
Tishj committed Jan 7, 2025
2 parents 39f9990 + 12d1051 commit c0ca18b
Show file tree
Hide file tree
Showing 52 changed files with 9,613 additions and 2,290 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/Android.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
android:
name: Android
runs-on: ubuntu-latest
container: ubuntu:18.04
container: ubuntu:20.04
if: ${{ github.ref == 'refs/heads/main' || github.ref == 'refs/heads/feature' }}

strategy:
Expand Down
393 changes: 393 additions & 0 deletions data/csv/tpcc_results.csv

Large diffs are not rendered by default.

12 changes: 10 additions & 2 deletions scripts/run_tests_one_by_one.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def valid_timeout(value):

parser = argparse.ArgumentParser(description='Run tests one by one with optional flags.')
parser.add_argument('unittest_program', help='Path to the unittest program')
parser.add_argument('--no-exit', action='store_true', help='Do not exit after running tests')
parser.add_argument('--no-exit', action='store_true', help='Execute all tests, without stopping on first error')
parser.add_argument('--fast-fail', action='store_true', help='Terminate on first error')
parser.add_argument('--profile', action='store_true', help='Enable profiling')
parser.add_argument('--no-assertions', action='store_false', help='Disable assertions')
parser.add_argument('--time_execution', action='store_true', help='Measure and print the execution time of each test')
Expand Down Expand Up @@ -47,6 +48,13 @@ def valid_timeout(value):
# Access the arguments
unittest_program = args.unittest_program
no_exit = args.no_exit
fast_fail = args.fast_fail

if no_exit:
if fast_fail:
print("--no-exit and --fast-fail can't be combined")
exit(1)

profile = args.profile
assertions = args.no_assertions
time_execution = args.time_execution
Expand Down Expand Up @@ -87,7 +95,7 @@ def valid_timeout(value):
def fail():
global all_passed
all_passed = False
if not no_exit:
if fast_fail:
exit(1)


Expand Down
6 changes: 3 additions & 3 deletions scripts/sqllogictest/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,9 +332,9 @@ def compare_rows(a, b):
hash_compare_error = values[0] != hash_value

if hash_compare_error:
expected_result = self.result_label_map.get(query_label)
logger.wrong_result_hash(expected_result, self)
self.fail_query(query)
expected_result = runner.result_label_map.get(query_label)
# logger.wrong_result_hash(expected_result, self)
context.fail(query)

assert not hash_compare_error

Expand Down
92 changes: 67 additions & 25 deletions src/common/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -988,6 +988,72 @@ static bool CombineUnequalTypes(const LogicalType &left, const LogicalType &righ
return false;
}

// Combines two STRUCT types into a single STRUCT type and writes it to `result`.
//
// OP supplies the per-child combination rule through OP::Operation(left_type, right_type, out_type);
// this function only decides which children are paired up and which names they carry.
//
// - If at least one side is an unnamed STRUCT, children are paired by position. Both sides must then
//   have the same number of children. Names are taken from the right side when the left side is
//   unnamed, and from the left side otherwise.
// - If both sides are named, the result is the super-set of the fields: every left child in left
//   order (combined with its right counterpart when one with a matching name exists), followed by
//   the right children that had no counterpart, in the order in which the right STRUCT declares them.
//
// Returns false (leaving `result` untouched) when the positional child counts differ or when
// OP::Operation fails for any pair of children; returns true otherwise.
template <class OP>
static bool CombineStructTypes(const LogicalType &left, const LogicalType &right, LogicalType &result) {
	auto &left_children = StructType::GetChildTypes(left);
	auto &right_children = StructType::GetChildTypes(right);

	auto left_unnamed = StructType::IsUnnamed(left);
	auto is_unnamed = left_unnamed || StructType::IsUnnamed(right);
	child_list_t<LogicalType> child_types;

	// At least one side is unnamed, so we attempt positional casting.
	if (is_unnamed) {
		if (left_children.size() != right_children.size()) {
			// We can't cast, or create the super-set.
			return false;
		}

		for (idx_t i = 0; i < left_children.size(); i++) {
			LogicalType child_type;
			if (!OP::Operation(left_children[i].second, right_children[i].second, child_type)) {
				return false;
			}
			// Prefer the names of whichever side actually has them.
			auto &child_name = left_unnamed ? right_children[i].first : left_children[i].first;
			child_types.emplace_back(child_name, std::move(child_type));
		}
		result = LogicalType::STRUCT(child_types);
		return true;
	}

	// Create a super-set of the STRUCT fields.
	// First, create a name->index map of the right children.
	// If the same key occurs more than once, the last occurrence wins.
	case_insensitive_map_t<idx_t> right_children_map;
	for (idx_t i = 0; i < right_children.size(); i++) {
		auto &name = right_children[i].first;
		right_children_map[name] = i;
	}

	for (idx_t i = 0; i < left_children.size(); i++) {
		auto &left_child = left_children[i];
		auto right_child_it = right_children_map.find(left_child.first);

		if (right_child_it == right_children_map.end()) {
			// We can directly put the left child.
			child_types.emplace_back(left_child.first, left_child.second);
			continue;
		}

		// We need to recurse to ensure the children have a maximum logical type.
		LogicalType child_type;
		auto &right_child = right_children[right_child_it->second];
		if (!OP::Operation(left_child.second, right_child.second, child_type)) {
			return false;
		}
		child_types.emplace_back(left_child.first, std::move(child_type));
		// Matched entries are removed so that only unmatched right children remain in the map.
		right_children_map.erase(right_child_it);
	}

	// Add all remaining right children.
	// We walk the right children by index instead of iterating the map: the map is only used as a
	// "still unmatched" filter, so the field order of the result follows the right STRUCT's
	// declaration order rather than the iteration order of the lookup map.
	for (idx_t i = 0; i < right_children.size(); i++) {
		auto &right_child = right_children[i];
		auto entry = right_children_map.find(right_child.first);
		if (entry == right_children_map.end() || entry->second != i) {
			// Either already combined with a left child, or shadowed by a later child with the same key.
			continue;
		}
		child_types.emplace_back(right_child.first, right_child.second);
	}

	result = LogicalType::STRUCT(child_types);
	return true;
}

template <class OP>
static bool CombineEqualTypes(const LogicalType &left, const LogicalType &right, LogicalType &result) {
// Since both left and right are equal we get the left type as our type_id for checks
Expand Down Expand Up @@ -1059,31 +1125,7 @@ static bool CombineEqualTypes(const LogicalType &left, const LogicalType &right,
return true;
}
case LogicalTypeId::STRUCT: {
// struct: perform recursively on each child
auto &left_child_types = StructType::GetChildTypes(left);
auto &right_child_types = StructType::GetChildTypes(right);
bool left_unnamed = StructType::IsUnnamed(left);
auto any_unnamed = left_unnamed || StructType::IsUnnamed(right);
if (left_child_types.size() != right_child_types.size()) {
// child types are not of equal size, we can't cast
// return false
return false;
}
child_list_t<LogicalType> child_types;
for (idx_t i = 0; i < left_child_types.size(); i++) {
LogicalType child_type;
// Child names must be in the same order OR either one of the structs must be unnamed
if (!any_unnamed && !StringUtil::CIEquals(left_child_types[i].first, right_child_types[i].first)) {
return false;
}
if (!OP::Operation(left_child_types[i].second, right_child_types[i].second, child_type)) {
return false;
}
auto &child_name = left_unnamed ? right_child_types[i].first : left_child_types[i].first;
child_types.emplace_back(child_name, std::move(child_type));
}
result = LogicalType::STRUCT(child_types);
return true;
return CombineStructTypes<OP>(left, right, result);
}
case LogicalTypeId::UNION: {
auto left_member_count = UnionType::GetMemberCount(left);
Expand Down
46 changes: 26 additions & 20 deletions src/common/types/vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1374,10 +1374,10 @@ void Vector::Deserialize(Deserializer &deserializer, idx_t count) {
}

void Vector::SetVectorType(VectorType vector_type_p) {
this->vector_type = vector_type_p;
vector_type = vector_type_p;
auto physical_type = GetType().InternalType();
if (TypeIsConstantSize(physical_type) &&
(GetVectorType() == VectorType::CONSTANT_VECTOR || GetVectorType() == VectorType::FLAT_VECTOR)) {
auto flat_or_const = GetVectorType() == VectorType::CONSTANT_VECTOR || GetVectorType() == VectorType::FLAT_VECTOR;
if (TypeIsConstantSize(physical_type) && flat_or_const) {
auxiliary.reset();
}
if (vector_type == VectorType::CONSTANT_VECTOR && physical_type == PhysicalType::STRUCT) {
Expand Down Expand Up @@ -1782,23 +1782,29 @@ void Vector::DebugShuffleNestedVector(Vector &vector, idx_t count) {
void FlatVector::SetNull(Vector &vector, idx_t idx, bool is_null) {
D_ASSERT(vector.GetVectorType() == VectorType::FLAT_VECTOR);
vector.validity.Set(idx, !is_null);
if (is_null) {
auto &type = vector.GetType();
auto internal_type = type.InternalType();
if (internal_type == PhysicalType::STRUCT) {
// set all child entries to null as well
auto &entries = StructVector::GetEntries(vector);
for (auto &entry : entries) {
FlatVector::SetNull(*entry, idx, is_null);
}
} else if (internal_type == PhysicalType::ARRAY) {
// set the child element in the array to null as well
auto &child = ArrayVector::GetEntry(vector);
auto array_size = ArrayType::GetSize(type);
auto child_offset = idx * array_size;
for (idx_t i = 0; i < array_size; i++) {
FlatVector::SetNull(child, child_offset + i, is_null);
}
if (!is_null) {
return;
}

auto &type = vector.GetType();
auto internal_type = type.InternalType();

// Set all child entries to NULL.
if (internal_type == PhysicalType::STRUCT) {
auto &entries = StructVector::GetEntries(vector);
for (auto &entry : entries) {
FlatVector::SetNull(*entry, idx, is_null);
}
return;
}

// Set all child entries to NULL.
if (internal_type == PhysicalType::ARRAY) {
auto &child = ArrayVector::GetEntry(vector);
auto array_size = ArrayType::GetSize(type);
auto child_offset = idx * array_size;
for (idx_t i = 0; i < array_size; i++) {
FlatVector::SetNull(child, child_offset + i, is_null);
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/execution/physical_plan/plan_cte.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalMaterializ
auto right = CreatePlan(*op.children[1]);

unique_ptr<PhysicalCTE> cte;
cte = make_uniq<PhysicalCTE>(op.ctename, op.table_index, op.children[1]->types, std::move(left), std::move(right),
cte = make_uniq<PhysicalCTE>(op.ctename, op.table_index, right->types, std::move(left), std::move(right),
op.estimated_cardinality);
cte->working_table = working_table;
cte->cte_scans = materialized_ctes[op.table_index];
Expand Down
129 changes: 80 additions & 49 deletions src/function/cast/struct_cast.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "duckdb/common/exception/binder_exception.hpp"
#include "duckdb/function/cast/default_casts.hpp"
#include "duckdb/function/cast/cast_function_set.hpp"
#include "duckdb/function/cast/bound_cast_data.hpp"
Expand All @@ -7,51 +8,67 @@ namespace duckdb {
unique_ptr<BoundCastData> StructBoundCastData::BindStructToStructCast(BindCastInput &input, const LogicalType &source,
const LogicalType &target) {
vector<BoundCastInfo> child_cast_info;
auto &source_child_types = StructType::GetChildTypes(source);
auto &result_child_types = StructType::GetChildTypes(target);
auto &source_children = StructType::GetChildTypes(source);
auto &target_children = StructType::GetChildTypes(target);

auto target_is_unnamed = StructType::IsUnnamed(target);
auto source_is_unnamed = StructType::IsUnnamed(source);

if (source_child_types.size() != result_child_types.size()) {
auto is_unnamed = target_is_unnamed || source_is_unnamed;
if (is_unnamed && source_children.size() != target_children.size()) {
throw TypeMismatchException(input.query_location, source, target, "Cannot cast STRUCTs of different size");
}
bool named_struct_cast = !source_is_unnamed && !target_is_unnamed;
case_insensitive_map_t<idx_t> target_members;
if (named_struct_cast) {
for (idx_t i = 0; i < result_child_types.size(); i++) {
auto &target_name = result_child_types[i].first;
if (target_members.find(target_name) != target_members.end()) {
throw NotImplementedException("Error while casting - duplicate name \"%s\" in struct", target_name);

case_insensitive_map_t<idx_t> target_children_map;
if (!is_unnamed) {
for (idx_t i = 0; i < target_children.size(); i++) {
auto &name = target_children[i].first;
if (target_children_map.find(name) != target_children_map.end()) {
throw NotImplementedException("Error while casting - duplicate name \"%s\" in struct", name);
}
target_members[target_name] = i;
target_children_map[name] = i;
}
}
vector<idx_t> child_member_map;
child_member_map.reserve(source_child_types.size());
for (idx_t source_idx = 0; source_idx < source_child_types.size(); source_idx++) {
auto &source_child = source_child_types[source_idx];
idx_t target_idx;
if (named_struct_cast) {
// named struct cast - find corresponding member in target
auto entry = target_members.find(source_child.first);
if (entry == target_members.end()) {
throw TypeMismatchException(input.query_location, source, target,
"Cannot cast STRUCTs - element \"" + source_child.first +
"\" in source struct was not found in target struct");

vector<idx_t> source_indexes;
vector<idx_t> target_indexes;
vector<idx_t> target_null_indexes;
bool has_any_match = is_unnamed;

for (idx_t i = 0; i < source_children.size(); i++) {
auto &source_child = source_children[i];
auto target_idx = i;

// Map to the correct index for named structs.
if (!is_unnamed) {
auto target_child = target_children_map.find(source_child.first);
if (target_child == target_children_map.end()) {
// Skip any children that have no target.
continue;
}
target_idx = entry->second;
target_members.erase(entry);
} else {
// unnamed struct cast - positionally cast elements
target_idx = source_idx;
target_idx = target_child->second;
target_children_map.erase(target_child);
has_any_match = true;
}
child_member_map.push_back(target_idx);
auto child_cast = input.GetCastFunction(source_child.second, result_child_types[target_idx].second);

source_indexes.push_back(i);
target_indexes.push_back(target_idx);
auto child_cast = input.GetCastFunction(source_child.second, target_children[target_idx].second);
child_cast_info.push_back(std::move(child_cast));
}
D_ASSERT(child_member_map.size() == source_child_types.size());
return make_uniq<StructBoundCastData>(std::move(child_cast_info), target, std::move(child_member_map));

if (!has_any_match) {
throw BinderException("STRUCT to STRUCT cast must have at least one matching member");
}

// The remaining target children have no match in the source struct.
// Thus, they become NULL.
for (const auto &target_child : target_children_map) {
target_null_indexes.push_back(target_child.second);
}

return make_uniq<StructBoundCastData>(std::move(child_cast_info), target, std::move(source_indexes),
std::move(target_indexes), std::move(target_null_indexes));
}

unique_ptr<FunctionLocalState> StructBoundCastData::InitStructCastLocalState(CastLocalStateParameters &parameters) {
Expand All @@ -71,32 +88,46 @@ unique_ptr<FunctionLocalState> StructBoundCastData::InitStructCastLocalState(Cas

static bool StructToStructCast(Vector &source, Vector &result, idx_t count, CastParameters &parameters) {
auto &cast_data = parameters.cast_data->Cast<StructBoundCastData>();
auto &lstate = parameters.local_state->Cast<StructCastLocalState>();
auto &source_child_types = StructType::GetChildTypes(source.GetType());
auto &source_children = StructVector::GetEntries(source);
D_ASSERT(source_children.size() == StructType::GetChildTypes(result.GetType()).size());
auto &l_state = parameters.local_state->Cast<StructCastLocalState>();

auto &source_vectors = StructVector::GetEntries(source);
auto &target_children = StructVector::GetEntries(result);

auto &result_children = StructVector::GetEntries(result);
bool all_converted = true;
for (idx_t c_idx = 0; c_idx < source_child_types.size(); c_idx++) {
auto source_idx = c_idx;
auto target_idx = cast_data.child_member_map[source_idx];
auto &source_child_vector = *source_children[source_idx];
auto &result_child_vector = *result_children[target_idx];
CastParameters child_parameters(parameters, cast_data.child_cast_info[c_idx].cast_data,
lstate.local_states[c_idx]);
if (!cast_data.child_cast_info[c_idx].function(source_child_vector, result_child_vector, count,
child_parameters)) {
for (idx_t i = 0; i < cast_data.source_indexes.size(); i++) {
auto source_idx = cast_data.source_indexes[i];
auto target_idx = cast_data.target_indexes[i];

auto &source_vector = *source_vectors[source_idx];
auto &target_vector = *target_children[target_idx];

auto &child_cast_info = cast_data.child_cast_info[i];
CastParameters child_parameters(parameters, child_cast_info.cast_data, l_state.local_states[i]);
auto success = child_cast_info.function(source_vector, target_vector, count, child_parameters);
if (!success) {
all_converted = false;
}
}

if (!cast_data.target_null_indexes.empty()) {
for (idx_t i = 0; i < cast_data.target_null_indexes.size(); i++) {
auto target_idx = cast_data.target_null_indexes[i];
auto &target_vector = *target_children[target_idx];

target_vector.SetVectorType(VectorType::CONSTANT_VECTOR);
ConstantVector::SetNull(target_vector, true);
}
}

if (source.GetVectorType() == VectorType::CONSTANT_VECTOR) {
result.SetVectorType(VectorType::CONSTANT_VECTOR);
ConstantVector::SetNull(result, ConstantVector::IsNull(source));
} else {
source.Flatten(count);
FlatVector::Validity(result) = FlatVector::Validity(source);
return all_converted;
}

source.Flatten(count);
auto &result_validity = FlatVector::Validity(result);
result_validity = FlatVector::Validity(source);
return all_converted;
}

Expand Down
Loading

0 comments on commit c0ca18b

Please sign in to comment.