Skip to content

Commit

Permalink
It's compiling; too scared to run it yet
Browse files Browse the repository at this point in the history
  • Loading branch information
Tishj committed Jan 6, 2025
1 parent c2d26f2 commit c82f24d
Show file tree
Hide file tree
Showing 4 changed files with 423 additions and 146 deletions.
46 changes: 31 additions & 15 deletions src/include/duckdb/storage/table/column_data_checkpointer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,34 +41,50 @@ struct ColumnDataCheckpointData {
ColumnCheckpointInfo &checkpoint_info;
};

//! Pairs an analyze state with the compression function it accompanies.
struct CheckpointAnalyzeResult {
	//! Default constructor, returned when the column data doesn't require checkpoint.
	//! Both members are left default-constructed.
	CheckpointAnalyzeResult() = default;

	//! Takes ownership of `analyze_state` (moved from) and stores `function` alongside it.
	CheckpointAnalyzeResult(unique_ptr<AnalyzeState> &&analyze_state, CompressionFunction &function)
	    : analyze_state(std::move(analyze_state)), function(function) {
	}

	//! Analyze state owned by this result; default-constructed in the "no checkpoint required" case
	unique_ptr<AnalyzeState> analyze_state;
	//! Compression function stored with the analyze state; default-constructed in the
	//! "no checkpoint required" case (presumably a non-owning, nullable handle - TODO confirm)
	optional_ptr<CompressionFunction> function;
};

//! Checkpoints column data; the visible caller in ColumnData::Checkpoint constructs it and then
//! calls Checkpoint() followed by FinalizeCheckpoint().
//! NOTE(review): this listing appears to be a diff rendered without +/- markers, so superseded and
//! replacement declarations sit side by side (two constructor heads, two Checkpoint/FinalizeCheckpoint
//! signatures, `has_changes` declared as both bool and vector<bool>) - confirm against the real
//! header before relying on any single line.
class ColumnDataCheckpointer {
public:
ColumnDataCheckpointer(ColumnData &col_data_p, RowGroup &row_group_p, ColumnCheckpointState &state_p,
//! Takes the checkpoint states by (non-const) reference to a vector, plus the database instance,
//! row group and checkpoint info.
ColumnDataCheckpointer(vector<reference<ColumnCheckpointState>> &states, DatabaseInstance &db, RowGroup &row_group,
ColumnCheckpointInfo &checkpoint_info);

public:
void Checkpoint(const column_segment_vector_t &nodes);
void FinalizeCheckpoint(column_segment_vector_t &&nodes);
CompressionFunction &GetCompressionFunction(CompressionType type);
//! Argument-free entry points; the visible caller invokes them in this order.
void Checkpoint();
void FinalizeCheckpoint();

private:
void ScanSegments(const column_segment_vector_t &nodes, const std::function<void(Vector &, idx_t)> &callback);
unique_ptr<AnalyzeState> DetectBestCompressionMethod(const column_segment_vector_t &nodes, idx_t &compression_idx);
void WriteToDisk(const column_segment_vector_t &nodes);
bool HasChanges(const column_segment_vector_t &nodes);
void WritePersistentSegments(column_segment_vector_t nodes);
//! Invokes `callback` with a Vector and an idx_t (presumably the scanned data and its count - TODO confirm)
void ScanSegments(const std::function<void(Vector &, idx_t)> &callback);
//! Returns a vector of CheckpointAnalyzeResult (presumably one entry per checkpoint state - TODO confirm)
vector<CheckpointAnalyzeResult> DetectBestCompressionMethod();
void WriteToDisk();
bool HasChanges(ColumnData &col_data);
void WritePersistentSegments(ColumnCheckpointState &state);
void InitAnalyze();
void DropSegments();

private:
ColumnData &col_data;
//! Held by reference, not by value: the referenced vector must outlive this checkpointer
//! (presumably bound to the constructor's `states` argument - TODO confirm)
vector<reference<ColumnCheckpointState>> &checkpoint_states;
DatabaseInstance &db;
RowGroup &row_group;
ColumnCheckpointState &state;
bool is_validity;
bool has_changes;
Vector intermediate;
vector<optional_ptr<CompressionFunction>> compression_functions;
ColumnCheckpointInfo &checkpoint_info;

vector<unique_ptr<AnalyzeState>> analyze_states;
//! Presumably one flag per column data being checkpointed, matching the per-column vectors below - TODO confirm
vector<bool> has_changes;
//! For every column data that is being checkpointed, the applicable functions
vector<vector<optional_ptr<CompressionFunction>>> compression_functions;
//! For every column data that is being checkpointed, the analyze state of functions being tried
vector<vector<unique_ptr<AnalyzeState>>> analyze_states;
};

} // namespace duckdb
11 changes: 6 additions & 5 deletions src/storage/table/column_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -620,21 +620,22 @@ unique_ptr<ColumnCheckpointState> ColumnData::Checkpoint(RowGroup &row_group, Co
auto checkpoint_state = CreateCheckpointState(row_group, checkpoint_info.info.manager);
checkpoint_state->global_stats = BaseStatistics::CreateEmpty(type).ToUnique();

auto l = data.Lock();
auto &nodes = data.ReferenceSegments(l);
auto &nodes = data.ReferenceSegments();
if (nodes.empty()) {
// empty table: flush the empty list
return checkpoint_state;
}

ColumnDataCheckpointer checkpointer(*this, row_group, *checkpoint_state, checkpoint_info);
checkpointer.Checkpoint(nodes);
checkpointer.FinalizeCheckpoint(data.MoveSegments(l));
vector<reference<ColumnCheckpointState>> states {*checkpoint_state};
ColumnDataCheckpointer checkpointer(states, GetDatabase(), row_group, checkpoint_info);
checkpointer.Checkpoint();
checkpointer.FinalizeCheckpoint();

// reset the compression function
compression.reset();
// replace the old tree with the new one
auto new_segments = checkpoint_state->new_tree.MoveSegments();
auto l = data.Lock();
for (auto &new_segment : new_segments) {
AppendSegment(l, std::move(new_segment.node));
}
Expand Down
Loading

0 comments on commit c82f24d

Please sign in to comment.