Skip to content

Commit

Permalink
align compaction output
Browse files Browse the repository at this point in the history
  • Loading branch information
Yuval-Ariel committed Oct 23, 2022
1 parent aa71464 commit 9194650
Show file tree
Hide file tree
Showing 16 changed files with 528 additions and 72 deletions.
3 changes: 3 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
* Fixed an optimistic transaction validation bug caused by DBImpl::GetLatestSequenceForKey() returning non-latest seq for merge (#10724).
* Fixed a bug in iterator refresh which could segfault for DeleteRange users (#10739).

### Performance Improvements
* Try to align the compaction output file boundaries to the next level ones, which can reduce more than 10% compaction load for the default level compaction. The feature is enabled by default, to disable, set `AdvancedColumnFamilyOptions.level_compaction_dynamic_file_size` to false. As a side effect, it can create SSTs larger than the target_file_size (capped at 2x target_file_size) or smaller files.

## 7.7.0 (09/18/2022)
### Bug Fixes
* Fixed a hang when an operation such as `GetLiveFiles` or `CreateNewBackup` is asked to trigger and wait for memtable flush on a read-only DB. Such indirect requests for memtable flush are now ignored on a read-only DB.
Expand Down
10 changes: 9 additions & 1 deletion db/compaction/compaction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ Compaction::Compaction(
: input_vstorage_(vstorage),
start_level_(_inputs[0].level),
output_level_(_output_level),
max_output_file_size_(_target_file_size),
target_output_file_size_(_target_file_size),
max_compaction_bytes_(_max_compaction_bytes),
max_subcompactions_(_max_subcompactions),
immutable_options_(_immutable_options),
Expand Down Expand Up @@ -268,6 +268,14 @@ Compaction::Compaction(
max_subcompactions_ = _mutable_db_options.max_subcompactions;
}

// for the non-bottommost levels, it tries to build files match the target
// file size, but not guaranteed. It could be 2x the size of the target size.
max_output_file_size_ =
bottommost_level_ || grandparents_.empty() ||
!_immutable_options.level_compaction_dynamic_file_size
? target_output_file_size_
: 2 * target_output_file_size_;

#ifndef NDEBUG
for (size_t i = 1; i < inputs_.size(); ++i) {
assert(inputs_[i].level > inputs_[i - 1].level);
Expand Down
4 changes: 4 additions & 0 deletions db/compaction/compaction.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,9 @@ class Compaction {
// Maximum size of files to build during this compaction.
uint64_t max_output_file_size() const { return max_output_file_size_; }

// Target output file size for this compaction
uint64_t target_output_file_size() const { return target_output_file_size_; }

// What compression for output
CompressionType output_compression() const { return output_compression_; }

Expand Down Expand Up @@ -412,6 +415,7 @@ class Compaction {

const int start_level_; // the lowest level to be compacted
const int output_level_; // levels to which output files are stored
uint64_t target_output_file_size_;
uint64_t max_output_file_size_;
uint64_t max_compaction_bytes_;
uint32_t max_subcompactions_;
Expand Down
267 changes: 261 additions & 6 deletions db/compaction/compaction_job_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,9 @@ class CompactionJobTestBase : public testing::Test {
test::CreateEnvFromSystem(ConfigOptions(), &base_env, &env_guard_));
env_ = base_env;
fs_ = env_->GetFileSystem();
// set default for the tests
mutable_cf_options_.target_file_size_base = 1024 * 1024;
mutable_cf_options_.max_compaction_bytes = 10 * 1024 * 1024;
}

void SetUp() override {
Expand Down Expand Up @@ -415,8 +418,9 @@ class CompactionJobTestBase : public testing::Test {

auto cfd = versions_->GetColumnFamilySet()->GetDefault();
if (table_type_ == TableTypeForTest::kMockTable) {
assert(expected_results.size() == 1);
mock_table_factory_->AssertLatestFile(expected_results[0]);
ASSERT_EQ(compaction_job_stats_.num_output_files,
expected_results.size());
mock_table_factory_->AssertLatestFiles(expected_results);
} else {
assert(table_type_ == TableTypeForTest::kBlockBasedTable);
}
Expand All @@ -426,7 +430,8 @@ class CompactionJobTestBase : public testing::Test {
ASSERT_EQ(expected_output_file_num, output_files.size());

if (table_type_ == TableTypeForTest::kMockTable) {
assert(output_files.size() == 1);
assert(output_files.size() ==
static_cast<size_t>(expected_output_file_num));
const FileMetaData* const output_file = output_files[0];
ASSERT_EQ(output_file->oldest_blob_file_number,
expected_oldest_blob_file_numbers[0]);
Expand Down Expand Up @@ -620,12 +625,22 @@ class CompactionJobTestBase : public testing::Test {
num_input_files += level_files.size();
}

std::vector<FileMetaData*> grandparents;
// it should actually be the next non-empty level
const int kGrandparentsLevel = output_level + 1;
if (kGrandparentsLevel < cf_options_.num_levels) {
grandparents =
cfd_->current()->storage_info()->LevelFiles(kGrandparentsLevel);
}

Compaction compaction(
cfd->current()->storage_info(), *cfd->ioptions(),
*cfd->GetLatestMutableCFOptions(), mutable_db_options_,
compaction_input_files, output_level, 1024 * 1024, 10 * 1024 * 1024, 0,
kNoCompression, cfd->GetLatestMutableCFOptions()->compression_opts,
Temperature::kUnknown, max_subcompactions, {}, true);
compaction_input_files, output_level,
mutable_cf_options_.target_file_size_base,
mutable_cf_options_.max_compaction_bytes, 0, kNoCompression,
cfd->GetLatestMutableCFOptions()->compression_opts,
Temperature::kUnknown, max_subcompactions, grandparents, true);
compaction.SetInputVersion(cfd->current());

assert(db_options_.info_log);
Expand Down Expand Up @@ -1721,6 +1736,246 @@ TEST_F(CompactionJobTest, ResultSerialization) {
}
}

class CompactionJobDynamicFileSizeTest
: public CompactionJobTestBase,
public ::testing::WithParamInterface<bool> {
public:
CompactionJobDynamicFileSizeTest()
: CompactionJobTestBase(
test::PerThreadDBPath("compaction_job_dynamic_file_size_test"),
BytewiseComparator(), [](uint64_t /*ts*/) { return ""; },
/*test_io_priority=*/false, TableTypeForTest::kMockTable) {}
};

TEST_P(CompactionJobDynamicFileSizeTest, CutForMaxCompactionBytes) {
// dynamic_file_size option should has no impact on cutting for max compaction
// bytes.
bool enable_dyanmic_file_size = GetParam();
cf_options_.level_compaction_dynamic_file_size = enable_dyanmic_file_size;

NewDB();
mutable_cf_options_.target_file_size_base = 80;
mutable_cf_options_.max_compaction_bytes = 21;

auto file1 = mock::MakeMockFile({
{KeyStr("c", 5U, kTypeValue), "val2"},
{KeyStr("n", 6U, kTypeValue), "val3"},
});
AddMockFile(file1);

auto file2 = mock::MakeMockFile({{KeyStr("h", 3U, kTypeValue), "val"},
{KeyStr("j", 4U, kTypeValue), "val"}});
AddMockFile(file2, 1);

// Create three L2 files, each size 10.
// max_compaction_bytes 21 means the compaction output in L1 will
// be cut to at least two files.
auto file3 = mock::MakeMockFile({{KeyStr("b", 1U, kTypeValue), "val"},
{KeyStr("c", 1U, kTypeValue), "val"},
{KeyStr("c1", 1U, kTypeValue), "val"},
{KeyStr("c2", 1U, kTypeValue), "val"},
{KeyStr("c3", 1U, kTypeValue), "val"},
{KeyStr("c4", 1U, kTypeValue), "val"},
{KeyStr("d", 1U, kTypeValue), "val"},
{KeyStr("e", 2U, kTypeValue), "val"}});
AddMockFile(file3, 2);

auto file4 = mock::MakeMockFile({{KeyStr("h", 1U, kTypeValue), "val"},
{KeyStr("i", 1U, kTypeValue), "val"},
{KeyStr("i1", 1U, kTypeValue), "val"},
{KeyStr("i2", 1U, kTypeValue), "val"},
{KeyStr("i3", 1U, kTypeValue), "val"},
{KeyStr("i4", 1U, kTypeValue), "val"},
{KeyStr("j", 1U, kTypeValue), "val"},
{KeyStr("k", 2U, kTypeValue), "val"}});
AddMockFile(file4, 2);

auto file5 = mock::MakeMockFile({{KeyStr("l", 1U, kTypeValue), "val"},
{KeyStr("m", 1U, kTypeValue), "val"},
{KeyStr("m1", 1U, kTypeValue), "val"},
{KeyStr("m2", 1U, kTypeValue), "val"},
{KeyStr("m3", 1U, kTypeValue), "val"},
{KeyStr("m4", 1U, kTypeValue), "val"},
{KeyStr("n", 1U, kTypeValue), "val"},
{KeyStr("o", 2U, kTypeValue), "val"}});
AddMockFile(file5, 2);

auto expected_file1 =
mock::MakeMockFile({{KeyStr("c", 5U, kTypeValue), "val2"},
{KeyStr("h", 3U, kTypeValue), "val"}});
auto expected_file2 =
mock::MakeMockFile({{KeyStr("j", 4U, kTypeValue), "val"},
{KeyStr("n", 6U, kTypeValue), "val3"}});

SetLastSequence(6U);

const std::vector<int> input_levels = {0, 1};
auto lvl0_files = cfd_->current()->storage_info()->LevelFiles(0);
auto lvl1_files = cfd_->current()->storage_info()->LevelFiles(1);

RunCompaction({lvl0_files, lvl1_files}, input_levels,
{expected_file1, expected_file2});
}

TEST_P(CompactionJobDynamicFileSizeTest, CutToSkipGrandparentFile) {
bool enable_dyanmic_file_size = GetParam();
cf_options_.level_compaction_dynamic_file_size = enable_dyanmic_file_size;

NewDB();
// Make sure the grandparent level file size (10) qualifies skipping.
// Currently, it has to be > 1/8 of target file size.
mutable_cf_options_.target_file_size_base = 70;

auto file1 = mock::MakeMockFile({
{KeyStr("a", 5U, kTypeValue), "val2"},
{KeyStr("z", 6U, kTypeValue), "val3"},
});
AddMockFile(file1);

auto file2 = mock::MakeMockFile({{KeyStr("c", 3U, kTypeValue), "val"},
{KeyStr("x", 4U, kTypeValue), "val"}});
AddMockFile(file2, 1);

auto file3 = mock::MakeMockFile({{KeyStr("b", 1U, kTypeValue), "val"},
{KeyStr("d", 2U, kTypeValue), "val"}});
AddMockFile(file3, 2);

auto file4 = mock::MakeMockFile({{KeyStr("h", 1U, kTypeValue), "val"},
{KeyStr("i", 2U, kTypeValue), "val"}});
AddMockFile(file4, 2);

auto file5 = mock::MakeMockFile({{KeyStr("v", 1U, kTypeValue), "val"},
{KeyStr("y", 2U, kTypeValue), "val"}});
AddMockFile(file5, 2);

auto expected_file1 =
mock::MakeMockFile({{KeyStr("a", 5U, kTypeValue), "val2"},
{KeyStr("c", 3U, kTypeValue), "val"}});
auto expected_file2 =
mock::MakeMockFile({{KeyStr("x", 4U, kTypeValue), "val"},
{KeyStr("z", 6U, kTypeValue), "val3"}});

auto expected_file_disable_dynamic_file_size =
mock::MakeMockFile({{KeyStr("a", 5U, kTypeValue), "val2"},
{KeyStr("c", 3U, kTypeValue), "val"},
{KeyStr("x", 4U, kTypeValue), "val"},
{KeyStr("z", 6U, kTypeValue), "val3"}});

SetLastSequence(6U);
const std::vector<int> input_levels = {0, 1};
auto lvl0_files = cfd_->current()->storage_info()->LevelFiles(0);
auto lvl1_files = cfd_->current()->storage_info()->LevelFiles(1);
if (enable_dyanmic_file_size) {
RunCompaction({lvl0_files, lvl1_files}, input_levels,
{expected_file1, expected_file2});
} else {
RunCompaction({lvl0_files, lvl1_files}, input_levels,
{expected_file_disable_dynamic_file_size});
}
}

TEST_P(CompactionJobDynamicFileSizeTest, CutToAlignGrandparentBoundary) {
bool enable_dyanmic_file_size = GetParam();
cf_options_.level_compaction_dynamic_file_size = enable_dyanmic_file_size;
NewDB();

// MockTable has 1 byte per entry by default and each file is 10 bytes.
// When the file size is smaller than 100, it won't cut file earlier to align
// with its grandparent boundary.
const size_t kKeyValueSize = 10000;
mock_table_factory_->SetKeyValueSize(kKeyValueSize);

mutable_cf_options_.target_file_size_base = 10 * kKeyValueSize;

mock::KVVector file1;
char ch = 'd';
// Add value from d -> o
for (char i = 0; i < 12; i++) {
file1.emplace_back(KeyStr(std::string(1, ch + i), i + 10, kTypeValue),
"val" + std::to_string(i));
}

AddMockFile(file1);

auto file2 = mock::MakeMockFile({{KeyStr("e", 3U, kTypeValue), "val"},
{KeyStr("s", 4U, kTypeValue), "val"}});
AddMockFile(file2, 1);

// the 1st grandparent file should be skipped
auto file3 = mock::MakeMockFile({{KeyStr("a", 1U, kTypeValue), "val"},
{KeyStr("b", 2U, kTypeValue), "val"}});
AddMockFile(file3, 2);

auto file4 = mock::MakeMockFile({{KeyStr("c", 1U, kTypeValue), "val"},
{KeyStr("e", 2U, kTypeValue), "val"}});
AddMockFile(file4, 2);

auto file5 = mock::MakeMockFile({{KeyStr("h", 1U, kTypeValue), "val"},
{KeyStr("j", 2U, kTypeValue), "val"}});
AddMockFile(file5, 2);

auto file6 = mock::MakeMockFile({{KeyStr("k", 1U, kTypeValue), "val"},
{KeyStr("n", 2U, kTypeValue), "val"}});
AddMockFile(file6, 2);

auto file7 = mock::MakeMockFile({{KeyStr("q", 1U, kTypeValue), "val"},
{KeyStr("t", 2U, kTypeValue), "val"}});
AddMockFile(file7, 2);

// The expected outputs are:
// L1: [d,e,f,g,h,i,j] [k,l,m,n,o,s]
// L2: [a, b] [c, e] [h, j] [k, n] [q, t]
// The first output cut earlier at "j", so it could be aligned with L2 files.
// If dynamic_file_size is not enabled, it will be cut based on the
// target_file_size
mock::KVVector expected_file1;
for (char i = 0; i < 7; i++) {
expected_file1.emplace_back(
KeyStr(std::string(1, ch + i), i + 10, kTypeValue),
"val" + std::to_string(i));
}

mock::KVVector expected_file2;
for (char i = 7; i < 12; i++) {
expected_file2.emplace_back(
KeyStr(std::string(1, ch + i), i + 10, kTypeValue),
"val" + std::to_string(i));
}
expected_file2.emplace_back(KeyStr("s", 4U, kTypeValue), "val");

mock::KVVector expected_file_disable_dynamic_file_size1;
for (char i = 0; i < 10; i++) {
expected_file_disable_dynamic_file_size1.emplace_back(
KeyStr(std::string(1, ch + i), i + 10, kTypeValue),
"val" + std::to_string(i));
}

mock::KVVector expected_file_disable_dynamic_file_size2;
for (char i = 10; i < 12; i++) {
expected_file_disable_dynamic_file_size2.emplace_back(
KeyStr(std::string(1, ch + i), i + 10, kTypeValue),
"val" + std::to_string(i));
}

expected_file_disable_dynamic_file_size2.emplace_back(
KeyStr("s", 4U, kTypeValue), "val");

SetLastSequence(22U);
const std::vector<int> input_levels = {0, 1};
auto lvl0_files = cfd_->current()->storage_info()->LevelFiles(0);
auto lvl1_files = cfd_->current()->storage_info()->LevelFiles(1);
if (enable_dyanmic_file_size) {
RunCompaction({lvl0_files, lvl1_files}, input_levels,
{expected_file1, expected_file2});
} else {
RunCompaction({lvl0_files, lvl1_files}, input_levels,
{expected_file_disable_dynamic_file_size1,
expected_file_disable_dynamic_file_size2});
}
}

INSTANTIATE_TEST_CASE_P(CompactionJobDynamicFileSizeTest,
CompactionJobDynamicFileSizeTest, testing::Bool());

class CompactionJobTimestampTest : public CompactionJobTestBase {
public:
Expand Down
Loading

0 comments on commit 9194650

Please sign in to comment.