From b4d83e4447bd186d83a7807560f209f58576a9e9 Mon Sep 17 00:00:00 2001 From: Andrei Strelkovskii Date: Sun, 25 Aug 2024 10:08:07 +0300 Subject: [PATCH] issue-1824: filestore tablet channel balancing by ApproximateFreeSpaceShare (#1858) --- .../partition/part_actor_monitoring.cpp | 1 + .../libs/storage/partition/part_state.cpp | 30 ++--- .../libs/storage/partition/part_state.h | 1 + .../libs/storage/partition2/part2_state.cpp | 25 +--- cloud/filestore/config/storage.proto | 5 + cloud/filestore/libs/storage/core/config.cpp | 2 + cloud/filestore/libs/storage/core/config.h | 3 + .../service/service_actor_writedata.cpp | 8 ++ .../libs/storage/service/service_ut.cpp | 7 ++ .../libs/storage/tablet/model/channels.cpp | 92 +++++++++----- .../libs/storage/tablet/model/channels.h | 12 +- .../libs/storage/tablet/model/channels_ut.cpp | 114 ++++++++++++++++-- .../libs/storage/tablet/tablet_actor.h | 3 +- .../storage/tablet/tablet_actor_adddata.cpp | 7 +- .../storage/tablet/tablet_actor_writeblob.cpp | 15 ++- .../libs/storage/tablet/tablet_state.cpp | 3 + .../libs/storage/tablet/tablet_state.h | 9 +- .../storage/tablet/tablet_state_channels.cpp | 17 +-- .../libs/storage/tablet/tablet_state_data.cpp | 5 +- .../filestore/private/api/protos/tablet.proto | 5 +- .../core/libs/tablet/model/channels.cpp | 1 + .../storage/core/libs/tablet/model/channels.h | 39 ++++++ cloud/storage/core/libs/tablet/model/ya.make | 1 + .../core/libs/viewer/tablet_monitoring.cpp | 5 + .../core/libs/viewer/tablet_monitoring.h | 1 + 25 files changed, 313 insertions(+), 98 deletions(-) create mode 100644 cloud/storage/core/libs/tablet/model/channels.cpp create mode 100644 cloud/storage/core/libs/tablet/model/channels.h diff --git a/cloud/blockstore/libs/storage/partition/part_actor_monitoring.cpp b/cloud/blockstore/libs/storage/partition/part_actor_monitoring.cpp index 50dc0d9844e..dcf4b02e689 100644 --- a/cloud/blockstore/libs/storage/partition/part_actor_monitoring.cpp +++ b/cloud/blockstore/libs/storage/partition/part_actor_monitoring.cpp @@ -124,6 +124,7 @@ void DumpChannels( TStringBuilder() << dataKind, state.CheckPermissions(c, EChannelPermission::UserWritesAllowed), state.CheckPermissions(c, EChannelPermission::SystemWritesAllowed), + state.GetFreeSpaceShare(c), }); } NCloud::NStorage::DumpChannels( diff --git a/cloud/blockstore/libs/storage/partition/part_state.cpp b/cloud/blockstore/libs/storage/partition/part_state.cpp index 632abed6c67..9c0c7c2ce5c 100644 --- a/cloud/blockstore/libs/storage/partition/part_state.cpp +++ b/cloud/blockstore/libs/storage/partition/part_state.cpp @@ -1,5 +1,7 @@ #include "part_state.h" +#include + #include #include @@ -51,19 +53,6 @@ void DumpOperationState(IOutputStream& out, const TOperationState& op) //////////////////////////////////////////////////////////////////////////////// -double Normalize(double x, double lo, double hi) -{ - if (x > hi) { - return 1; - } - - if (x < lo) { - return 0; - } - - return (x - lo) / (hi - lo); -} - double BPFeature(const TBackpressureFeatureConfig& c, double x) { auto nx = Normalize(x, c.InputThreshold, c.InputLimit); @@ -248,6 +237,12 @@ bool TPartitionState::CheckPermissions(ui32 channel, EChannelPermissions permiss return ch ? ch->Permissions.HasFlags(permissions) : true; } +double TPartitionState::GetFreeSpaceShare(ui32 channel) const +{ + const auto* ch = GetChannel(channel); + return ch ? ch->ApproximateFreeSpaceShare : 0; +} + bool TPartitionState::UpdateChannelFreeSpaceShare(ui32 channel, double share) { if (share) { @@ -319,17 +314,14 @@ bool TPartitionState::CheckChannelFreeSpaceShare(ui32 channel) const const auto& fsc = FreeSpaceConfig; const auto* ch = GetChannel(channel); - if (!ch || !ch->ApproximateFreeSpaceShare) { + if (!ch) { return true; } - const auto fss = Normalize( + return NCloud::CheckChannelFreeSpaceShare( ch->ApproximateFreeSpaceShare, fsc.ChannelMinFreeSpace, - fsc.ChannelFreeSpaceThreshold - ); - - return RandomNumber() < fss; + fsc.ChannelFreeSpaceThreshold); } bool TPartitionState::IsCompactionAllowed() const diff --git a/cloud/blockstore/libs/storage/partition/part_state.h b/cloud/blockstore/libs/storage/partition/part_state.h index 293cc2fb03a..6e3c93b25e8 100644 --- a/cloud/blockstore/libs/storage/partition/part_state.h +++ b/cloud/blockstore/libs/storage/partition/part_state.h @@ -441,6 +441,7 @@ class TPartitionState bool UpdatePermissions(ui32 channel, EChannelPermissions permissions); bool CheckPermissions(ui32 channel, EChannelPermissions permissions) const; + double GetFreeSpaceShare(ui32 channel) const; bool UpdateChannelFreeSpaceShare(ui32 channel, double share); bool CheckChannelFreeSpaceShare(ui32 channel) const; bool IsCompactionAllowed() const; diff --git a/cloud/blockstore/libs/storage/partition2/part2_state.cpp b/cloud/blockstore/libs/storage/partition2/part2_state.cpp index 3baac29bfb4..f7723dfdd61 100644 --- a/cloud/blockstore/libs/storage/partition2/part2_state.cpp +++ b/cloud/blockstore/libs/storage/partition2/part2_state.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -38,19 +39,6 @@ TJsonValue ToJson(const TOperationState& op) //////////////////////////////////////////////////////////////////////////////// -double Normalize(double x, double lo, double hi) -{ - if (x > hi) { - return 1; - } - - if (x < lo) { - return 0; - } - - return (x - lo) / (hi - lo); -} - double BPFeature(const TBackpressureFeatureConfig& c, double x) { auto nx = Normalize(x, c.InputThreshold, c.InputLimit); @@ -375,19 +363,14 @@ bool TPartitionState::CheckChannelFreeSpaceShare(ui32 channel) const const auto& fsc = FreeSpaceConfig; const auto* ch = GetChannel(channel); - if (!ch || !ch->ApproximateFreeSpaceShare) { + if (!ch) { return true; } - // fss will be something like O(exp(-t)), where t is time - // so fss(t) > 0 for any t and lim(fss) = 0 as t approaches +inf - const auto fss = Normalize( + return NCloud::CheckChannelFreeSpaceShare( ch->ApproximateFreeSpaceShare, fsc.ChannelMinFreeSpace, - fsc.ChannelFreeSpaceThreshold - ); - - return RandomNumber() < fss; + fsc.ChannelFreeSpaceThreshold); } bool TPartitionState::IsCompactionAllowed() const diff --git a/cloud/filestore/config/storage.proto b/cloud/filestore/config/storage.proto index 5a0742002e7..2ba6617410e 100644 --- a/cloud/filestore/config/storage.proto +++ b/cloud/filestore/config/storage.proto @@ -341,4 +341,9 @@ message TStorageConfig repeated TFilestoreAliasEntry Entries = 1; } optional TFilestoreAliases FilestoreAliases = 368; + + // Channel free space threshold - used for write request balancing. + optional uint32 ChannelFreeSpaceThreshold = 369; + // Channel min free space - used for write request balancing. + optional uint32 ChannelMinFreeSpace = 370; } diff --git a/cloud/filestore/libs/storage/core/config.cpp b/cloud/filestore/libs/storage/core/config.cpp index 46ceeacaf3c..9d7b38947d8 100644 --- a/cloud/filestore/libs/storage/core/config.cpp +++ b/cloud/filestore/libs/storage/core/config.cpp @@ -181,6 +181,8 @@ using TAliases = NProto::TStorageConfig::TFilestoreAliases; xxx(BlobCompressionCodec, TString, "lz4" )\ \ xxx(MaxZeroCompactionRangesToDeletePerTx, ui32, 10000 )\ + xxx(ChannelFreeSpaceThreshold, ui32, 25 )\ + xxx(ChannelMinFreeSpace, ui32, 10 )\ // FILESTORE_STORAGE_CONFIG #define FILESTORE_STORAGE_CONFIG_REF(xxx) \ diff --git a/cloud/filestore/libs/storage/core/config.h b/cloud/filestore/libs/storage/core/config.h index 5d32c6b35bf..5de0e232386 100644 --- a/cloud/filestore/libs/storage/core/config.h +++ b/cloud/filestore/libs/storage/core/config.h @@ -242,6 +242,9 @@ class TStorageConfig const NProto::TStorageConfig::TFilestoreAliases& GetFilestoreAliases() const; const TString* FindFileSystemIdByAlias(const TString& alias) const; + + ui32 GetChannelFreeSpaceThreshold() const; + ui32 GetChannelMinFreeSpace() const; }; } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/service/service_actor_writedata.cpp b/cloud/filestore/libs/storage/service/service_actor_writedata.cpp index 63de1db163b..6d6cf54f65b 100644 --- a/cloud/filestore/libs/storage/service/service_actor_writedata.cpp +++ b/cloud/filestore/libs/storage/service/service_actor_writedata.cpp @@ -51,6 +51,7 @@ class TWriteDataActor final: public TActorBootstrapped TMaybe InFlightRequest; TVector> InFlightBSRequests; TVector StorageStatusFlags; + TVector ApproximateFreeSpaceShares; const NCloud::NProto::EStorageMediaKind MediaKind; public: @@ -175,6 +176,7 @@ class TWriteDataActor final: public TActorBootstrapped RequestInfo->CallContext->RequestType = EFileStoreRequest::WriteBlob; InFlightBSRequests.reserve(RemainingBlobsToWrite); StorageStatusFlags.resize(GenerateBlobIdsResponse.BlobsSize()); + ApproximateFreeSpaceShares.resize(GenerateBlobIdsResponse.BlobsSize()); for (const auto& blob: GenerateBlobIdsResponse.GetBlobs()) { NKikimr::TLogoBlobID blobId = LogoBlobIDFromLogoBlobID(blob.GetBlobId()); @@ -259,6 +261,7 @@ class TWriteDataActor final: public TActorBootstrapped !InFlightBSRequests[blobIdx]->IsCompleted()); InFlightBSRequests[blobIdx]->Complete(ctx.Now(), {}); StorageStatusFlags[blobIdx] = msg->StatusFlags.Raw; + ApproximateFreeSpaceShares[blobIdx] = msg->ApproximateFreeSpaceShare; --RemainingBlobsToWrite; if (RemainingBlobsToWrite == 0) { @@ -287,6 +290,11 @@ class TWriteDataActor final: public TActorBootstrapped for (const auto flags: StorageStatusFlags) { request->Record.AddStorageStatusFlags(flags); } + request->Record.MutableApproximateFreeSpaceShares()->Reserve( + ApproximateFreeSpaceShares.size()); + for (const auto share: ApproximateFreeSpaceShares) { + request->Record.AddApproximateFreeSpaceShares(share); + } if (Range.Offset < BlobRange.Offset) { auto& unalignedHead = *request->Record.AddUnalignedDataRanges(); diff --git a/cloud/filestore/libs/storage/service/service_ut.cpp b/cloud/filestore/libs/storage/service/service_ut.cpp index 773d29638ae..e04ecdfc00d 100644 --- a/cloud/filestore/libs/storage/service/service_ut.cpp +++ b/cloud/filestore/libs/storage/service/service_ut.cpp @@ -2724,6 +2724,7 @@ Y_UNIT_TEST_SUITE(TStorageServiceTest) NProtoPrivate::TAddDataRequest addData; using TFlags = NKikimr::TStorageStatusFlags; + const float freeSpaceShare = 0.22; env.GetRuntime().SetEventFilter( [&](auto& runtime, auto& event) { @@ -2735,6 +2736,8 @@ Y_UNIT_TEST_SUITE(TStorageServiceTest) event->template Get(); const_cast(msg->StatusFlags).Raw |= ui32(yellowFlag); + const_cast(msg->ApproximateFreeSpaceShare) = + freeSpaceShare; break; } @@ -2753,6 +2756,10 @@ Y_UNIT_TEST_SUITE(TStorageServiceTest) UNIT_ASSERT_VALUES_EQUAL(1, addData.StorageStatusFlagsSize()); UNIT_ASSERT(NKikimr::TStorageStatusFlags( addData.GetStorageStatusFlags(0)).Check(yellowFlag)); + UNIT_ASSERT_VALUES_EQUAL(1, addData.ApproximateFreeSpaceSharesSize()); + UNIT_ASSERT_VALUES_EQUAL( + freeSpaceShare, + addData.GetApproximateFreeSpaceShares(0)); } void ConfigureFollowers( diff --git a/cloud/filestore/libs/storage/tablet/model/channels.cpp b/cloud/filestore/libs/storage/tablet/model/channels.cpp index 6ef94d1301b..5c09a0c84d6 100644 --- a/cloud/filestore/libs/storage/tablet/model/channels.cpp +++ b/cloud/filestore/libs/storage/tablet/model/channels.cpp @@ -1,5 +1,7 @@ #include "channels.h" +#include + #include #include @@ -18,6 +20,7 @@ struct TChannelMeta TString PoolKind; bool Writable = true; bool ToMove = false; + double FreeSpaceShare = 0; TChannelMeta() = default; @@ -35,17 +38,35 @@ struct TChannelRegistry TVector ChannelMetas; ui32 ChannelIndex = 0; - const TChannelMeta* SelectChannel() + const TChannelMeta* SelectChannel( + double minFreeSpace, + double freeSpaceThreshold) { + const TChannelMeta* bestMeta = nullptr; + double bestSpaceShare = 0; for (ui32 i = 0; i < ChannelMetas.size(); ++i) { const auto* meta = ChannelMetas[ChannelIndex % ChannelMetas.size()]; ++ChannelIndex; - if (meta->Writable) { + if (!meta->Writable) { + continue; + } + + const bool ok = CheckChannelFreeSpaceShare( + meta->FreeSpaceShare, + minFreeSpace, + freeSpaceThreshold); + + if (ok) { return meta; } + + if (meta->FreeSpaceShare > bestSpaceShare) { + bestMeta = meta; + bestSpaceShare = meta->FreeSpaceShare; + } } - return nullptr; + return bestMeta; } TVector GetChannels() const @@ -75,9 +96,15 @@ struct TChannels::TImpl TChannelsByDataKind ByDataKind; void AddChannel(ui32 channel, EChannelDataKind dataKind, TString poolKind); - void RegisterUnwritableChannel(ui32 channel); - void RegisterChannelToMove(ui32 channel); - TMaybe SelectChannel(EChannelDataKind dataKind); + void UpdateChannelStats( + ui32 channel, + bool writable, + bool toMove, + double freeSpaceShare); + TMaybe SelectChannel( + EChannelDataKind dataKind, + double minFreeSpace, + double freeSpaceThreshold); TVector GetChannels(EChannelDataKind dataKind) const; TVector GetUnwritableChannels() const; @@ -107,18 +134,20 @@ void TChannels::TImpl::AddChannel( byDataKind.ChannelMetas.push_back(&AllChannels.back()); } -void TChannels::TImpl::RegisterUnwritableChannel(ui32 channel) -{ - Y_ABORT_UNLESS(channel < AllChannels.size()); - - AllChannels[channel].Writable = false; -} - -void TChannels::TImpl::RegisterChannelToMove(ui32 channel) +void TChannels::TImpl::UpdateChannelStats( + ui32 channel, + bool writable, + bool toMove, + double freeSpaceShare) { Y_ABORT_UNLESS(channel < AllChannels.size()); - AllChannels[channel].ToMove = true; + AllChannels[channel].Writable = writable; + AllChannels[channel].ToMove = toMove; + // a value which is exactly 0 is equivalent to "no data" + if (freeSpaceShare != 0.) { + AllChannels[channel].FreeSpaceShare = freeSpaceShare; + } } TVector TChannels::TImpl::GetChannels(EChannelDataKind dataKind) const @@ -139,7 +168,8 @@ TVector TChannels::TImpl::GetUnwritableChannels() const return result; } -TVector TChannels::TImpl::GetChannelsToMove(ui32 percentageThreshold) const +TVector TChannels::TImpl::GetChannelsToMove( + ui32 percentageThreshold) const { TVector result; @@ -169,6 +199,7 @@ TChannels::TImpl::MakeChannelMonInfos() const TStringBuilder() << meta.DataKind, meta.Writable, meta.Writable, // TODO: SystemWritable + meta.FreeSpaceShare, }); } @@ -188,10 +219,15 @@ TChannelsStats TChannels::TImpl::CalculateChannelsStats() const return stats; } -TMaybe TChannels::TImpl::SelectChannel(EChannelDataKind dataKind) +TMaybe TChannels::TImpl::SelectChannel( + EChannelDataKind dataKind, + double minFreeSpace, + double freeSpaceThreshold) { auto& byDataKind = ByDataKind[static_cast(dataKind)]; - if (const auto* meta = byDataKind.SelectChannel()) { + const auto* meta = + byDataKind.SelectChannel(minFreeSpace, freeSpaceThreshold); + if (meta) { return meta->Channel; } @@ -226,14 +262,13 @@ void TChannels::AddChannel( GetImpl().AddChannel(channel, dataKind, std::move(poolKind)); } -void TChannels::RegisterUnwritableChannel(ui32 channel) -{ - GetImpl().RegisterUnwritableChannel(channel); -} - -void TChannels::RegisterChannelToMove(ui32 channel) +void TChannels::UpdateChannelStats( + ui32 channel, + bool writable, + bool toMove, + double freeSpaceShare) { - GetImpl().RegisterChannelToMove(channel); + GetImpl().UpdateChannelStats(channel, writable, toMove, freeSpaceShare); } TVector TChannels::GetChannels(EChannelDataKind dataKind) const @@ -261,9 +296,12 @@ TChannelsStats TChannels::CalculateChannelsStats() const return GetImpl().CalculateChannelsStats(); } -TMaybe TChannels::SelectChannel(EChannelDataKind dataKind) +TMaybe TChannels::SelectChannel( + EChannelDataKind dataKind, + double minFreeSpace, + double freeSpaceThreshold) { - return GetImpl().SelectChannel(dataKind); + return GetImpl().SelectChannel(dataKind, minFreeSpace, freeSpaceThreshold); } ui32 TChannels::Size() const diff --git a/cloud/filestore/libs/storage/tablet/model/channels.h b/cloud/filestore/libs/storage/tablet/model/channels.h index b1db2c5ad5b..a18dc0dfef7 100644 --- a/cloud/filestore/libs/storage/tablet/model/channels.h +++ b/cloud/filestore/libs/storage/tablet/model/channels.h @@ -34,9 +34,15 @@ class TChannels ~TChannels(); void AddChannel(ui32 channel, EChannelDataKind dataKind, TString poolKind); - void RegisterUnwritableChannel(ui32 channel); - void RegisterChannelToMove(ui32 channel); - TMaybe SelectChannel(EChannelDataKind dataKind); + void UpdateChannelStats( + ui32 channel, + bool writable, + bool toMove, + double freeSpaceShare); + TMaybe SelectChannel( + EChannelDataKind dataKind, + double minFreeSpace, + double freeSpaceThreshold); TVector GetChannels(EChannelDataKind dataKind) const; TVector GetUnwritableChannels() const; diff --git a/cloud/filestore/libs/storage/tablet/model/channels_ut.cpp b/cloud/filestore/libs/storage/tablet/model/channels_ut.cpp index bf071654670..1f52c899542 100644 --- a/cloud/filestore/libs/storage/tablet/model/channels_ut.cpp +++ b/cloud/filestore/libs/storage/tablet/model/channels_ut.cpp @@ -10,16 +10,27 @@ namespace { //////////////////////////////////////////////////////////////////////////////// +const double freeSpaceThreshold = 0.25; +const double minFreeSpace = 0.10; + +//////////////////////////////////////////////////////////////////////////////// + #define CHECK_SELECTED_CHANNEL(dataKind, expected) \ UNIT_ASSERT_VALUES_EQUAL( \ expected, \ - *channels.SelectChannel(EChannelDataKind::dataKind)); \ + *channels.SelectChannel( \ + EChannelDataKind::dataKind, \ + minFreeSpace, \ + freeSpaceThreshold)); \ // CHECK_SELECTED_CHANNEL #define CHECK_SELECTED_CHANNEL_EMPTY(dataKind) \ UNIT_ASSERT_VALUES_EQUAL( \ false, \ - channels.SelectChannel(EChannelDataKind::dataKind).Defined()); \ + channels.SelectChannel( \ + EChannelDataKind::dataKind, \ + minFreeSpace, \ + freeSpaceThreshold).Defined()); \ // CHECK_SELECTED_CHANNEL_EMPTY //////////////////////////////////////////////////////////////////////////////// @@ -62,9 +73,9 @@ Y_UNIT_TEST_SUITE(TChannelsTest) channels.GetUnwritableChannels() ); - channels.RegisterUnwritableChannel(1); - channels.RegisterUnwritableChannel(3); - channels.RegisterUnwritableChannel(4); + channels.UpdateChannelStats(1, false, false, 0); + channels.UpdateChannelStats(3, false, false, 0); + channels.UpdateChannelStats(4, false, false, 0); CHECK_SELECTED_CHANNEL_EMPTY(Index); CHECK_SELECTED_CHANNEL(Fresh, 2); @@ -78,8 +89,8 @@ Y_UNIT_TEST_SUITE(TChannelsTest) ); // check idempotency - channels.RegisterUnwritableChannel(6); - channels.RegisterUnwritableChannel(6); + channels.UpdateChannelStats(6, false, false, 0); + channels.UpdateChannelStats(6, false, false, 0); CHECK_SELECTED_CHANNEL_EMPTY(Index); CHECK_SELECTED_CHANNEL(Fresh, 2); @@ -91,8 +102,8 @@ Y_UNIT_TEST_SUITE(TChannelsTest) channels.GetUnwritableChannels() ); - channels.RegisterUnwritableChannel(2); - channels.RegisterUnwritableChannel(5); + channels.UpdateChannelStats(2, false, false, 0); + channels.UpdateChannelStats(5, false, false, 0); CHECK_SELECTED_CHANNEL_EMPTY(Index); CHECK_SELECTED_CHANNEL_EMPTY(Fresh); @@ -114,19 +125,100 @@ Y_UNIT_TEST_SUITE(TChannelsTest) channels.GetChannelsToMove(t)); for (ui32 c = 10; c < 19; ++c) { - channels.RegisterChannelToMove(c); + channels.UpdateChannelStats(c, true, true, 0); } ASSERT_VECTORS_EQUAL( TVector{}, channels.GetChannelsToMove(t)); - channels.RegisterChannelToMove(19); + channels.UpdateChannelStats(19, true, true, 0); ASSERT_VECTORS_EQUAL( TVector({10, 11, 12, 13, 14, 15, 16, 17, 18, 19}), channels.GetChannelsToMove(t)); } + + Y_UNIT_TEST(ShouldBalanceChannelsBasedOnFreeSpace) + { + const ui32 channelCount = 11; + TChannels channels = SetupChannels(channelCount); + channels.UpdateChannelStats(3, true, false, 0); + channels.UpdateChannelStats(4, false, false, 0); + channels.UpdateChannelStats(5, false, false, 0.9); + channels.UpdateChannelStats(6, true, false, 0.175); + channels.UpdateChannelStats(7, true, false, 0.05); + channels.UpdateChannelStats(8, true, false, 0.1); + channels.UpdateChannelStats(9, true, false, 0.25); + channels.UpdateChannelStats(10, true, false, 0.5); + + TVector counts(channelCount); + const ui32 iters = 10000; + for (ui32 i = 0; i < iters; ++i) { + auto selected = channels.SelectChannel( + EChannelDataKind::Mixed, + minFreeSpace, + freeSpaceThreshold); + UNIT_ASSERT(selected.Defined()); + ++counts[*selected]; + } + + // Writable == false + UNIT_ASSERT_VALUES_EQUAL(0, counts[4]); + UNIT_ASSERT_VALUES_EQUAL(0, counts[5]); + + // freeSpace <= minFreeSpace + UNIT_ASSERT_VALUES_EQUAL(0, counts[7]); + UNIT_ASSERT_VALUES_EQUAL(0, counts[8]); + + // 3, 6, 9, 10 channels remain + // channel 6 should be selected 2 times less often then the other ones + const auto halfShare = iters / 7.; + const auto otherShare = 2 * halfShare; + UNIT_ASSERT_DOUBLES_EQUAL(otherShare, counts[3], halfShare * 0.2); + UNIT_ASSERT_DOUBLES_EQUAL(halfShare, counts[6], halfShare * 0.2); + UNIT_ASSERT_DOUBLES_EQUAL(otherShare, counts[9], halfShare * 0.2); + UNIT_ASSERT_DOUBLES_EQUAL(otherShare, counts[10], halfShare * 0.2); + } + + Y_UNIT_TEST(ShouldSelectChannelWithLargestFreeSpaceShareIfAllChannelsAreAlmostFull) + { + const ui32 channelCount = 11; + TChannels channels = SetupChannels(channelCount); + channels.UpdateChannelStats(3, true, false, 0.01); + channels.UpdateChannelStats(4, false, false, 0); + channels.UpdateChannelStats(5, false, false, 0.9); + channels.UpdateChannelStats(6, true, false, 0.075); + channels.UpdateChannelStats(7, true, false, 0.05); + channels.UpdateChannelStats(8, true, false, 0.01); + channels.UpdateChannelStats(9, true, false, 0.025); + channels.UpdateChannelStats(10, true, false, 0.05); + + TVector counts(channelCount); + const ui32 iters = 1000; + for (ui32 i = 0; i < iters; ++i) { + auto selected = channels.SelectChannel( + EChannelDataKind::Mixed, + minFreeSpace, + freeSpaceThreshold); + UNIT_ASSERT(selected.Defined()); + ++counts[*selected]; + } + + // Writable == false + UNIT_ASSERT_VALUES_EQUAL(0, counts[4]); + UNIT_ASSERT_VALUES_EQUAL(0, counts[5]); + + // best free space share + UNIT_ASSERT_VALUES_EQUAL(iters, counts[6]); + + // other channels + UNIT_ASSERT_VALUES_EQUAL(0, counts[3]); + UNIT_ASSERT_VALUES_EQUAL(0, counts[7]); + UNIT_ASSERT_VALUES_EQUAL(0, counts[8]); + UNIT_ASSERT_VALUES_EQUAL(0, counts[9]); + UNIT_ASSERT_VALUES_EQUAL(0, counts[10]); + } } } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor.h b/cloud/filestore/libs/storage/tablet/tablet_actor.h index fcea252950a..d12340dc83a 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor.h +++ b/cloud/filestore/libs/storage/tablet/tablet_actor.h @@ -289,7 +289,8 @@ class TIndexTabletActor final const NActors::TActorContext& ctx, ui32 generation, ui32 channel, - const NKikimr::TStorageStatusFlags flags); + const NKikimr::TStorageStatusFlags flags, + double freeSpaceShare); void ReassignDataChannelsIfNeeded(const NActors::TActorContext& ctx); bool OnRenderAppHtmlPage( NActors::NMon::TEvRemoteHttpInfo::TPtr ev, diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_adddata.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_adddata.cpp index 68fae69945a..6d60f566f9e 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_adddata.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_adddata.cpp @@ -441,11 +441,16 @@ void TIndexTabletActor::HandleAddData( const auto evPutResultCount = Min(blobIds.size(), msg->Record.StorageStatusFlagsSize()); for (ui32 i = 0; i < evPutResultCount; ++i) { + const double approximateFreeSpaceShare = + i < msg->Record.ApproximateFreeSpaceSharesSize() + ? msg->Record.GetApproximateFreeSpaceShares(i) + : 0; RegisterEvPutResult( ctx, blobIds[i].Generation(), blobIds[i].Channel(), - msg->Record.GetStorageStatusFlags(i)); + msg->Record.GetStorageStatusFlags(i), + approximateFreeSpaceShare); } AddTransaction(*requestInfo); diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_writeblob.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_writeblob.cpp index 99ad40d8920..c0675d1667a 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_writeblob.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_writeblob.cpp @@ -364,12 +364,16 @@ void TIndexTabletActor::RegisterEvPutResult( const TActorContext& ctx, ui32 generation, ui32 channel, - const NKikimr::TStorageStatusFlags flags) + const NKikimr::TStorageStatusFlags flags, + double freeSpaceShare) { const auto validFlag = NKikimrBlobStorage::EStatusFlags::StatusIsValid; if (flags.Check(validFlag)) { ui32 group = Info()->GroupFor(channel, generation); + bool writable = true; + bool toMove = false; + if (flags.Check(NKikimrBlobStorage::StatusDiskSpaceLightYellowMove)) { LOG_WARN(ctx, TFileStoreComponents::TABLET, "%s Yellow move flag received for channel %u and group %u", @@ -377,7 +381,7 @@ void TIndexTabletActor::RegisterEvPutResult( channel, group); - RegisterChannelToMove(channel); + toMove = true; } if (flags.Check(NKikimrBlobStorage::StatusDiskSpaceYellowStop)) { LOG_WARN(ctx, TFileStoreComponents::TABLET, @@ -386,9 +390,11 @@ void TIndexTabletActor::RegisterEvPutResult( channel, group); - RegisterUnwritableChannel(channel); + writable = false; } + UpdateChannelStats(channel, writable, toMove, freeSpaceShare); + ReassignDataChannelsIfNeeded(ctx); } } @@ -413,7 +419,8 @@ void TIndexTabletActor::HandleWriteBlobCompleted( ctx, result.BlobId.Generation(), result.BlobId.Channel(), - result.StorageStatusFlags); + result.StorageStatusFlags, + result.ApproximateFreeSpaceShare); } if (FAILED(msg->GetStatus())) { diff --git a/cloud/filestore/libs/storage/tablet/tablet_state.cpp b/cloud/filestore/libs/storage/tablet/tablet_state.cpp index 9bfb430d0cf..167aaa937e7 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_state.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_state.cpp @@ -93,6 +93,9 @@ void TIndexTabletState::LoadState( TruncateBlocksThreshold = config.GetMaxBlocksPerTruncateTx(); SessionHistoryEntryCount = config.GetSessionHistoryEntryCount(); + ChannelMinFreeSpace = config.GetChannelMinFreeSpace() / 100.; + ChannelFreeSpaceThreshold = config.GetChannelFreeSpaceThreshold() / 100.; + FileSystem.CopyFrom(fileSystem); FileSystemStats.CopyFrom(fileSystemStats); TabletStorageInfo.CopyFrom(tabletStorageInfo); diff --git a/cloud/filestore/libs/storage/tablet/tablet_state.h b/cloud/filestore/libs/storage/tablet/tablet_state.h index e4b46c05e87..691226a1b8e 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_state.h +++ b/cloud/filestore/libs/storage/tablet/tablet_state.h @@ -143,6 +143,8 @@ class TIndexTabletState /*const*/ ui32 TruncateBlocksThreshold = 0; /*const*/ ui32 SessionHistoryEntryCount = 0; + /*const*/ double ChannelMinFreeSpace = 0; + /*const*/ double ChannelFreeSpaceThreshold = 1; bool StateLoaded = false; @@ -316,8 +318,11 @@ FILESTORE_FILESYSTEM_STATS(FILESTORE_DECLARE_COUNTER) TChannelsStats CalculateChannelsStats() const; - void RegisterUnwritableChannel(ui32 channel); - void RegisterChannelToMove(ui32 channel); + void UpdateChannelStats( + ui32 channel, + bool writable, + bool toMove, + double freeSpaceShare); private: void LoadChannels(); diff --git a/cloud/filestore/libs/storage/tablet/tablet_state_channels.cpp b/cloud/filestore/libs/storage/tablet/tablet_state_channels.cpp index d1ccedcdb62..269306ba524 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_state_channels.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_state_channels.cpp @@ -14,14 +14,17 @@ ui64 TIndexTabletState::GetConfigChannelCount() const return FileSystem.ExplicitChannelProfilesSize(); } -void TIndexTabletState::RegisterUnwritableChannel(ui32 channel) +void TIndexTabletState::UpdateChannelStats( + ui32 channel, + bool writable, + bool toMove, + double freeSpaceShare) { - Impl->Channels.RegisterUnwritableChannel(channel); -} - -void TIndexTabletState::RegisterChannelToMove(ui32 channel) -{ - Impl->Channels.RegisterChannelToMove(channel); + Impl->Channels.UpdateChannelStats( + channel, + writable, + toMove, + freeSpaceShare); } TVector TIndexTabletState::GetChannels(EChannelDataKind kind) const diff --git a/cloud/filestore/libs/storage/tablet/tablet_state_data.cpp b/cloud/filestore/libs/storage/tablet/tablet_state_data.cpp index 309a566c0a1..1c9d45c2097 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_state_data.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_state_data.cpp @@ -55,7 +55,10 @@ bool TIndexTabletState::GenerateBlobId( { auto [gen, step] = ParseCommitId(commitId); - const auto channel = Impl->Channels.SelectChannel(EChannelDataKind::Mixed); + const auto channel = Impl->Channels.SelectChannel( + EChannelDataKind::Mixed, + ChannelMinFreeSpace, + ChannelFreeSpaceThreshold); if (!channel) { return false; } diff --git a/cloud/filestore/private/api/protos/tablet.proto b/cloud/filestore/private/api/protos/tablet.proto index a97986a26a4..2339aafd2b0 100644 --- a/cloud/filestore/private/api/protos/tablet.proto +++ b/cloud/filestore/private/api/protos/tablet.proto @@ -453,8 +453,11 @@ message TAddDataRequest // Unaligned data parts - supposed to contain unaligned head and tail. repeated TFreshDataRange UnalignedDataRanges = 9; - // StorageStatusFlags for the written blobs. + // StorageStatusFlags for the written blobs' channels. repeated uint32 StorageStatusFlags = 10; + + // ApproximateFreeSpace shares for the written blobs' channels. + repeated double ApproximateFreeSpaceShares = 11; } message TAddDataResponse diff --git a/cloud/storage/core/libs/tablet/model/channels.cpp b/cloud/storage/core/libs/tablet/model/channels.cpp new file mode 100644 index 00000000000..079d47d2672 --- /dev/null +++ b/cloud/storage/core/libs/tablet/model/channels.cpp @@ -0,0 +1 @@ +#include "channels.h" diff --git a/cloud/storage/core/libs/tablet/model/channels.h b/cloud/storage/core/libs/tablet/model/channels.h new file mode 100644 index 00000000000..95580209123 --- /dev/null +++ b/cloud/storage/core/libs/tablet/model/channels.h @@ -0,0 +1,39 @@ +#pragma once + +#include + +namespace NCloud { + +//////////////////////////////////////////////////////////////////////////////// + +inline double Normalize(double x, double lo, double hi) +{ + if (x > hi) { + return 1; + } + + if (x < lo) { + return 0; + } + + return (x - lo) / (hi - lo); +} + +inline bool CheckChannelFreeSpaceShare( + double freeSpaceShare, + double minFreeSpace, + double freeSpaceThreshold) +{ + if (freeSpaceShare == 0.) { + return true; + } + + const auto fss = Normalize( + freeSpaceShare, + minFreeSpace, + freeSpaceThreshold); + + return RandomNumber() < fss; +} + +} // namespace NCloud diff --git a/cloud/storage/core/libs/tablet/model/ya.make b/cloud/storage/core/libs/tablet/model/ya.make index 7485127232b..8a74995ea3b 100644 --- a/cloud/storage/core/libs/tablet/model/ya.make +++ b/cloud/storage/core/libs/tablet/model/ya.make @@ -3,6 +3,7 @@ LIBRARY() INCLUDE(${ARCADIA_ROOT}/cloud/storage/deny_ydb_dependency.inc) SRCS( + channels.cpp commit.cpp partial_blob_id.cpp ) diff --git a/cloud/storage/core/libs/viewer/tablet_monitoring.cpp b/cloud/storage/core/libs/viewer/tablet_monitoring.cpp index a3218e07bbe..08e7cd5e95e 100644 --- a/cloud/storage/core/libs/viewer/tablet_monitoring.cpp +++ b/cloud/storage/core/libs/viewer/tablet_monitoring.cpp @@ -59,6 +59,11 @@ void DumpChannel( TStringBuilder() << "background-color: " << color) { out << label; + const auto freePercentage = + static_cast(channelInfo.FreeSpaceShare * 100); + if (freePercentage) { + out << " free=" << freePercentage << "%"; + } } } TABLED() { diff --git a/cloud/storage/core/libs/viewer/tablet_monitoring.h b/cloud/storage/core/libs/viewer/tablet_monitoring.h index 052e0ddaade..04d8cd934d0 100644 --- a/cloud/storage/core/libs/viewer/tablet_monitoring.h +++ b/cloud/storage/core/libs/viewer/tablet_monitoring.h @@ -15,6 +15,7 @@ struct TChannelMonInfo TString DataKind; bool Writable = false; bool SystemWritable = false; + double FreeSpaceShare = 0; }; using TGetMonitoringYDBGroupUrl = std::function