Skip to content

Commit

Permalink
add fs and tablets counters (#2861)
Browse files Browse the repository at this point in the history
* initial commit

* rebase

* update
  • Loading branch information
yegorskii authored Jan 17, 2025
1 parent 04b9add commit 11fa969
Show file tree
Hide file tree
Showing 11 changed files with 221 additions and 1 deletion.
3 changes: 3 additions & 0 deletions cloud/filestore/libs/storage/api/service.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,19 @@ struct TEvService
const TString FileStoreId;
const ui64 TabletId;
const ui64 Generation;
const bool IsShard;
NProtoPrivate::TFileSystemConfig Config;

TRegisterLocalFileStore(
TString fileStoreId,
ui64 tablet,
ui64 generation,
bool isShard,
NProtoPrivate::TFileSystemConfig config)
: FileStoreId(std::move(fileStoreId))
, TabletId(tablet)
, Generation(generation)
, IsShard(isShard)
, Config(std::move(config))
{}
};
Expand Down
14 changes: 13 additions & 1 deletion cloud/filestore/libs/storage/service/service_actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ void TStorageServiceActor::RegisterPages(const NActors::TActorContext& ctx)
mon->RegisterActorPage(rootPage, "service", "Service",
false, ctx.ExecutorThread.ActorSystem, SelfId());
}

}

void TStorageServiceActor::RegisterCounters(const NActors::TActorContext& ctx)
Expand All @@ -64,6 +63,18 @@ void TStorageServiceActor::RegisterCounters(const NActors::TActorContext& ctx)

CpuWait = serverCounters->GetCounter("CpuWait", false);
CpuWaitFailure = serverCounters->GetCounter("CpuWaitFailure", false);

// Service-level gauges: totals live directly under component=service.
auto serviceCounters = rootGroup->GetSubgroup("component", "service");
TotalFileSystemCount = serviceCounters->GetCounter("FileSystemCount", false);
TotalTabletCount = serviceCounters->GetCounter("TabletCount", false);

// Per-media breakdown lives in the type=hdd / type=ssd subgroups.
auto hddCounters = serviceCounters->GetSubgroup("type", "hdd");
HddFileSystemCount = hddCounters->GetCounter("FileSystemCount", false);
HddTabletCount = hddCounters->GetCounter("TabletCount", false);

// The SSD gauges must come from ssdCounters. Fetching them from hddCounters
// would make the SSD members alias the HDD counters, so the type=ssd
// subgroup would never be updated.
auto ssdCounters = serviceCounters->GetSubgroup("type", "ssd");
SsdFileSystemCount = ssdCounters->GetCounter("FileSystemCount", false);
SsdTabletCount = ssdCounters->GetCounter("TabletCount", false);
}

void TStorageServiceActor::ScheduleUpdateStats(const NActors::TActorContext& ctx)
Expand Down Expand Up @@ -166,6 +177,7 @@ void TStorageServiceActor::HandleRegisterLocalFileStore(
msg->FileStoreId,
msg->TabletId,
msg->Generation,
msg->IsShard,
std::move(msg->Config));
}
}
Expand Down
10 changes: 10 additions & 0 deletions cloud/filestore/libs/storage/service/service_actor.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,16 @@ class TStorageServiceActor final

NMonitoring::TDynamicCounters::TCounterPtr CpuWait;
NMonitoring::TDynamicCounters::TCounterPtr CpuWaitFailure;

// Gauges overwritten in HandleUpdateStats from the locally registered file
// stores. FileSystemCount skips entries flagged as shards; TabletCount
// counts every entry.
NMonitoring::TDynamicCounters::TCounterPtr TotalFileSystemCount;
NMonitoring::TDynamicCounters::TCounterPtr TotalTabletCount;

// Same counts for entries whose media kind is anything other than SSD.
NMonitoring::TDynamicCounters::TCounterPtr HddFileSystemCount;
NMonitoring::TDynamicCounters::TCounterPtr HddTabletCount;

// Same counts for entries whose media kind is STORAGE_MEDIA_SSD.
NMonitoring::TDynamicCounters::TCounterPtr SsdFileSystemCount;
NMonitoring::TDynamicCounters::TCounterPtr SsdTabletCount;

TInstant LastCpuWaitQuery;

public:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,39 @@ void TStorageServiceActor::HandleUpdateStats(
{
Y_UNUSED(ev);

if (State) {
    // Recompute the file system / tablet gauges from scratch on every
    // stats update. Anything that is not SSD is accounted as HDD.
    constexpr auto SsdKind = NProto::EStorageMediaKind::STORAGE_MEDIA_SSD;

    i64 fileSystemsTotal = 0;
    i64 fileSystemsOnHdd = 0;
    i64 fileSystemsOnSsd = 0;
    i64 tabletsTotal = 0;
    i64 tabletsOnHdd = 0;
    i64 tabletsOnSsd = 0;

    for (const auto& entry: State->GetLocalFileStores()) {
        const auto& store = entry.second;
        const bool onSsd = store.Config.GetStorageMediaKind() == SsdKind;

        // Every registered entry counts as a tablet.
        ++tabletsTotal;
        ++(onSsd ? tabletsOnSsd : tabletsOnHdd);

        // Shards are tablets but not file systems of their own.
        if (store.IsShard) {
            continue;
        }
        ++fileSystemsTotal;
        ++(onSsd ? fileSystemsOnSsd : fileSystemsOnHdd);
    }

    TotalFileSystemCount->Set(fileSystemsTotal);
    TotalTabletCount->Set(tabletsTotal);
    SsdFileSystemCount->Set(fileSystemsOnSsd);
    HddFileSystemCount->Set(fileSystemsOnHdd);
    SsdTabletCount->Set(tabletsOnSsd);
    HddTabletCount->Set(tabletsOnHdd);
}

auto now = GetCycleCount();
for (auto it = InFlightRequests.begin(); it != InFlightRequests.end(); ) {
const auto& request = it->second;
Expand Down
2 changes: 2 additions & 0 deletions cloud/filestore/libs/storage/service/service_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ void TStorageServiceState::RegisterLocalFileStore(
const TString& id,
ui64 tablet,
ui32 generation,
bool isShard,
NProtoPrivate::TFileSystemConfig config)
{
// in case new instance registered before old unregistered or config was updated
Expand All @@ -191,6 +192,7 @@ void TStorageServiceState::RegisterLocalFileStore(
id,
tablet,
generation,
isShard,
std::move(config)));
}

Expand Down
4 changes: 4 additions & 0 deletions cloud/filestore/libs/storage/service/service_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,17 +156,20 @@ struct TLocalFileStore
const TString FileStoreId;
const ui64 TabletId;
const ui32 Generation;
const bool IsShard;

NProtoPrivate::TFileSystemConfig Config;

TLocalFileStore(
TString id,
ui64 tablet,
ui32 generation,
bool isShard,
NProtoPrivate::TFileSystemConfig config)
: FileStoreId(std::move(id))
, TabletId(tablet)
, Generation(generation)
, IsShard(isShard)
, Config(std::move(config))
{}
};
Expand Down Expand Up @@ -221,6 +224,7 @@ class TStorageServiceState
const TString& id,
ui64 tablet,
ui32 generation,
bool isShard,
NProtoPrivate::TFileSystemConfig config);
void UnregisterLocalFileStore(
const TString& id,
Expand Down
75 changes: 75 additions & 0 deletions cloud/filestore/libs/storage/service/service_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3259,6 +3259,81 @@ Y_UNIT_TEST_SUITE(TStorageServiceTest)
CheckThreeStageWrites(NProto::STORAGE_MEDIA_SSD, true);
CheckTwoStageReads(NProto::STORAGE_MEDIA_SSD, true);
}

// Registers a single non-shard file store with a default config, checks that
// the total and HDD gauges become 1 while the SSD gauges stay 0, then
// unregisters it and checks that every gauge drops back to 0.
Y_UNIT_TEST(ShouldUpdateFileSystemAndTabletCountersOnRegisterAndUnregister)
{
    TTestEnv env;
    env.CreateSubDomain("nfs");

    const ui32 nodeIdx = env.CreateNode("nfs");
    TServiceClient service(env.GetRuntime(), nodeIdx);

    // Moves the test clock forward and drains pending events, giving the
    // service a chance to run its stats update.
    const auto letStatsRefresh = [&] {
        env.GetRuntime().AdvanceCurrentTime(TDuration::Seconds(15));
        env.GetRuntime().DispatchEvents({}, TDuration::Seconds(1));
    };

    const bool isShard = false;
    service.RegisterLocalFileStore(
        "test",
        1,  // tablet id
        1,  // generation
        isShard,
        {});

    letStatsRefresh();

    auto serviceGroup = env.GetRuntime().GetAppData(nodeIdx).Counters
        ->FindSubgroup("counters", "filestore")
        ->FindSubgroup("component", "service");
    auto hddGroup = serviceGroup->FindSubgroup("type", "hdd");
    auto ssdGroup = serviceGroup->FindSubgroup("type", "ssd");

    auto fsCounter = serviceGroup->GetCounter("FileSystemCount", false);
    auto hddFsCounter = hddGroup->GetCounter("FileSystemCount", false);
    auto ssdFsCounter = ssdGroup->GetCounter("FileSystemCount", false);

    auto tabletCounter = serviceGroup->GetCounter("TabletCount", false);
    auto hddTabletCounter = hddGroup->GetCounter("TabletCount", false);
    auto ssdTabletCounter = ssdGroup->GetCounter("TabletCount", false);

    // One HDD file system backed by one tablet is registered.
    UNIT_ASSERT_VALUES_EQUAL(1, fsCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(1, tabletCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(1, hddFsCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(1, hddTabletCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(0, ssdFsCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(0, ssdTabletCounter->GetAtomic());

    service.UnregisterLocalFileStore("test", 1);

    letStatsRefresh();

    // Nothing is registered any more.
    UNIT_ASSERT_VALUES_EQUAL(0, fsCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(0, tabletCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(0, hddFsCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(0, hddTabletCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(0, ssdFsCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(0, ssdTabletCounter->GetAtomic());
}
}

} // namespace NCloud::NFileStore::NStorage
1 change: 1 addition & 0 deletions cloud/filestore/libs/storage/tablet/tablet_actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1180,6 +1180,7 @@ void TIndexTabletActor::RegisterFileStore(const NActors::TActorContext& ctx)
GetFileSystemId(),
TabletID(),
GetGeneration(),
GetFileSystem().GetShardNo() > 0,
std::move(config));

ctx.Send(MakeStorageServiceId(), request.release());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,8 @@ void TIndexTabletActor::CompleteTx_ConfigureAsShard(
args.Request.GetShardNo(),
JoinSeq(",", GetFileSystem().GetShardFileSystemIds()).c_str());

RegisterFileStore(ctx);

auto response =
std::make_unique<TEvIndexTablet::TEvConfigureAsShardResponse>();

Expand Down
48 changes: 48 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,54 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest)
true,
response->Record.GetStorageConfig().GetMultiTabletForwardingEnabled());
}

// Counts the EvRegisterLocalFileStore events carrying this tablet's id and
// checks that UpdateConfig produces one non-shard registration and that
// ConfigureAsShard then produces one shard registration.
Y_UNIT_TEST(ShouldNotifyServiceWhenFileSystemConfigChanged)
{
    TTestEnv env;
    env.CreateSubDomain("nfs");

    const auto nodeIdx = env.CreateNode("nfs");
    const auto tabletId = env.BootIndexTablet(nodeIdx);

    TIndexTabletClient tablet(env.GetRuntime(), nodeIdx, tabletId);

    ui64 nonShardRegistrations = 0;
    ui64 shardRegistrations = 0;
    env.GetRuntime().SetEventFilter(
        [&](auto& runtime, auto& event)
        {
            Y_UNUSED(runtime);

            const bool isRegistration =
                event->GetTypeRewrite() == TEvService::EvRegisterLocalFileStore;
            if (isRegistration) {
                const auto* msg = event->template Get<
                    TEvService::TEvRegisterLocalFileStoreRequest>();
                // Registrations for other tablets are ignored.
                if (msg->TabletId == tabletId) {
                    auto& hits = msg->IsShard
                        ? shardRegistrations
                        : nonShardRegistrations;
                    ++hits;
                }
            }

            // The filter only observes; presumably `false` lets the event
            // through - TODO confirm against the test runtime.
            return false;
        });

    tablet.UpdateConfig({
        .FileSystemId = "test_filesystem",
        .CloudId = "test_cloud",
        .FolderId = "test_folder",
    });

    UNIT_ASSERT_VALUES_EQUAL(1, nonShardRegistrations);
    UNIT_ASSERT_VALUES_EQUAL(0, shardRegistrations);

    tablet.ConfigureAsShard(1);

    UNIT_ASSERT_VALUES_EQUAL(1, nonShardRegistrations);
    UNIT_ASSERT_VALUES_EQUAL(1, shardRegistrations);
}
}

} // namespace NCloud::NFileStore::NStorage
30 changes: 30 additions & 0 deletions cloud/filestore/libs/storage/testlib/service_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,36 @@ class TServiceClient
return Runtime;
}

void RegisterLocalFileStore(
const TString& fileSystemId,
ui64 tabletId,
ui64 generation,
bool isShard,
NProtoPrivate::TFileSystemConfig config)
{
auto request =
std::make_unique<TEvService::TEvRegisterLocalFileStoreRequest>(
fileSystemId,
tabletId,
generation,
isShard,
std::move(config));
SendRequest(MakeStorageServiceId(), std::move(request));
Runtime.DispatchEvents({}, TDuration::Seconds(1));
}

void UnregisterLocalFileStore(
const TString& fileSystemId,
ui64 generation)
{
auto request =
std::make_unique<TEvService::TEvUnregisterLocalFileStoreRequest>(
fileSystemId,
generation);
SendRequest(MakeStorageServiceId(), std::move(request));
Runtime.DispatchEvents({}, TDuration::Seconds(1));
}

THeaders InitSession(
const TString& fileSystemId,
const TString& clientId,
Expand Down

0 comments on commit 11fa969

Please sign in to comment.