Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add fs and tablets counters #2861

Merged
merged 3 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cloud/filestore/libs/storage/api/service.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,19 @@ struct TEvService
const TString FileStoreId;
const ui64 TabletId;
const ui64 Generation;
const bool IsShard;
NProtoPrivate::TFileSystemConfig Config;

TRegisterLocalFileStore(
TString fileStoreId,
ui64 tablet,
ui64 generation,
bool isShard,
NProtoPrivate::TFileSystemConfig config)
: FileStoreId(std::move(fileStoreId))
, TabletId(tablet)
, Generation(generation)
, IsShard(isShard)
, Config(std::move(config))
{}
};
Expand Down
14 changes: 13 additions & 1 deletion cloud/filestore/libs/storage/service/service_actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ void TStorageServiceActor::RegisterPages(const NActors::TActorContext& ctx)
mon->RegisterActorPage(rootPage, "service", "Service",
false, ctx.ExecutorThread.ActorSystem, SelfId());
}

}

void TStorageServiceActor::RegisterCounters(const NActors::TActorContext& ctx)
Expand All @@ -64,6 +63,18 @@ void TStorageServiceActor::RegisterCounters(const NActors::TActorContext& ctx)

CpuWait = serverCounters->GetCounter("CpuWait", false);
CpuWaitFailure = serverCounters->GetCounter("CpuWaitFailure", false);

// Service-level counters: totals are registered directly under
// component=service, per-media values under nested type=hdd / type=ssd
// subgroups.
auto serviceCounters = rootGroup->GetSubgroup("component", "service");
TotalFileSystemCount = serviceCounters->GetCounter("FileSystemCount", false);
TotalTabletCount = serviceCounters->GetCounter("TabletCount", false);

auto hddCounters = serviceCounters->GetSubgroup("type", "hdd");
HddFileSystemCount = hddCounters->GetCounter("FileSystemCount", false);
HddTabletCount = hddCounters->GetCounter("TabletCount", false);

// The SSD counters have to be taken from the ssd subgroup; fetching them from
// hddCounters would make them alias the HDD counters and leave the type=ssd
// counters never updated.
auto ssdCounters = serviceCounters->GetSubgroup("type", "ssd");
SsdFileSystemCount = ssdCounters->GetCounter("FileSystemCount", false);
SsdTabletCount = ssdCounters->GetCounter("TabletCount", false);
}

void TStorageServiceActor::ScheduleUpdateStats(const NActors::TActorContext& ctx)
Expand Down Expand Up @@ -166,6 +177,7 @@ void TStorageServiceActor::HandleRegisterLocalFileStore(
msg->FileStoreId,
msg->TabletId,
msg->Generation,
msg->IsShard,
std::move(msg->Config));
}
}
Expand Down
10 changes: 10 additions & 0 deletions cloud/filestore/libs/storage/service/service_actor.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,16 @@ class TStorageServiceActor final

NMonitoring::TDynamicCounters::TCounterPtr CpuWait;
NMonitoring::TDynamicCounters::TCounterPtr CpuWaitFailure;

NMonitoring::TDynamicCounters::TCounterPtr TotalFileSystemCount;
NMonitoring::TDynamicCounters::TCounterPtr TotalTabletCount;

NMonitoring::TDynamicCounters::TCounterPtr HddFileSystemCount;
NMonitoring::TDynamicCounters::TCounterPtr HddTabletCount;

NMonitoring::TDynamicCounters::TCounterPtr SsdFileSystemCount;
NMonitoring::TDynamicCounters::TCounterPtr SsdTabletCount;

TInstant LastCpuWaitQuery;

public:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,39 @@ void TStorageServiceActor::HandleUpdateStats(
{
Y_UNUSED(ev);

if (State) {
    // Recompute the file system / tablet counts from the currently
    // registered local file stores and publish them to the counters.
    constexpr auto ssdKind = NProto::EStorageMediaKind::STORAGE_MEDIA_SSD;

    i64 fsTotal = 0;
    i64 fsHdd = 0;
    i64 fsSsd = 0;
    i64 tabletsTotal = 0;
    i64 tabletsHdd = 0;
    i64 tabletsSsd = 0;

    for (const auto& entry: State->GetLocalFileStores()) {
        const auto& fileStore = entry.second;
        // Any media kind other than SSD is accounted as HDD.
        const bool isSsd =
            fileStore.Config.GetStorageMediaKind() == ssdKind;

        // Every registered entry counts as a tablet, shard or not.
        ++tabletsTotal;
        if (isSsd) {
            ++tabletsSsd;
        } else {
            ++tabletsHdd;
        }

        // Shards are excluded from the file system counts.
        if (fileStore.IsShard) {
            continue;
        }

        ++fsTotal;
        if (isSsd) {
            ++fsSsd;
        } else {
            ++fsHdd;
        }
    }

    TotalFileSystemCount->Set(fsTotal);
    TotalTabletCount->Set(tabletsTotal);
    SsdFileSystemCount->Set(fsSsd);
    HddFileSystemCount->Set(fsHdd);
    SsdTabletCount->Set(tabletsSsd);
    HddTabletCount->Set(tabletsHdd);
}

auto now = GetCycleCount();
for (auto it = InFlightRequests.begin(); it != InFlightRequests.end(); ) {
const auto& request = it->second;
Expand Down
2 changes: 2 additions & 0 deletions cloud/filestore/libs/storage/service/service_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ void TStorageServiceState::RegisterLocalFileStore(
const TString& id,
ui64 tablet,
ui32 generation,
bool isShard,
NProtoPrivate::TFileSystemConfig config)
{
// in case new instance registered before old unregistered or config was updated
Expand All @@ -191,6 +192,7 @@ void TStorageServiceState::RegisterLocalFileStore(
id,
tablet,
generation,
isShard,
std::move(config)));
}

Expand Down
4 changes: 4 additions & 0 deletions cloud/filestore/libs/storage/service/service_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,17 +156,20 @@ struct TLocalFileStore
const TString FileStoreId;
const ui64 TabletId;
const ui32 Generation;
const bool IsShard;

NProtoPrivate::TFileSystemConfig Config;

TLocalFileStore(
TString id,
ui64 tablet,
ui32 generation,
bool isShard,
NProtoPrivate::TFileSystemConfig config)
: FileStoreId(std::move(id))
, TabletId(tablet)
, Generation(generation)
, IsShard(isShard)
, Config(std::move(config))
{}
};
Expand Down Expand Up @@ -221,6 +224,7 @@ class TStorageServiceState
const TString& id,
ui64 tablet,
ui32 generation,
bool isShard,
NProtoPrivate::TFileSystemConfig config);
void UnregisterLocalFileStore(
const TString& id,
Expand Down
75 changes: 75 additions & 0 deletions cloud/filestore/libs/storage/service/service_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3259,6 +3259,81 @@ Y_UNIT_TEST_SUITE(TStorageServiceTest)
CheckThreeStageWrites(NProto::STORAGE_MEDIA_SSD, true);
CheckTwoStageReads(NProto::STORAGE_MEDIA_SSD, true);
}

Y_UNIT_TEST(ShouldUpdateFileSystemAndTabletCountersOnRegisterAndUnregister)
{
    // Registers a single non-shard file store with a default config, checks
    // that it shows up in the total and hdd counters only, then unregisters
    // it and checks that all counters drop back to zero.
    TTestEnv env;
    env.CreateSubDomain("nfs");

    const ui32 nodeIdx = env.CreateNode("nfs");
    TServiceClient service(env.GetRuntime(), nodeIdx);

    // Advance time and dispatch events so the service gets a chance to run
    // its stats update before the counters are inspected.
    auto letStatsUpdate = [&] {
        env.GetRuntime().AdvanceCurrentTime(TDuration::Seconds(15));
        env.GetRuntime().DispatchEvents({}, TDuration::Seconds(1));
    };

    service.RegisterLocalFileStore(
        "test",
        1,      // tablet id
        1,      // generation
        false,  // isShard
        {});
    letStatsUpdate();

    auto counters = env.GetRuntime().GetAppData(nodeIdx).Counters;
    auto serviceGroup = counters
        ->FindSubgroup("counters", "filestore")
        ->FindSubgroup("component", "service");
    auto hddGroup = serviceGroup->FindSubgroup("type", "hdd");
    auto ssdGroup = serviceGroup->FindSubgroup("type", "ssd");

    auto fsCounter = serviceGroup->GetCounter("FileSystemCount", false);
    auto hddFsCounter = hddGroup->GetCounter("FileSystemCount", false);
    auto ssdFsCounter = ssdGroup->GetCounter("FileSystemCount", false);

    auto tabletCounter = serviceGroup->GetCounter("TabletCount", false);
    auto hddTabletCounter = hddGroup->GetCounter("TabletCount", false);
    auto ssdTabletCounter = ssdGroup->GetCounter("TabletCount", false);

    // one registered file store: counted in totals and in hdd
    UNIT_ASSERT_VALUES_EQUAL(1, fsCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(1, tabletCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(1, hddFsCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(1, hddTabletCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(0, ssdFsCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(0, ssdTabletCounter->GetAtomic());

    service.UnregisterLocalFileStore("test", 1);
    letStatsUpdate();

    // nothing registered anymore: every counter is back to zero
    UNIT_ASSERT_VALUES_EQUAL(0, fsCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(0, tabletCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(0, hddFsCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(0, hddTabletCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(0, ssdFsCounter->GetAtomic());
    UNIT_ASSERT_VALUES_EQUAL(0, ssdTabletCounter->GetAtomic());
}
}

} // namespace NCloud::NFileStore::NStorage
1 change: 1 addition & 0 deletions cloud/filestore/libs/storage/tablet/tablet_actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1180,6 +1180,7 @@ void TIndexTabletActor::RegisterFileStore(const NActors::TActorContext& ctx)
GetFileSystemId(),
TabletID(),
GetGeneration(),
GetFileSystem().GetShardNo() > 0,
std::move(config));

ctx.Send(MakeStorageServiceId(), request.release());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,8 @@ void TIndexTabletActor::CompleteTx_ConfigureAsShard(
args.Request.GetShardNo(),
JoinSeq(",", GetFileSystem().GetShardFileSystemIds()).c_str());

RegisterFileStore(ctx);

auto response =
std::make_unique<TEvIndexTablet::TEvConfigureAsShardResponse>();

Expand Down
48 changes: 48 additions & 0 deletions cloud/filestore/libs/storage/tablet/tablet_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,54 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest)
true,
response->Record.GetStorageConfig().GetMultiTabletForwardingEnabled());
}

Y_UNIT_TEST(ShouldNotifyServiceWhenFileSystemConfigChanged)
{
    // Counts the register-local-filestore events sent for this tablet and
    // checks that one is sent with IsShard == false after UpdateConfig and
    // one more with IsShard == true after ConfigureAsShard.
    TTestEnv env;
    env.CreateSubDomain("nfs");

    const auto nodeIdx = env.CreateNode("nfs");
    const auto tabletId = env.BootIndexTablet(nodeIdx);

    TIndexTabletClient tablet(env.GetRuntime(), nodeIdx, tabletId);

    ui64 registerNonShardCount = 0;
    ui64 registerShardCount = 0;
    env.GetRuntime().SetEventFilter(
        [&](auto& runtime, auto& event)
        {
            Y_UNUSED(runtime);

            const bool isRegistration =
                event->GetTypeRewrite() ==
                TEvService::EvRegisterLocalFileStore;
            if (isRegistration) {
                const auto* msg = event->template Get<
                    TEvService::TEvRegisterLocalFileStoreRequest>();
                // registrations coming from other tablets are ignored
                if (msg->TabletId == tabletId) {
                    ++(msg->IsShard
                        ? registerShardCount
                        : registerNonShardCount);
                }
            }

            // the filter only observes events, it never drops them
            return false;
        });

    tablet.UpdateConfig({
        .FileSystemId = "test_filesystem",
        .CloudId = "test_cloud",
        .FolderId = "test_folder",
    });

    UNIT_ASSERT_VALUES_EQUAL(1, registerNonShardCount);
    UNIT_ASSERT_VALUES_EQUAL(0, registerShardCount);

    tablet.ConfigureAsShard(1);

    UNIT_ASSERT_VALUES_EQUAL(1, registerNonShardCount);
    UNIT_ASSERT_VALUES_EQUAL(1, registerShardCount);
}
}

} // namespace NCloud::NFileStore::NStorage
30 changes: 30 additions & 0 deletions cloud/filestore/libs/storage/testlib/service_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,36 @@ class TServiceClient
return Runtime;
}

void RegisterLocalFileStore(
const TString& fileSystemId,
ui64 tabletId,
ui64 generation,
bool isShard,
NProtoPrivate::TFileSystemConfig config)
{
auto request =
std::make_unique<TEvService::TEvRegisterLocalFileStoreRequest>(
fileSystemId,
tabletId,
generation,
isShard,
std::move(config));
SendRequest(MakeStorageServiceId(), std::move(request));
Runtime.DispatchEvents({}, TDuration::Seconds(1));
}

void UnregisterLocalFileStore(
const TString& fileSystemId,
ui64 generation)
{
auto request =
std::make_unique<TEvService::TEvUnregisterLocalFileStoreRequest>(
fileSystemId,
generation);
SendRequest(MakeStorageServiceId(), std::move(request));
Runtime.DispatchEvents({}, TDuration::Seconds(1));
}

THeaders InitSession(
const TString& fileSystemId,
const TString& clientId,
Expand Down
Loading