Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

issue-1444: Use kernel delay accounting to calculate cpu wait #1630

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cloud/blockstore/config/diagnostics.proto
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package NCloud.NBlockStore.NProto;

option go_package = "github.com/ydb-platform/nbs/cloud/blockstore/config";

import "cloud/storage/core/protos/diagnostics.proto";
import "cloud/storage/core/protos/trace.proto";

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -216,4 +217,7 @@ message TDiagnosticsConfig

// Performance measurements coefficients for local HDD disks.
optional TVolumePerfSettings LocalHDDPerfSettings = 51;

// Selects which stats fetcher implementation is used to obtain CPU wait stats.
optional NCloud.NProto.EStatsFetcherType StatsFetcherType = 52;
}
6 changes: 3 additions & 3 deletions cloud/blockstore/libs/daemon/common/bootstrap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
#include <cloud/storage/core/libs/common/timer.h>
#include <cloud/storage/core/libs/coroutine/executor.h>
#include <cloud/storage/core/libs/daemon/mlock.h>
#include <cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher.h>
#include <cloud/storage/core/libs/diagnostics/stats_fetcher.h>
#include <cloud/storage/core/libs/diagnostics/critical_events.h>
#include <cloud/storage/core/libs/diagnostics/logging.h>
#include <cloud/storage/core/libs/diagnostics/monitoring.h>
Expand Down Expand Up @@ -861,7 +861,7 @@ void TBootstrapBase::Start()
START_KIKIMR_COMPONENT(NotifyService);
START_COMMON_COMPONENT(Monitoring);
START_COMMON_COMPONENT(ProfileLog);
START_KIKIMR_COMPONENT(CgroupStatsFetcher);
START_KIKIMR_COMPONENT(StatsFetcher);
START_COMMON_COMPONENT(DiscoveryService);
START_COMMON_COMPONENT(TraceProcessor);
START_KIKIMR_COMPONENT(TraceSerializer);
Expand Down Expand Up @@ -967,7 +967,7 @@ void TBootstrapBase::Stop()
STOP_KIKIMR_COMPONENT(TraceSerializer);
STOP_COMMON_COMPONENT(TraceProcessor);
STOP_COMMON_COMPONENT(DiscoveryService);
STOP_KIKIMR_COMPONENT(CgroupStatsFetcher);
STOP_KIKIMR_COMPONENT(StatsFetcher);
STOP_COMMON_COMPONENT(ProfileLog);
STOP_COMMON_COMPONENT(Monitoring);
STOP_KIKIMR_COMPONENT(LogbrokerService);
Expand Down
2 changes: 1 addition & 1 deletion cloud/blockstore/libs/daemon/common/bootstrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class TBootstrapBase
virtual IStartable* GetTraceSerializer() = 0;
virtual IStartable* GetLogbrokerService() = 0;
virtual IStartable* GetNotifyService() = 0;
virtual IStartable* GetCgroupStatsFetcher() = 0;
virtual IStartable* GetStatsFetcher() = 0;
virtual IStartable* GetIamTokenClient() = 0;
virtual IStartable* GetComputeClient() = 0;
virtual IStartable* GetKmsClient() = 0;
Expand Down
2 changes: 1 addition & 1 deletion cloud/blockstore/libs/daemon/local/bootstrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class TBootstrapLocal final
IStartable* GetTraceSerializer() override { return nullptr; }
IStartable* GetLogbrokerService() override { return nullptr; }
IStartable* GetNotifyService() override { return nullptr; }
IStartable* GetCgroupStatsFetcher() override { return nullptr; }
IStartable* GetStatsFetcher() override { return nullptr; }
IStartable* GetIamTokenClient() override { return nullptr; }
IStartable* GetComputeClient() override { return nullptr; }
IStartable* GetKmsClient() override { return nullptr; }
Expand Down
12 changes: 6 additions & 6 deletions cloud/blockstore/libs/daemon/ydb/bootstrap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
#include <cloud/storage/core/libs/common/proto_helpers.h>
#include <cloud/storage/core/libs/common/task_queue.h>
#include <cloud/storage/core/libs/common/thread_pool.h>
#include <cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher.h>
#include <cloud/storage/core/libs/diagnostics/stats_fetcher.h>
#include <cloud/storage/core/libs/diagnostics/trace_serializer.h>
#include <cloud/storage/core/libs/iam/iface/client.h>
#include <cloud/storage/core/libs/iam/iface/config.h>
Expand Down Expand Up @@ -131,7 +131,7 @@ IStartable* TBootstrapYdb::GetYdbStorage() { return YdbStorage.get(); }
IStartable* TBootstrapYdb::GetTraceSerializer() { return TraceSerializer.get(); }
IStartable* TBootstrapYdb::GetLogbrokerService() { return LogbrokerService.get(); }
IStartable* TBootstrapYdb::GetNotifyService() { return NotifyService.get(); }
IStartable* TBootstrapYdb::GetCgroupStatsFetcher() { return CgroupStatsFetcher.get(); }
IStartable* TBootstrapYdb::GetStatsFetcher() { return StatsFetcher.get(); }
IStartable* TBootstrapYdb::GetIamTokenClient() { return IamTokenClient.get(); }
IStartable* TBootstrapYdb::GetComputeClient() { return ComputeClient.get(); }
IStartable* TBootstrapYdb::GetKmsClient() { return KmsClient.get(); }
Expand Down Expand Up @@ -499,11 +499,11 @@ void TBootstrapYdb::InitKikimrService()

STORAGE_INFO("ProfileLog initialized");

CgroupStatsFetcher = BuildCgroupStatsFetcher(
StatsFetcher = NCloud::NStorage::BuildStatsFetcher(
Configs->DiagnosticsConfig->GetStatsFetcherType(),
Configs->DiagnosticsConfig->GetCpuWaitFilename(),
Log,
logging,
"BLOCKSTORE_CGROUPS");
logging);

if (Configs->StorageConfig->GetBlockDigestsEnabled()) {
if (Configs->StorageConfig->GetUseTestBlockDigestGenerator()) {
Expand Down Expand Up @@ -553,7 +553,7 @@ void TBootstrapYdb::InitKikimrService()
args.LogbrokerService = LogbrokerService;
args.NotifyService = NotifyService;
args.VolumeStats = VolumeStats;
args.CgroupStatsFetcher = CgroupStatsFetcher;
args.StatsFetcher = StatsFetcher;
args.RdmaServer = nullptr;
args.RdmaClient = RdmaClient;
args.Logging = logging;
Expand Down
4 changes: 2 additions & 2 deletions cloud/blockstore/libs/daemon/ydb/bootstrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ struct TBootstrapYdb final
ITraceSerializerPtr TraceSerializer;
NLogbroker::IServicePtr LogbrokerService;
NNotify::IServicePtr NotifyService;
NCloud::NStorage::ICgroupStatsFetcherPtr CgroupStatsFetcher;
NCloud::NStorage::IStatsFetcherPtr StatsFetcher;
NIamClient::IIamTokenClientPtr IamTokenClient;
IComputeClientPtr ComputeClient;
IKmsClientPtr KmsClient;
Expand Down Expand Up @@ -115,7 +115,7 @@ struct TBootstrapYdb final
IStartable* GetTraceSerializer() override;
IStartable* GetLogbrokerService() override;
IStartable* GetNotifyService() override;
IStartable* GetCgroupStatsFetcher() override;
IStartable* GetStatsFetcher() override;
IStartable* GetIamTokenClient() override;
IStartable* GetComputeClient() override;
IStartable* GetKmsClient() override;
Expand Down
10 changes: 10 additions & 0 deletions cloud/blockstore/libs/diagnostics/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ namespace {
xxx(LocalHDDDowntimeThreshold, TDuration, TDuration::Seconds(15) )\
xxx(ReportHistogramAsMultipleCounters, bool, true )\
xxx(ReportHistogramAsSingleCounter, bool, false )\
xxx(StatsFetcherType, NCloud::NProto::EStatsFetcherType, NCloud::NProto::EStatsFetcherType::CGROUP )\
// BLOCKSTORE_DIAGNOSTICS_CONFIG

#define BLOCKSTORE_DIAGNOSTICS_DECLARE_CONFIG(name, type, value) \
Expand Down Expand Up @@ -307,3 +308,12 @@ void Out<NCloud::TRequestThresholds>(
{
OutRequestThresholds(out, value);
}

// Stream-output specialization for EStatsFetcherType: writes the textual name
// that EStatsFetcherType_Name returns for the given enum value.
template <>
void Out<NCloud::NProto::EStatsFetcherType>(
    IOutputStream& out,
    NCloud::NProto::EStatsFetcherType statsFetcherType)
{
    const auto& typeName =
        NCloud::NProto::EStatsFetcherType_Name(statsFetcherType);
    out << typeName;
}
2 changes: 2 additions & 0 deletions cloud/blockstore/libs/diagnostics/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ class TDiagnosticsConfig
TRequestThresholds GetRequestThresholds() const;
EHistogramCounterOptions GetHistogramCounterOptions() const;

NCloud::NProto::EStatsFetcherType GetStatsFetcherType() const;

void Dump(IOutputStream& out) const;
void DumpHtml(IOutputStream& out) const;
};
Expand Down
2 changes: 1 addition & 1 deletion cloud/blockstore/libs/storage/init/server/actorsystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ class TStorageServicesInitializer final
auto volumeBalancerService = CreateVolumeBalancerActor(
Args.StorageConfig,
Args.VolumeStats,
Args.CgroupStatsFetcher,
Args.StatsFetcher,
Args.VolumeBalancerSwitch,
MakeStorageServiceId());

Expand Down
2 changes: 1 addition & 1 deletion cloud/blockstore/libs/storage/init/server/actorsystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ struct TServerActorSystemArgs
IVolumeStatsPtr VolumeStats;
NRdma::IServerPtr RdmaServer;
NRdma::IClientPtr RdmaClient;
NCloud::NStorage::ICgroupStatsFetcherPtr CgroupStatsFetcher;
NCloud::NStorage::IStatsFetcherPtr StatsFetcher;
TManuallyPreemptedVolumesPtr PreemptedVolumes;
NNvme::INvmeManagerPtr NvmeManager;
IVolumeBalancerSwitchPtr VolumeBalancerSwitch;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ using namespace NActors;
IActorPtr CreateVolumeBalancerActor(
TStorageConfigPtr storageConfig,
IVolumeStatsPtr volumeStats,
NCloud::NStorage::ICgroupStatsFetcherPtr cgroupStatFetcher,
NCloud::NStorage::IStatsFetcherPtr statFetcher,
IVolumeBalancerSwitchPtr volumeBalancerSwitch,
NActors::TActorId serviceActorId)
{
return std::make_unique<TVolumeBalancerActor>(
std::move(storageConfig),
std::move(volumeStats),
std::move(cgroupStatFetcher),
std::move(statFetcher),
std::move(volumeBalancerSwitch),
serviceActorId);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace NCloud::NBlockStore::NStorage {
NActors::IActorPtr CreateVolumeBalancerActor(
TStorageConfigPtr storageConfig,
IVolumeStatsPtr volumeStats,
NCloud::NStorage::ICgroupStatsFetcherPtr cgroupStatFetcher,
NCloud::NStorage::IStatsFetcherPtr cgroupStatFetcher,
IVolumeBalancerSwitchPtr volumeBalancerSwitch,
NActors::TActorId serviceActorId);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include <cloud/blockstore/libs/storage/core/config.h>
#include <cloud/blockstore/libs/storage/core/proto_helpers.h>

#include <cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher.h>
#include <cloud/storage/core/libs/diagnostics/stats_fetcher.h>

#include <contrib/ydb/library/actors/core/actor_bootstrapped.h>

Expand Down Expand Up @@ -140,12 +140,12 @@ STFUNC(TRemoteVolumeStatActor::StateWork)
TVolumeBalancerActor::TVolumeBalancerActor(
TStorageConfigPtr storageConfig,
IVolumeStatsPtr volumeStats,
NCloud::NStorage::ICgroupStatsFetcherPtr cgroupStatsFetcher,
NCloud::NStorage::IStatsFetcherPtr statsFetcher,
IVolumeBalancerSwitchPtr volumeBalancerSwitch,
TActorId serviceActorId)
: StorageConfig(std::move(storageConfig))
, VolumeStats(std::move(volumeStats))
, CgroupStatsFetcher(std::move(cgroupStatsFetcher))
, StatsFetcher(std::move(statsFetcher))
, VolumeBalancerSwitch(std::move(volumeBalancerSwitch))
, ServiceActorId(serviceActorId)
, State(std::make_unique<TVolumeBalancerState>(StorageConfig))
Expand Down Expand Up @@ -246,7 +246,7 @@ void TVolumeBalancerActor::HandleGetVolumeStatsResponse(
auto now = ctx.Now();

auto interval = (now - LastCpuWaitQuery).MicroSeconds();
auto [cpuWait, error] = CgroupStatsFetcher->GetCpuWait();
auto [cpuWait, error] = StatsFetcher->GetCpuWait();
if (HasError(error)) {
*CpuWaitFailure = 1;
LOG_TRACE_S(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class TVolumeBalancerActor final
private:
const TStorageConfigPtr StorageConfig;
const IVolumeStatsPtr VolumeStats;
const NCloud::NStorage::ICgroupStatsFetcherPtr CgroupStatsFetcher;
const NCloud::NStorage::IStatsFetcherPtr StatsFetcher;
const IVolumeBalancerSwitchPtr VolumeBalancerSwitch;
const NActors::TActorId ServiceActorId;

Expand All @@ -50,7 +50,7 @@ class TVolumeBalancerActor final
TVolumeBalancerActor(
TStorageConfigPtr storageConfig,
IVolumeStatsPtr volumeStats,
NCloud::NStorage::ICgroupStatsFetcherPtr cgroupStatsFetcher,
NCloud::NStorage::IStatsFetcherPtr statsFetcher,
IVolumeBalancerSwitchPtr volumeBalancerSwitch,
NActors::TActorId serviceActorId);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#include <cloud/blockstore/libs/storage/core/public.h>
#include <cloud/blockstore/libs/storage/volume_balancer/volume_balancer.h>

#include <cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher.h>
#include <cloud/storage/core/libs/diagnostics/stats_fetcher.h>
#include <cloud/storage/core/libs/features/features_config.h>

#include <library/cpp/testing/unittest/registar.h>
Expand Down Expand Up @@ -194,7 +194,7 @@ struct TVolumeStatsTestMock final

////////////////////////////////////////////////////////////////////////////////

struct TCgroupStatsFetcherMock: public NCloud::NStorage::ICgroupStatsFetcher
struct TStatsFetcherMock: public NCloud::NStorage::IStatsFetcher
{
TResultOrError<TDuration> Value = TDuration::Zero();

Expand Down Expand Up @@ -230,14 +230,14 @@ class TVolumeBalancerTestEnv

public:
std::shared_ptr<TVolumeStatsTestMock> VolumeStats;
std::shared_ptr<TCgroupStatsFetcherMock> Fetcher;
std::shared_ptr<TStatsFetcherMock> Fetcher;

public:
TVolumeBalancerTestEnv()
{
Sender = TestEnv.GetRuntime().AllocateEdgeActor();
VolumeStats = std::make_shared<TVolumeStatsTestMock>();
Fetcher = std::make_shared<TCgroupStatsFetcherMock>();
Fetcher = std::make_shared<TStatsFetcherMock>();
}

TActorId GetEdgeActor() const
Expand Down Expand Up @@ -411,7 +411,7 @@ NFeatures::TFeaturesConfigPtr CreateFeatureConfig(
IActorPtr CreateVolumeBalancerActor(
TVolumeBalancerConfigBuilder& config,
IVolumeStatsPtr volumeStats,
NCloud::NStorage::ICgroupStatsFetcherPtr cgroupStatsFetcher,
NCloud::NStorage::IStatsFetcherPtr statsFetcher,
TActorId serviceActorId)
{
NProto::TStorageServiceConfig storageConfig = config.Build();
Expand All @@ -425,7 +425,7 @@ IActorPtr CreateVolumeBalancerActor(
CreateFeatureConfig("Balancer", {})
),
std::move(volumeStats),
std::move(cgroupStatsFetcher),
std::move(statsFetcher),
std::move(volumeBalancerSwitch),
std::move(serviceActorId));
}
Expand Down
4 changes: 4 additions & 0 deletions cloud/filestore/config/diagnostics.proto
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package NCloud.NFileStore.NProto;

option go_package = "github.com/ydb-platform/nbs/cloud/filestore/config";

import "cloud/storage/core/protos/diagnostics.proto";
import "cloud/storage/core/protos/trace.proto";

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -137,4 +138,7 @@ message TDiagnosticsConfig

// Performance profile for SSD filesystems.
optional TFileSystemPerformanceProfile SSDFileSystemPerformanceProfile = 27;

// Selects which stats fetcher implementation is used to obtain CPU wait stats.
optional NCloud.NProto.EStatsFetcherType StatsFetcherType = 28;
}
16 changes: 8 additions & 8 deletions cloud/filestore/libs/daemon/common/bootstrap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include <cloud/storage/core/libs/common/thread_pool.h>
#include <cloud/storage/core/libs/common/timer.h>
#include <cloud/storage/core/libs/daemon/mlock.h>
#include <cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher.h>
#include <cloud/storage/core/libs/diagnostics/stats_fetcher.h>
#include <cloud/storage/core/libs/diagnostics/critical_events.h>
#include <cloud/storage/core/libs/diagnostics/logging.h>
#include <cloud/storage/core/libs/diagnostics/monitoring.h>
Expand Down Expand Up @@ -90,7 +90,7 @@ void TBootstrapCommon::Start()
FILESTORE_LOG_START_COMPONENT(BackgroundThreadPool);
FILESTORE_LOG_START_COMPONENT(ProfileLog);
FILESTORE_LOG_START_COMPONENT(RequestStatsUpdater);
FILESTORE_LOG_START_COMPONENT(CgroupStatsFetcher);
FILESTORE_LOG_START_COMPONENT(StatsFetcher);

StartComponents();

Expand Down Expand Up @@ -119,7 +119,7 @@ void TBootstrapCommon::Stop()

StopComponents();

FILESTORE_LOG_STOP_COMPONENT(CgroupStatsFetcher);
FILESTORE_LOG_STOP_COMPONENT(StatsFetcher);
FILESTORE_LOG_STOP_COMPONENT(RequestStatsUpdater);
FILESTORE_LOG_STOP_COMPONENT(ProfileLog);
FILESTORE_LOG_STOP_COMPONENT(BackgroundThreadPool);
Expand Down Expand Up @@ -271,16 +271,16 @@ void TBootstrapCommon::InitActorSystem()
STORAGE_INFO("TraceSerializer initialized");

auto cpuWaitFilename = Configs->DiagnosticsConfig->GetCpuWaitFilename();
CgroupStatsFetcher = BuildCgroupStatsFetcher(
StatsFetcher = NCloud::NStorage::BuildStatsFetcher(
Configs->DiagnosticsConfig->GetStatsFetcherType(),
cpuWaitFilename.empty()
? NCloud::NStorage::BuildCpuWaitStatsFilename(
Configs->DiagnosticsConfig->GetCpuWaitServiceName())
: std::move(cpuWaitFilename),
Log,
logging,
"FILESTORE_CGROUPS");
logging);

STORAGE_INFO("CgroupStatsFetcher initialized");
STORAGE_INFO("StatsFetcher initialized");

NStorage::TActorSystemArgs args;
args.NodeId = nodeId;
Expand All @@ -292,7 +292,7 @@ void TBootstrapCommon::InitActorSystem()
args.DiagnosticsConfig = Configs->DiagnosticsConfig;
args.Metrics = Metrics;
args.UserCounters = UserCounters;
args.CgroupStatsFetcher = CgroupStatsFetcher;
args.StatsFetcher = StatsFetcher;
args.ModuleFactories = ModuleFactories;

ActorSystem = NStorage::CreateActorSystem(args);
Expand Down
2 changes: 1 addition & 1 deletion cloud/filestore/libs/daemon/common/bootstrap.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class TBootstrapCommon
ITaskQueuePtr BackgroundThreadPool;
IProfileLogPtr ProfileLog;
IActorSystemPtr ActorSystem;
NCloud::NStorage::ICgroupStatsFetcherPtr CgroupStatsFetcher;
NCloud::NStorage::IStatsFetcherPtr StatsFetcher;

public:
TBootstrapCommon(
Expand Down
Loading
Loading