Skip to content

Commit

Permalink
Explicit control of SelfManagement enabling in distconf
Browse files Browse the repository at this point in the history
  • Loading branch information
alexvru committed Jan 15, 2025
1 parent 2555770 commit 3282fe4
Show file tree
Hide file tree
Showing 22 changed files with 135 additions and 79 deletions.
3 changes: 2 additions & 1 deletion ydb/core/blobstorage/base/blobstorage_events.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
namespace NKikimr {

// Constructs the storage-config event.
//   config                - copied into a freshly allocated TStorageConfig owned by Config.
//   proposedConfig        - optional; when non-null it is copied into ProposedConfig,
//                           otherwise ProposedConfig is left as nullptr.
//   selfManagementEnabled - stored verbatim in SelfManagementEnabled.
// NOTE(review): this text is a rendered diff; the two consecutive `proposedConfig` parameter
// lines below appear to be the pre-change and post-change versions of the same signature line.
TEvNodeWardenStorageConfig::TEvNodeWardenStorageConfig(const NKikimrBlobStorage::TStorageConfig& config,
const NKikimrBlobStorage::TStorageConfig *proposedConfig)
const NKikimrBlobStorage::TStorageConfig *proposedConfig, bool selfManagementEnabled)
: Config(std::make_unique<NKikimrBlobStorage::TStorageConfig>(config))
// deep-copy the proposed config only when the caller actually supplied one
, ProposedConfig(proposedConfig
? std::make_unique<NKikimrBlobStorage::TStorageConfig>(*proposedConfig)
: nullptr)
, SelfManagementEnabled(selfManagementEnabled)
{}

TEvNodeWardenStorageConfig::~TEvNodeWardenStorageConfig()
Expand Down
3 changes: 2 additions & 1 deletion ydb/core/blobstorage/base/blobstorage_events.h
Original file line number Diff line number Diff line change
Expand Up @@ -575,9 +575,10 @@ namespace NKikimr {
{
std::unique_ptr<NKikimrBlobStorage::TStorageConfig> Config;
std::unique_ptr<NKikimrBlobStorage::TStorageConfig> ProposedConfig;
bool SelfManagementEnabled;

TEvNodeWardenStorageConfig(const NKikimrBlobStorage::TStorageConfig& config,
const NKikimrBlobStorage::TStorageConfig *proposedConfig);
const NKikimrBlobStorage::TStorageConfig *proposedConfig, bool selfManagementEnabled);
~TEvNodeWardenStorageConfig();
};

Expand Down
22 changes: 14 additions & 8 deletions ydb/core/blobstorage/nodewarden/distconf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,13 @@ namespace NKikimr::NStorage {

// generate initial drive set and query stored configuration
if (IsSelfStatic) {
EnumerateConfigDrives(InitialConfig, SelfId().NodeId(), [&](const auto& /*node*/, const auto& drive) {
DrivesToRead.push_back(drive.GetPath());
});
std::sort(DrivesToRead.begin(), DrivesToRead.end());
if (BaseConfig.GetSelfManagementConfig().GetEnabled()) {
// read this only if it is possibly enabled
EnumerateConfigDrives(InitialConfig, SelfId().NodeId(), [&](const auto& /*node*/, const auto& drive) {
DrivesToRead.push_back(drive.GetPath());
});
std::sort(DrivesToRead.begin(), DrivesToRead.end());
}
ReadConfig();
} else {
StorageConfigLoaded = true;
Expand Down Expand Up @@ -91,6 +94,10 @@ namespace NKikimr::NStorage {
}
}

SelfManagementEnabled = (!IsSelfStatic || BaseConfig.GetSelfManagementConfig().GetEnabled()) &&
config.GetSelfManagementConfig().GetEnabled() &&
config.GetGeneration();

StorageConfig.emplace(config);
if (ProposedStorageConfig && ProposedStorageConfig->GetGeneration() <= StorageConfig->GetGeneration()) {
ProposedStorageConfig.reset();
Expand Down Expand Up @@ -292,14 +299,13 @@ namespace NKikimr::NStorage {
// Sends the storage configuration to the node warden actor of this node.
//   cookie - passed through as the cookie of the outgoing event.
// Aborts if StorageConfig has not been set yet.
// NOTE(review): this text is a rendered diff; the lines using `distconfEnabled` appear to be the
// pre-change versions of the lines that use `SelfManagementEnabled` right after them.
void TDistributedConfigKeeper::ReportStorageConfigToNodeWarden(ui64 cookie) {
Y_ABORT_UNLESS(StorageConfig);
// node warden actor id is derived from the local node id
const TActorId wardenId = MakeBlobStorageNodeWardenID(SelfId().NodeId());
const bool distconfEnabled = StorageConfig->GetSelfManagementConfig().GetEnabled();
const NKikimrBlobStorage::TStorageConfig *config = distconfEnabled
// report the current StorageConfig only when self-management is enabled; otherwise report BaseConfig
const NKikimrBlobStorage::TStorageConfig *config = SelfManagementEnabled
? &StorageConfig.value()
: &BaseConfig;
const NKikimrBlobStorage::TStorageConfig *proposedConfig = ProposedStorageConfig && distconfEnabled
// the proposed config is forwarded only if one exists and self-management is enabled
const NKikimrBlobStorage::TStorageConfig *proposedConfig = ProposedStorageConfig && SelfManagementEnabled
? &ProposedStorageConfig.value()
: nullptr;
auto ev = std::make_unique<TEvNodeWardenStorageConfig>(*config, proposedConfig);
auto ev = std::make_unique<TEvNodeWardenStorageConfig>(*config, proposedConfig, SelfManagementEnabled);
// ownership of the event is handed over to Send
Send(wardenId, ev.release(), 0, cookie);
}

Expand Down
1 change: 1 addition & 0 deletions ydb/core/blobstorage/nodewarden/distconf.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ namespace NKikimr::NStorage {

const bool IsSelfStatic = false;
TIntrusivePtr<TNodeWardenConfig> Cfg;
bool SelfManagementEnabled = false;

// currently active storage config
std::optional<NKikimrBlobStorage::TStorageConfig> StorageConfig;
Expand Down
6 changes: 2 additions & 4 deletions ydb/core/blobstorage/nodewarden/distconf_console.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@ namespace NKikimr::NStorage {
return; // this is not the root node
} else if (enablingDistconf) {
// NO RETURN HERE -> right now we are enabling distconf, so we can skip rest of the checks
} else if (!StorageConfig || !StorageConfig->GetSelfManagementConfig().GetEnabled()) {
return; // no self-management config enabled
} else if (!StorageConfig->HasStateStorageConfig()) {
return; // no way to find Console too
} else if (!SelfManagementEnabled || !StorageConfig->HasStateStorageConfig()) {
return; // no self-management config enabled or no way to find Console (no statestorage configured yet)
}

STLOG(PRI_DEBUG, BS_NODE, NWDC66, "ConnectToConsole: creating pipe to the Console");
Expand Down
2 changes: 1 addition & 1 deletion ydb/core/blobstorage/nodewarden/distconf_dynamic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ namespace NKikimr::NStorage {
ev->Record.SetNoQuorum(true);
} else if (!StorageConfig) {
// no storage configuration -- no nothing
} else if (auto *target = record.MutableConfig(); StorageConfig->GetSelfManagementConfig().GetEnabled()) {
} else if (auto *target = record.MutableConfig(); SelfManagementEnabled) {
target->CopyFrom(*StorageConfig);
} else {
target->CopyFrom(BaseConfig);
Expand Down
16 changes: 8 additions & 8 deletions ydb/core/blobstorage/nodewarden/distconf_invoke.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -311,10 +311,10 @@ namespace NKikimr::NStorage {
const auto& record = Event->Get()->Record;
const auto& cmd = record.GetReassignGroupDisk();

if (Scepter.expired()) {
return FinishWithError(TResult::ERROR, "scepter lost during query execution");
} else if (!RunCommonChecks()) {
if (!RunCommonChecks()) {
return;
} else if (!Self->SelfManagementEnabled) {
return FinishWithError(TResult::ERROR, "self-management is not enabled");
}

STLOG(PRI_DEBUG, BS_NODE, NWDC75, "ReassignGroupDiskExecute", (SelfId, SelfId()));
Expand Down Expand Up @@ -383,9 +383,6 @@ namespace NKikimr::NStorage {
}
const auto& ss = bsConfig.GetServiceSet();

if (!config.GetSelfManagementConfig().GetEnabled()) {
return FinishWithError(TResult::ERROR, "self-management is not enabled");
}
const auto& smConfig = config.GetSelfManagementConfig();

THashMap<TVDiskIdShort, NBsController::TPDiskId> replacedDisks;
Expand Down Expand Up @@ -656,7 +653,7 @@ namespace NKikimr::NStorage {
void ReplaceStorageConfig(const TQuery::TReplaceStorageConfig& request) {
if (!RunCommonChecks()) {
return;
} else if (!Self->ConfigCommittedToConsole && Self->StorageConfig->GetSelfManagementConfig().GetEnabled()) {
} else if (!Self->ConfigCommittedToConsole && Self->SelfManagementEnabled) {
return FinishWithError(TResult::ERROR, "previous config has not been committed to Console yet");
}

Expand Down Expand Up @@ -711,7 +708,8 @@ namespace NKikimr::NStorage {
}

// whether we are enabling distconf right now
const bool enablingDistconf = Self->StorageConfig->GetSelfManagementConfig().GetEnabled() <
const bool enablingDistconf = Self->BaseConfig.GetSelfManagementConfig().GetEnabled() &&
!Self->SelfManagementEnabled &&
config.GetSelfManagementConfig().GetEnabled();

if (!Self->EnqueueConsoleConfigValidation(SelfId(), enablingDistconf, NewYaml)) {
Expand Down Expand Up @@ -863,6 +861,8 @@ namespace NKikimr::NStorage {
FinishWithError(TResult::ERROR, "something going on with default FSM");
} else if (auto error = ValidateConfig(*Self->StorageConfig)) {
FinishWithError(TResult::ERROR, TStringBuilder() << "current config validation failed: " << *error);
} else if (Scepter.expired()) {
FinishWithError(TResult::ERROR, "scepter lost during query execution");
} else {
return true;
}
Expand Down
9 changes: 9 additions & 0 deletions ydb/core/blobstorage/nodewarden/distconf_mon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,15 @@ namespace NKikimr::NStorage {
}
}

DIV_CLASS("panel panel-info") {
DIV_CLASS("panel-heading") {
out << "Main operational parameters";
}
DIV_CLASS("panel-body") {
out << "Self-management enabled: " << (SelfManagementEnabled ? "yes" : "no") << "<br/>";
}
}

auto outputConfig = [&](const char *name, auto *config) {
DIV_CLASS("panel panel-info") {
DIV_CLASS("panel-heading") {
Expand Down
11 changes: 7 additions & 4 deletions ydb/core/blobstorage/nodewarden/distconf_persistent_storage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,12 +293,15 @@ namespace NKikimr::NStorage {

// generate new list of drives to acquire
std::vector<TString> drivesToRead;
EnumerateConfigDrives(InitialConfig, SelfId().NodeId(), [&](const auto& /*node*/, const auto& drive) {
drivesToRead.push_back(drive.GetPath());
});
std::sort(drivesToRead.begin(), drivesToRead.end());
if (BaseConfig.GetSelfManagementConfig().GetEnabled()) {
EnumerateConfigDrives(InitialConfig, SelfId().NodeId(), [&](const auto& /*node*/, const auto& drive) {
drivesToRead.push_back(drive.GetPath());
});
std::sort(drivesToRead.begin(), drivesToRead.end());
}

if (DrivesToRead != drivesToRead) { // re-read configuration as it may cover additional drives
DrivesToRead = std::move(drivesToRead);
ReadConfig();
} else {
ApplyStorageConfig(InitialConfig);
Expand Down
1 change: 1 addition & 0 deletions ydb/core/blobstorage/nodewarden/node_warden_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,7 @@ namespace NKikimr::NStorage {
void ForwardToDistributedConfigKeeper(STATEFN_SIG);

NKikimrBlobStorage::TStorageConfig StorageConfig;
bool SelfManagementEnabled = false;
THashSet<TActorId> StorageConfigSubscribers;

void Handle(TEvNodeWardenQueryStorageConfig::TPtr ev);
Expand Down
1 change: 1 addition & 0 deletions ydb/core/blobstorage/nodewarden/node_warden_mon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ void TNodeWarden::RenderWholePage(IOutputStream& out) {

TAG(TH3) { out << "StorageConfig"; }
DIV() {
out << "<p>Self-management enabled: " << (SelfManagementEnabled ? "yes" : "no") << "</p>";
TString s;
NProtoBuf::TextFormat::PrintToString(StorageConfig, &s);
out << "<pre>" << s << "</pre>";
Expand Down
9 changes: 7 additions & 2 deletions ydb/core/blobstorage/nodewarden/node_warden_resource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,16 @@ void TNodeWarden::ApplyServiceSet(const NKikimrBlobStorage::TNodeWardenServiceSe
}

// Handles a storage-config query: replies to the sender with the current StorageConfig
// (no proposed config attached) and, if the query has Subscribe set, records the sender in
// StorageConfigSubscribers.
// NOTE(review): this text is a rendered diff; the two consecutive Send lines appear to be the
// pre-change (two-argument) and post-change (three-argument) versions of the same statement.
void TNodeWarden::Handle(TEvNodeWardenQueryStorageConfig::TPtr ev) {
Send(ev->Sender, new TEvNodeWardenStorageConfig(StorageConfig, nullptr));
Send(ev->Sender, new TEvNodeWardenStorageConfig(StorageConfig, nullptr, SelfManagementEnabled));
if (ev->Get()->Subscribe) {
// remember the requester in the subscriber set
StorageConfigSubscribers.insert(ev->Sender);
}
}

void TNodeWarden::Handle(TEvNodeWardenStorageConfig::TPtr ev) {
ev->Get()->Config->Swap(&StorageConfig);
SelfManagementEnabled = ev->Get()->SelfManagementEnabled;

if (StorageConfig.HasBlobStorageConfig()) {
if (const auto& bsConfig = StorageConfig.GetBlobStorageConfig(); bsConfig.HasServiceSet()) {
const NKikimrBlobStorage::TNodeWardenServiceSet *proposed = nullptr;
Expand All @@ -98,15 +100,18 @@ void TNodeWarden::Handle(TEvNodeWardenStorageConfig::TPtr ev) {
ApplyStorageConfig(bsConfig.GetServiceSet(), proposed);
}
}

if (StorageConfig.HasStateStorageConfig() && StorageConfig.HasStateStorageBoardConfig() && StorageConfig.HasSchemeBoardConfig()) {
ApplyStateStorageConfig(ev->Get()->ProposedConfig.get());
} else {
Y_ABORT_UNLESS(!StorageConfig.HasStateStorageConfig() && !StorageConfig.HasStateStorageBoardConfig() &&
!StorageConfig.HasSchemeBoardConfig());
}

for (const TActorId& subscriber : StorageConfigSubscribers) {
Send(subscriber, new TEvNodeWardenStorageConfig(StorageConfig, nullptr));
Send(subscriber, new TEvNodeWardenStorageConfig(StorageConfig, nullptr, SelfManagementEnabled));
}

TActivationContext::Send(new IEventHandle(TEvBlobStorage::EvNodeWardenStorageConfigConfirm, 0, ev->Sender, SelfId(),
nullptr, ev->Cookie));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ void TNodeWardenMockActor::Handle(TEvBlobStorage::TEvControllerNodeServiceSetUpd
}

// Mock handler for a storage-config query: replies to the sender with a default-constructed
// TStorageConfig, no proposed config, and (in the three-argument form) self-management
// reported as disabled.
// NOTE(review): this text is a rendered diff; the two consecutive Send lines appear to be the
// pre-change and post-change versions of the same statement.
void TNodeWardenMockActor::Handle(TEvNodeWardenQueryStorageConfig::TPtr ev) {
Send(ev->Sender, new TEvNodeWardenStorageConfig(NKikimrBlobStorage::TStorageConfig(), nullptr));
Send(ev->Sender, new TEvNodeWardenStorageConfig(NKikimrBlobStorage::TStorageConfig(), nullptr, false));
}

void TNodeWardenMockActor::HandleUnsubscribe(STATEFN_SIG) {
Expand Down
31 changes: 19 additions & 12 deletions ydb/core/mind/bscontroller/bsc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ NKikimrBlobStorage::TGroupStatus::E TBlobStorageController::DeriveStatus(const T
}

void TBlobStorageController::OnActivateExecutor(const TActorContext&) {
StartConsoleInteraction();

// create stat processor
StatProcessorActorId = Register(CreateStatProcessorActor());

Expand Down Expand Up @@ -124,6 +126,7 @@ void TBlobStorageController::OnActivateExecutor(const TActorContext&) {

void TBlobStorageController::Handle(TEvNodeWardenStorageConfig::TPtr ev) {
ev->Get()->Config->Swap(&StorageConfig);
SelfManagementEnabled = ev->Get()->SelfManagementEnabled;

auto prevStaticPDisks = std::exchange(StaticPDisks, {});
auto prevStaticVSlots = std::exchange(StaticVSlots, {});
Expand Down Expand Up @@ -158,20 +161,21 @@ void TBlobStorageController::Handle(TEvNodeWardenStorageConfig::TPtr ev) {
}
}

if (StorageConfig.GetSelfManagementConfig().GetEnabled()) {
if (SelfManagementEnabled) {
// assuming that in autoconfig mode HostRecords are managed by the distconf; we need to apply it here to
// avoid race with box autoconfiguration and node list change
HostRecords = std::make_shared<THostRecordMap::element_type>(StorageConfig);
if (SelfHealId) {
Send(SelfHealId, new TEvPrivate::TEvUpdateHostRecords(HostRecords));
}

ConsoleInteraction->Stop(); // distconf will handle the Console from now on
} else {
StartConsoleInteraction();
ConsoleInteraction->Start(); // start console interaction when working in non-distconf mode
ConsoleInteraction->Start(); // we control the Console now
}

if (!std::exchange(StorageConfigObtained, true)) { // this is the first time we get StorageConfig in this instance of BSC
if (HostRecords) {
if (SelfManagementEnabled) {
OnHostRecordsInitiate();
} else {
Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes(true));
Expand All @@ -192,16 +196,20 @@ void TBlobStorageController::Handle(TEvents::TEvUndelivered::TPtr ev) {
}

void TBlobStorageController::ApplyStorageConfig(bool ignoreDistconf) {
if (!StorageConfig.HasBlobStorageConfig() || // this would be strange
!ignoreDistconf && (!StorageConfig.GetSelfManagementConfig().GetEnabled() ||
!StorageConfig.GetSelfManagementConfig().GetAutomaticBoxManagement())) {
if (!StorageConfig.HasBlobStorageConfig()) {
Y_DEBUG_ABORT("missing BlobStorageConfig with running BSC");
return;
}
const auto& bsConfig = StorageConfig.GetBlobStorageConfig();

if (Boxes.size() > 1) {
return;
}

if (!ignoreDistconf && (!SelfManagementEnabled || !StorageConfig.GetSelfManagementConfig().GetAutomaticBoxManagement())) {
return; // not expected to be managed by BSC
}

std::optional<ui64> generation;
if (!Boxes.empty()) {
const auto& [boxId, box] = *Boxes.begin();
Expand Down Expand Up @@ -292,11 +300,10 @@ void TBlobStorageController::OnHostRecordsInitiate() {
"BlobStorageControllerControls.EnableSelfHealWithDegraded");
}
}
Y_ABORT_UNLESS(!SelfHealId);
SelfHealId = Register(CreateSelfHealActor());
PushStaticGroupsToSelfHeal();
if (StorageConfigObtained) {
Execute(CreateTxInitScheme());
}
Execute(CreateTxInitScheme());
}

void TBlobStorageController::IssueInitialGroupContent() {
Expand Down Expand Up @@ -471,9 +478,9 @@ void TBlobStorageController::PassAway() {
TActivationContext::Send(new IEventHandle(TEvents::TSystem::Unsubscribe, 0, MakeBlobStorageNodeWardenID(SelfId().NodeId()),
SelfId(), nullptr, 0));
if (ConsoleInteraction) {
ConsoleInteraction->OnPassAway();
ConsoleInteraction->Stop();
}
return TActor::PassAway();
TActor::PassAway();
}

TBlobStorageController::TBlobStorageController(const TActorId &tablet, TTabletStorageInfo *info)
Expand Down
Loading

0 comments on commit 3282fe4

Please sign in to comment.