Skip to content

Commit

Permalink
issue-2074: serial number change detection (#2399)
Browse files: browse the repository at this point in the history
* issue-2074: serial number change detection
  • Loading branch information
sharpeye authored Nov 2, 2024
1 parent 7e342bb commit cac9eff
Show file tree
Hide file tree
Showing 9 changed files with 526 additions and 39 deletions.
2 changes: 1 addition & 1 deletion cloud/blockstore/config/disk.proto
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ message TFileDeviceArgs
// Device pool name.
optional string PoolName = 4;

// Serial number. For testing purposes.
// Serial number. For caching purposes.
optional string SerialNumber = 5;

// Data offset relative to the beginning of the file.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ void TDiskAgentActor::InitAgent(const TActorContext& ctx)
std::move(rdmaTargetConfig),
OldRequestCounters);

auto* actorSystem = ctx.ActorSystem();
auto* actorSystem = TActivationContext::ActorSystem();
auto replyTo = ctx.SelfID;

State->Initialize().Subscribe([=] (auto future) {
Expand All @@ -61,7 +61,8 @@ void TDiskAgentActor::InitAgent(const TActorContext& ctx)
auto response = std::make_unique<TCompletionEvent>(
std::move(r.Configs),
std::move(r.Errors),
std::move(r.ConfigMismatchErrors));
std::move(r.ConfigMismatchErrors),
std::move(r.DevicesWithNewSerialNumber));

actorSystem->Send(
new IEventHandle(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,19 @@ struct TEvDiskAgentPrivate
TVector<NProto::TDeviceConfig> Configs;
TVector<TString> Errors;
TVector<TString> ConfigMismatchErrors;
TVector<TString> DevicesWithNewSerialNumber;

TInitAgentCompleted() = default;

TInitAgentCompleted(
TVector<NProto::TDeviceConfig> configs,
TVector<TString> errors,
TVector<TString> configMismatchErrors)
TVector<TString> configMismatchErrors,
TVector<TString> devicesWithNewSerialNumber)
: Configs(std::move(configs))
, Errors(std::move(errors))
, ConfigMismatchErrors(std::move(configMismatchErrors))
, DevicesWithNewSerialNumber(std::move(devicesWithNewSerialNumber))
{}
};

Expand Down
9 changes: 5 additions & 4 deletions cloud/blockstore/libs/storage/disk_agent/disk_agent_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ TFuture<TInitializeResult> TDiskAgentState::InitAioStorage()
AgentConfig,
StorageProvider,
NvmeManager)
.Apply([=] (auto future) {
.Apply([=, this] (auto future) {
TInitializeStorageResult r = future.ExtractValue();

InitErrorsCount = r.Errors.size();
Expand Down Expand Up @@ -412,12 +412,13 @@ TFuture<TInitializeResult> TDiskAgentState::InitAioStorage()
std::move(device));
}

return TInitializeResult {
return TInitializeResult{
.Configs = std::move(r.Configs),
.Errors = std::move(r.Errors),
.ConfigMismatchErrors = std::move(r.ConfigMismatchErrors),
.Guard = std::move(r.Guard)
};
.DevicesWithNewSerialNumber =
std::move(r.DevicesWithNewSerialNumber),
.Guard = std::move(r.Guard)};
});
}

Expand Down
2 changes: 2 additions & 0 deletions cloud/blockstore/libs/storage/disk_agent/disk_agent_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ class TDiskAgentState
TVector<NProto::TDeviceConfig> Configs;
TVector<TString> Errors;
TVector<TString> ConfigMismatchErrors;
TVector<TString> DevicesWithNewSerialNumber;

TDeviceGuard Guard;
};

Expand Down
101 changes: 70 additions & 31 deletions cloud/blockstore/libs/storage/disk_agent/storage_initializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,11 @@ class TInitializer

TVector<TString> Errors;
TVector<TString> ConfigMismatchErrors;
TVector<TString> DevicesWithNewSerialNumber;
TMutex Lock;

THashMap<TString, TString> PathToSerial;

public:
TInitializer(
TLog log,
Expand Down Expand Up @@ -141,11 +144,17 @@ class TInitializer
void ScanFileDevices();
bool ValidateGeneratedConfigs(const TVector<NProto::TFileDeviceArgs>& fileDevices);
bool ValidateStorageDiscoveryConfig() const;
void ValidateCurrentConfigs();
void ValidateCurrentConfigs(
const TVector<NProto::TFileDeviceArgs>& cachedDevices);

void SaveCurrentConfig();

void ReportDiskAgentConfigMismatchEvent(const TString& error);

[[nodiscard]] TString GetCachedConfigsPath() const;

TString GetSerialNumber(const TString& path);
void SetupSerialNumbers(TVector<NProto::TFileDeviceArgs>& fileDevices);
};

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -291,6 +300,33 @@ bool TInitializer::ValidateGeneratedConfigs(
return true;
}

// Returns the serial number of the device located at `path`.
//
// NvmeManager is queried only the first time a given path is seen; the value
// it returns is stored in PathToSerial and reused on later calls. If the query
// reports an error, a message is appended to Errors (under Lock). The value
// returned alongside the error is stored as well, so the same path is not
// queried (or reported) again.
TString TInitializer::GetSerialNumber(const TString& path)
{
    // Fast path: the serial number for this path is already known.
    if (const auto cached = PathToSerial.find(path);
        cached != PathToSerial.end())
    {
        return cached->second;
    }

    auto [sn, error] = NvmeManager->GetSerialNumber(path);

    // Remember the answer before reporting, regardless of the outcome.
    const auto inserted = PathToSerial.emplace(path, sn).first;

    if (HasError(error)) {
        with_lock (Lock) {
            Errors.push_back(
                TStringBuilder()
                << "Can't get serial number for " << path.Quote() << ": "
                << FormatError(error));
        }
    }

    return inserted->second;
}

// Overwrites the serial number of every entry in `fileDevices` with the value
// GetSerialNumber() yields for that entry's path.
void TInitializer::SetupSerialNumbers(
    TVector<NProto::TFileDeviceArgs>& fileDevices)
{
    for (auto entry = fileDevices.begin(); entry != fileDevices.end(); ++entry) {
        const auto& devicePath = entry->GetPath();
        entry->SetSerialNumber(GetSerialNumber(devicePath));
    }
}

void TInitializer::ScanFileDevices()
{
if (!ValidateStorageDiscoveryConfig()) {
Expand All @@ -310,12 +346,12 @@ void TInitializer::ScanFileDevices()
}

TVector<NProto::TFileDeviceArgs> files = gen.ExtractResult();

if (files.empty()) {
return;
}

SortBy(files, GetDeviceId);
SetupSerialNumbers(files);

if (!ValidateGeneratedConfigs(files)) {
ReportDiskAgentConfigMismatchEvent("Bad generated config");
Expand Down Expand Up @@ -350,8 +386,7 @@ void TInitializer::ScanFileDevices()

void TInitializer::SaveCurrentConfig()
{
const auto path = AgentConfig->GetCachedConfigPath();

const auto path = GetCachedConfigsPath();
if (path.empty()) {
return;
}
Expand All @@ -378,16 +413,19 @@ void TInitializer::SaveCurrentConfig()
}
}

void TInitializer::ValidateCurrentConfigs()
TString TInitializer::GetCachedConfigsPath() const
{
const TString storagePath = StorageConfig->GetCachedDiskAgentConfigPath();
const TString diskAgentPath = AgentConfig->GetCachedConfigPath();
const TString& path = diskAgentPath.empty() ? storagePath : diskAgentPath;
auto cachedDevices = LoadCachedConfig(path);
return diskAgentPath.empty() ? storagePath : diskAgentPath;
}

void TInitializer::ValidateCurrentConfigs(
const TVector<NProto::TFileDeviceArgs>& cachedDevices)
{
if (cachedDevices.empty()) {
STORAGE_INFO("There is no cached config");
SaveCurrentConfig();

return;
}

Expand All @@ -406,14 +444,15 @@ void TInitializer::ValidateCurrentConfigs()
ss << d << "\n";
}
ss << "\nCached:\n";
for (auto& d: cachedDevices) {
for (const auto& d: cachedDevices) {
ss << d << "\n";
}

ReportDiskAgentConfigMismatchEvent(ss.Str());

STORAGE_WARN("Current config is broken, fallback to the cached one.");
FileDevices.swap(cachedDevices);
FileDevices = cachedDevices;
SetupSerialNumbers(FileDevices);

Errors.push_back(TStringBuilder()
<< "broken config: " << FormatError(error));
Expand All @@ -424,7 +463,22 @@ TFuture<void> TInitializer::Initialize()
ScanFileDevices();

try {
ValidateCurrentConfigs();
const auto cachedDevices = LoadCachedConfig(GetCachedConfigsPath());
for (const auto& d: cachedDevices) {
const auto currentSerialNumber = GetSerialNumber(d.GetPath());

if (currentSerialNumber != d.GetSerialNumber()) {
STORAGE_WARN(
"Device " << d.GetDeviceId() << " [" << d.GetPath()
<< "] has new serial number "
<< TString(currentSerialNumber).Quote()
<< " (was " << d.GetSerialNumber().Quote()
<< ")");
DevicesWithNewSerialNumber.push_back(d.GetDeviceId());
}
}

ValidateCurrentConfigs(cachedDevices);
} catch (...) {
return MakeErrorFuture<void>(std::current_exception());
}
Expand Down Expand Up @@ -470,7 +524,7 @@ TFuture<void> TInitializer::Initialize()
device.GetOffset() / device.GetBlockSize(),
Configs[i],
Stats[i]
).Subscribe([=] (const auto& future) {
).Subscribe([=, this] (const auto& future) {
try {
Devices[i] = future.GetValue();
} catch (...) {
Expand Down Expand Up @@ -498,7 +552,7 @@ TFuture<void> TInitializer::Initialize()

try {
auto result = CreateMemoryStorage(Configs[i], Stats[i])
.Subscribe([=] (const auto& future) {
.Subscribe([=, this] (const auto& future) {
try {
Devices[i] = future.GetValue();
} catch (...) {
Expand All @@ -520,32 +574,16 @@ TFuture<void> TInitializer::Initialize()
NProto::TDeviceConfig TInitializer::CreateConfig(
const NProto::TFileDeviceArgs& device)
{
const auto& path = device.GetPath();
const ui32 blockSize = device.GetBlockSize();

NProto::TDeviceConfig config;

config.SetDeviceName(path);
config.SetDeviceName(device.GetPath());
config.SetDeviceUUID(device.GetDeviceId());
config.SetBlockSize(blockSize);
config.SetBlockSize(device.GetBlockSize());
config.SetRack(AgentConfig->GetRack());
config.SetPoolName(device.GetPoolName());
config.SetSerialNumber(device.GetSerialNumber());
config.SetPhysicalOffset(device.GetOffset());

if (!config.GetSerialNumber()) {
auto [sn, error] = NvmeManager->GetSerialNumber(path);
if (!HasError(error)) {
config.SetSerialNumber(sn);
} else {
with_lock (Lock) {
Errors.push_back(TStringBuilder()
<< "Can't get serial number for " << path.Quote() << ": "
<< FormatError(error));
}
}
}

return config;
}

Expand Down Expand Up @@ -587,6 +625,7 @@ TInitializeStorageResult TInitializer::GetResult()

r.Errors = std::move(Errors);
r.ConfigMismatchErrors = std::move(ConfigMismatchErrors);
r.DevicesWithNewSerialNumber = std::move(DevicesWithNewSerialNumber);
r.Guard = std::move(Guard);

return r;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ struct TInitializeStorageResult
TVector<TStorageIoStatsPtr> Stats;
TVector<TString> Errors;
TVector<TString> ConfigMismatchErrors;
TVector<TString> DevicesWithNewSerialNumber;
TDeviceGuard Guard;
};

Expand Down
Loading

0 comments on commit cac9eff

Please sign in to comment.