Skip to content

Commit

Permalink
Fix restoration of shadow disk from backup (#2934)
Browse files Browse the repository at this point in the history
* Fix restoration of shadow disk from backup

* Fix nits
  • Loading branch information
drbasic authored Jan 28, 2025
1 parent ef9294e commit ca6a2ee
Show file tree
Hide file tree
Showing 4 changed files with 153 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ TStringBuf NormalizeMirrorId(TStringBuf diskId) {

bool CheckMirrorDiskId(
const TSet<TString>& disksInSS,
const TVector<NProto::TDiskConfig> disksInBackup,
const TVector<NProto::TDiskConfig>& disksInBackup,
const NProto::TDiskConfig& disk)
{
const TString& diskId = disk.GetDiskId();
Expand Down Expand Up @@ -532,12 +532,21 @@ void TRestoreValidationActor::HandleListVolumesResponse(
ValidSnapshot.DisksToCleanup,
NormalizeMirrorId(itr->GetDiskId())))
{
LOG_WARN_S(
ctx,
Component,
RESTORE_PREFIX
<< " DiskID " << itr->GetDiskId().Quote()
<< " is found in backup but not in SS");
const bool isShadowDisk =
!itr->GetCheckpointReplica().GetCheckpointId().empty();
if (isShadowDisk) {
LOG_WARN_S(
ctx,
Component,
RESTORE_PREFIX << " ShadowDisk " << itr->GetDiskId().Quote()
<< " is found in backup");
} else {
LOG_WARN_S(
ctx,
Component,
RESTORE_PREFIX << " DiskID " << itr->GetDiskId().Quote()
<< " is found in backup, but not in SS");
}
SetErrorDevicesInBackup(itr->GetDeviceUUIDs(), ctx.Now());
DisksInBackup.erase(itr->GetDiskId());
itr = ValidSnapshot.Disks.erase(itr);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -790,6 +790,86 @@ Y_UNIT_TEST_SUITE(TRestoreValidatorActorTest)
UNIT_ASSERT_EQUAL(
state.Disks[4].GetDiskId(), "Disk 4/1");
}

// Feeds the restore validator a backup that holds a regular disk ("Disk 1")
// and a checkpoint replica of it ("Disk 1-cp1"), answers the validator's
// requests as if only "Disk 1" were known, and expects the validated state to
// contain "Disk 1" alone.
Y_UNIT_TEST_F(CheckSkipRestoreShadowDisk, TSetupEnvironment)
{
    TDiskRegistryStateSnapshot backup;

    // Appends a disk with the attributes shared by both backup entries.
    const auto addBackupDisk = [&backup](const char* diskId) -> auto&
    {
        auto& entry = backup.Disks.emplace_back();
        entry.SetDiskId(diskId);
        entry.SetFolderId("Folder 1");
        entry.SetCloudId("Cloud 1");
        entry.SetBlockSize(41);
        return entry;
    };

    // Source disk.
    addBackupDisk("Disk 1");

    // Checkpoint disk: same attributes plus a checkpoint replica record that
    // points back at the source disk.
    {
        auto* replica = addBackupDisk("Disk 1-cp1").MutableCheckpointReplica();
        replica->SetSourceDiskId("Disk 1");
        replica->SetCheckpointId("cp1");
    }

    const auto validatorId = ActorSystem.Register(
        new TRestoreValidationActor(EdgeActor, {}, 0, backup));

    // Delivers an event to the validator on behalf of the edge actor.
    const auto replyToValidator = [&](auto* event)
    {
        ActorSystem.Send(
            new NActors::IEventHandle(validatorId, EdgeActor, event));
    };

    // Step 1: the volume list contains only "Disk 1".
    ActorSystem.GrabEdgeEvent<TEvService::TEvListVolumesRequest>();
    {
        auto listReply =
            std::make_unique<TEvService::TEvListVolumesResponse>();
        listReply->Record.AddVolumes("Disk 1");
        replyToValidator(listReply.release());
    }

    // Step 2: describe "Disk 1" with a config matching the backup entry.
    ActorSystem.GrabEdgeEvent<TEvSSProxy::TEvDescribeVolumeRequest>();
    {
        NKikimrSchemeOp::TPathDescription pathDescription;
        auto* volumeConfig =
            pathDescription.MutableBlockStoreVolumeDescription()
                ->MutableVolumeConfig();
        volumeConfig->SetDiskId("Disk 1");
        volumeConfig->SetBlockSize(41);
        volumeConfig->SetFolderId("Folder 1");
        volumeConfig->SetCloudId("Cloud 1");

        auto describeReply =
            std::make_unique<TEvSSProxy::TEvDescribeVolumeResponse>(
                "",
                std::move(pathDescription));
        replyToValidator(describeReply.release());
    }

    // Step 3: the volume info request must target "Disk 1"; answer it.
    {
        auto infoRequest =
            ActorSystem.GrabEdgeEvent<TEvVolume::TEvGetVolumeInfoRequest>();
        UNIT_ASSERT_EQUAL(infoRequest->Record.GetDiskId(), "Disk 1");

        auto infoReply =
            std::make_unique<TEvVolume::TEvGetVolumeInfoResponse>();
        infoReply->Record.MutableVolume()->SetDiskId("Disk 1");
        replyToValidator(infoReply.release());
    }

    // Only the source disk is left in the validated state.
    auto validation = ActorSystem.GrabEdgeEvent<
        TEvDiskRegistryPrivate::TEvRestoreDiskRegistryValidationResponse>();
    const auto& restoredState = validation->LoadDBState;
    UNIT_ASSERT_EQUAL(restoredState.Disks.size(), 1);
    UNIT_ASSERT_EQUAL(restoredState.Disks[0].GetDiskId(), "Disk 1");
}
}

} // namespace NDiskRegistry
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,9 @@ void TShadowDiskActor::HandleShadowDiskAcquired(
}

if (HasError(msg->Error)) {
if (acquireReason != EAcquireReason::PeriodicalReAcquire) {
if (msg->Error.GetCode() == E_NOT_FOUND ||
acquireReason != EAcquireReason::PeriodicalReAcquire)
{
SetErrorState(ctx);
}
return;
Expand Down
54 changes: 54 additions & 0 deletions cloud/blockstore/libs/storage/volume/volume_ut_checkpoint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3725,6 +3725,60 @@ Y_UNIT_TEST_SUITE(TVolumeCheckpointTest)
}
}

// Rewrites every DescribeDisk response so that it carries E_NOT_FOUND, creates
// a checkpoint on a nonreplicated volume and expects the checkpoint status to
// be reported as ERROR.
Y_UNIT_TEST(ShouldStopAcquiringAfterENotFound)
{
    NProto::TStorageServiceConfig storageConfig;
    storageConfig.SetUseShadowDisksForNonreplDiskCheckpoints(true);
    storageConfig.SetMaxAcquireShadowDiskTotalTimeoutWhenNonBlocked(2000);

    auto runtime = PrepareTestActorRuntime(storageConfig);

    // Simulate E_NOT_FOUND coming from DiskRegistry: patch the error code of
    // each DescribeDisk response, then let the default observer handle it.
    runtime->SetObserverFunc(
        [](TAutoPtr<IEventHandle>& event)
        {
            const bool isDescribeDiskResponse =
                event->GetTypeRewrite() ==
                TEvDiskRegistry::EvDescribeDiskResponse;
            if (isDescribeDiskResponse) {
                auto* response =
                    event->Get<TEvDiskRegistry::TEvDescribeDiskResponse>();
                response->Record.MutableError()->SetCode(E_NOT_FOUND);
            }
            return TTestActorRuntime::DefaultObserverFunc(event);
        });

    // Create volume.
    TVolumeClient volume(*runtime);
    volume.UpdateVolumeConfig(
        0,
        0,
        0,
        0,
        false,
        1,
        NCloud::NProto::STORAGE_MEDIA_SSD_NONREPLICATED,
        32768);
    volume.WaitReady();

    // Attach a local read-write client.
    volume.AddClient(CreateVolumeClientInfo(
        NProto::VOLUME_ACCESS_READ_WRITE,
        NProto::VOLUME_MOUNT_LOCAL,
        0));

    // Create checkpoint.
    volume.CreateCheckpoint("c1");

    // Reconnect pipe since partition has restarted.
    volume.ReconnectPipe();

    // The checkpoint must be reported in the error state.
    const auto checkpointStatus =
        volume.GetCheckpointStatus("c1")->Record.GetCheckpointStatus();
    UNIT_ASSERT_EQUAL(NProto::ECheckpointStatus::ERROR, checkpointStatus);
}

Y_UNIT_TEST(ShouldBlockWritesWhenReAcquire)
{
NProto::TStorageServiceConfig config;
Expand Down

0 comments on commit ca6a2ee

Please sign in to comment.