Skip to content

Commit

Permalink
chore: increase ps_retries and silence log once logged
Browse files Browse the repository at this point in the history
Signed-off-by: Abhinandan Purkait <[email protected]>
  • Loading branch information
Abhinandan-Purkait committed Jan 16, 2025
1 parent d994cb0 commit e28afe9
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 13 deletions.
21 changes: 14 additions & 7 deletions io-engine/src/bdev/nexus/nexus_persistence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,12 @@ impl<'n> Nexus<'n> {
};
nexus_info.children.push(child_info);
});
// We started with this child because it was healthy in etcd, or isn't there at all.
// Being unhealthy here means it is undergoing a fault/retire before nexus is open.
if nexus_info.children.len() == 1 && !nexus_info.children[0].healthy {
// We started with this child because it was healthy in etcd, or
// isn't there at all. Being unhealthy here means it is undergoing
// a fault/retire before nexus is open.
if nexus_info.children.len() == 1
&& !nexus_info.children[0].healthy
{
warn!("{self:?} Not persisting: the only child went unhealthy during nexus creation");
return Err(Error::NexusCreate {
name: self.name.clone(),
Expand Down Expand Up @@ -211,6 +214,7 @@ impl<'n> Nexus<'n> {
};

let mut retry = PersistentStore::retries();
let mut logged = false;
loop {
let Err(err) = PersistentStore::put(&key, &info.inner).await else {
trace!(?key, "{self:?}: the state was saved successfully");
Expand All @@ -225,10 +229,13 @@ impl<'n> Nexus<'n> {
});
}

error!(
"{self:?}: failed to persist nexus information, \
will retry ({retry} left): {err}"
);
if !logged {
error!(
"{self:?}: failed to persist nexus information, \
will silently retry ({retry} left): {err}"
);
logged = true;
}

// Allow some time for the connection to the persistent
// store to be re-established before retrying the operation.
Expand Down
4 changes: 2 additions & 2 deletions io-engine/src/core/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ pub struct MayastorCliArgs {
pub ps_timeout: Duration,
#[clap(long = "ps-retries", default_value = "30")]
/// Persistent store operation retries.
pub ps_retries: u8,
pub ps_retries: u16,
#[clap(long = "bdev-pool-size", default_value = "65535")]
/// Number of entries in memory pool for bdev I/O contexts
pub bdev_io_ctx_pool_size: u64,
Expand Down Expand Up @@ -374,7 +374,7 @@ pub struct MayastorEnvironment {
pub registration_endpoint: Option<Uri>,
ps_endpoint: Option<String>,
ps_timeout: Duration,
ps_retries: u8,
ps_retries: u16,
mayastor_config: Option<String>,
ptpl_dir: Option<String>,
pool_config: Option<String>,
Expand Down
8 changes: 4 additions & 4 deletions io-engine/src/persistent_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ pub struct PersistentStoreBuilder {
/// Operation timeout.
timeout: Duration,
/// Number of operation retries.
retries: u8,
retries: u16,
}

impl Default for PersistentStoreBuilder {
Expand Down Expand Up @@ -74,7 +74,7 @@ impl PersistentStoreBuilder {
}

/// Sets number of operation retries.
pub fn with_retries(mut self, retries: u8) -> Self {
pub fn with_retries(mut self, retries: u16) -> Self {
self.retries = retries;
self
}
Expand All @@ -96,7 +96,7 @@ pub struct PersistentStore {
/// Operation timeout.
timeout: Duration,
/// Number of operation retries.
retries: u8,
retries: u16,
}

/// Persistent store global instance.
Expand Down Expand Up @@ -311,7 +311,7 @@ impl PersistentStore {
}

/// Gets the number of operation retries.
pub fn retries() -> u8 {
pub fn retries() -> u16 {
Self::instance().lock().retries
}

Expand Down

0 comments on commit e28afe9

Please sign in to comment.