From 73ffaf723b8d57743b109b7633c7f34a7a401678 Mon Sep 17 00:00:00 2001 From: "Danielle Nagar @ TFH" Date: Fri, 24 Jan 2025 11:10:38 +0100 Subject: [PATCH] move graceful shutdown to after UUID changes (#962) --- iris-mpc/src/bin/server.rs | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/iris-mpc/src/bin/server.rs b/iris-mpc/src/bin/server.rs index 6b24058e9..5e0262e1f 100644 --- a/iris-mpc/src/bin/server.rs +++ b/iris-mpc/src/bin/server.rs @@ -920,18 +920,6 @@ async fn server_main(config: Config) -> eyre::Result<()> { .json::() .await .expect("Deserialization of probe response failed"); - if probe_response.shutting_down { - tracing::info!("Node {} has starting graceful shutdown", host); - - if !heartbeat_shutdown_handler.is_shutting_down() { - heartbeat_shutdown_handler.trigger_manual_shutdown(); - tracing::error!( - "Node {} has starting graceful shutdown, therefore triggering \ - graceful shutdown", - host - ); - } - } if probe_response.image_name != image_name { // Do not create a panic as we still can continue to process before its // updated @@ -957,6 +945,17 @@ async fn server_main(config: Config) -> eyre::Result<()> { // noticing. Our main NCCL connections cannot recover from // this, so we panic. panic!("Node {} seems to have restarted, killing server...", host); + } else if probe_response.shutting_down { + tracing::info!("Node {} has starting graceful shutdown", host); + + if !heartbeat_shutdown_handler.is_shutting_down() { + heartbeat_shutdown_handler.trigger_manual_shutdown(); + tracing::error!( + "Node {} has starting graceful shutdown, therefore triggering \ + graceful shutdown", + host + ); + } } else { tracing::info!("Heartbeat: Node {} is healthy", host); }