Skip to content

Commit

Permalink
Add testing for graceful shutdown more frequently
Browse files Browse the repository at this point in the history
  • Loading branch information
danielle-tfh committed Jan 28, 2025
1 parent c04b7f7 commit 8491e39
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/temp-branch-build-and-push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Branch - Build and push docker image
on:
push:
branches:
- "reduce-size-docker-image"
- "POP-2045/investigate-nodes-not-syncing-up"

concurrency:
group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}'
Expand Down
17 changes: 16 additions & 1 deletion iris-mpc/src/bin/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,15 @@ async fn server_main(config: Config) -> eyre::Result<()> {
"Node {} did not respond with success, starting graceful shutdown",
host
);
// If a node reports a failure while the nodes are still starting up (its last recorded
// response is still empty), panic immediately instead of starting a graceful shutdown.
if last_response[i] == String::default() {
panic!(
"Node {} did not respond with success during heartbeat init phase, \
killing server...",
host
);
}

if !heartbeat_shutdown_handler.is_shutting_down() {
heartbeat_shutdown_handler.trigger_manual_shutdown();
Expand Down Expand Up @@ -968,6 +977,8 @@ async fn server_main(config: Config) -> eyre::Result<()> {
tracing::info!("Heartbeat starting...");
heartbeat_rx.await?;
tracing::info!("Heartbeat on all nodes started.");
let download_shutdown_handler = Arc::clone(&shutdown_handler);

background_tasks.check_tasks();

let my_state = SyncState {
Expand All @@ -988,7 +999,6 @@ async fn server_main(config: Config) -> eyre::Result<()> {
let load_chunks_parallelism = config.load_chunks_parallelism;
let db_chunks_bucket_name = config.db_chunks_bucket_name.clone();
let db_chunks_folder_name = config.db_chunks_folder_name.clone();
let download_shutdown_handler = Arc::clone(&shutdown_handler);

let (tx, rx) = oneshot::channel();
background_tasks.spawn_blocking(move || {
Expand Down Expand Up @@ -1033,6 +1043,11 @@ async fn server_main(config: Config) -> eyre::Result<()> {
metrics::counter!("db.sync.rollback").increment(1);
}

if download_shutdown_handler.is_shutting_down() {
tracing::warn!("Shutting down has been triggered");
return Ok(());
}

// --------------------------------------------------------------------------
// ANCHOR: Load the database
// --------------------------------------------------------------------------
Expand Down

0 comments on commit 8491e39

Please sign in to comment.