From ca24a200b122119ad3e641d37358b503ef301327 Mon Sep 17 00:00:00 2001 From: Ryo Onodera Date: Wed, 4 Dec 2024 14:58:52 +0000 Subject: [PATCH] Wire unified scheduler into banking experimentally --- Cargo.lock | 71 +- Cargo.toml | 3 + banking-bench/Cargo.toml | 4 +- banking-bench/src/main.rs | 89 +- core/Cargo.toml | 4 +- core/benches/banking_stage.rs | 14 +- core/benches/banking_trace.rs | 26 +- core/benches/sigverify_stage.rs | 5 +- core/src/banking_simulation.rs | 90 +- core/src/banking_stage.rs | 174 ++- core/src/banking_stage/decision_maker.rs | 21 +- core/src/banking_stage/packet_deserializer.rs | 19 +- .../scheduler_controller.rs | 5 +- core/src/banking_trace.rs | 88 +- core/src/cluster_info_vote_listener.rs | 2 +- core/src/replay_stage.rs | 40 +- core/src/sigverify.rs | 48 +- core/src/sigverify_stage.rs | 20 +- core/src/tpu.rs | 21 +- core/src/tracer_packet_stats.rs | 12 +- core/src/validator.rs | 111 +- core/tests/unified_scheduler.rs | 20 +- ledger-tool/Cargo.toml | 1 + ledger-tool/src/ledger_utils.rs | 93 +- ledger-tool/src/main.rs | 34 +- ledger/benches/blockstore_processor.rs | 1 + ledger/src/blockstore_processor.rs | 67 +- local-cluster/tests/local_cluster.rs | 39 +- perf/Cargo.toml | 1 + perf/src/packet.rs | 34 + poh/src/poh_recorder.rs | 29 +- programs/sbf/Cargo.lock | 68 +- runtime/Cargo.toml | 2 +- runtime/src/bank.rs | 38 +- runtime/src/bank_forks.rs | 57 +- runtime/src/installed_scheduler_pool.rs | 93 +- sdk/frozen-abi/src/abi_example.rs | 7 + sdk/src/lib.rs | 1 + sdk/src/scheduling.rs | 8 + sdk/transaction-error/src/lib.rs | 5 + storage-proto/proto/transaction_by_addr.proto | 1 + storage-proto/src/convert.rs | 4 + svm/examples/Cargo.lock | 68 +- unified-scheduler-logic/src/lib.rs | 89 +- unified-scheduler-pool/Cargo.toml | 8 +- unified-scheduler-pool/src/lib.rs | 1279 +++++++++++++---- validator/src/cli.rs | 9 + validator/src/main.rs | 11 + 48 files changed, 2285 insertions(+), 649 deletions(-) create mode 100644 sdk/src/scheduling.rs diff --git a/Cargo.lock b/Cargo.lock index ab5874283bc759..3d847ab68427ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -192,6 +192,7 @@ dependencies = [ "solana-log-collector", "solana-logger", "solana-measure", + "solana-poh", "solana-program-runtime", "solana-rpc", "solana-runtime", @@ -1572,6 +1573,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -1931,13 +1941,35 @@ version = "0.99.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40eebddd2156ce1bb37b20bbe5151340a31828b1f2d22ba4141f3531710e38df" dependencies = [ - "convert_case", + "convert_case 0.4.0", "proc-macro2", "quote", "rustc_version 0.3.3", "syn 1.0.109", ] +[[package]] +name = "derive_more" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" +dependencies = [ + "convert_case 0.6.0", + "proc-macro2", + "quote", + "syn 2.0.90", + "unicode-xid", +] + [[package]] name = "dialoguer" version = "0.10.4" @@ -2061,6 +2093,12 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" +[[package]] +name = "dyn-clone" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" + [[package]] name = "eager" version = "0.1.0" @@ -3331,7 +3369,7 @@ version = "18.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2b99d4207e2a04fb4581746903c2bb7eb376f88de9c699d0f3e10feeac0cd3a" dependencies = [ - "derive_more", + "derive_more 0.99.16", "futures 0.3.31", "jsonrpc-core", "jsonrpc-pubsub", @@ -6026,6 +6064,7 @@ dependencies = [ name = "solana-banking-bench" version = "2.2.0" dependencies = [ + "assert_matches", "clap 3.2.23", "crossbeam-channel", "log", @@ -6043,6 +6082,7 @@ dependencies = [ "solana-sdk", "solana-streamer", "solana-tpu-client", + "solana-unified-scheduler-pool", "solana-version", ] @@ -6673,6 +6713,7 @@ dependencies = [ "chrono", "crossbeam-channel", "dashmap", + "derive_more 1.0.0", "etcd-client", "fs_extra", "futures 0.3.31", @@ -6739,6 +6780,7 @@ dependencies = [ "solana-tpu-client", "solana-transaction-status", "solana-turbine", + "solana-unified-scheduler-logic", "solana-unified-scheduler-pool", "solana-version", "solana-vote", @@ -7697,6 +7739,7 @@ dependencies = [ "bincode", "bv", "caps", + "crossbeam-channel", "curve25519-dalek 4.1.3", "dlopen2", "fnv", @@ -9530,18 +9573,23 @@ dependencies = [ "crossbeam-channel", "dashmap", "derive-where", + "derive_more 1.0.0", + "dyn-clone", "lazy_static", "log", - "qualifier_attr", "scopeguard", "solana-ledger", "solana-logger", + "solana-perf", + "solana-poh", "solana-runtime", "solana-runtime-transaction", "solana-sdk", "solana-timings", "solana-unified-scheduler-logic", + "solana-unified-scheduler-pool", "static_assertions", + "trait-set", "vec_extract_if_polyfill", ] @@ -10924,6 +10972,17 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "trait-set" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b79e2e9c9ab44c6d7c20d5976961b47e8f49ac199154daa514b77cd1ab536625" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "trees" version = "0.4.2" @@ -11005,6 +11064,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "unicode-width" version = "0.1.9" diff --git a/Cargo.toml b/Cargo.toml index c62c59226c78d2..37f546cde3476f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -294,11 +294,13 @@ curve25519-dalek = { version = "4.1.3", features = ["digest", "rand_core"] } dashmap = "5.5.3" derivation-path = { version = "0.2.0", default-features = false } derive-where = "1.2.7" +derive_more = { version = "1.0.0", features = ["full"] } dialoguer = "0.10.4" digest = "0.10.7" dir-diff = "0.3.3" dirs-next = "2.0.0" dlopen2 = "0.5.0" +dyn-clone = "1.0.17" eager = "0.1.0" ed25519-dalek = "=1.0.1" ed25519-dalek-bip32 = "0.2.0" @@ -629,6 +631,7 @@ tokio-util = "0.7" toml = "0.8.12" tonic = "0.9.2" tonic-build = "0.9.2" +trait-set = "0.3.0" trees = "0.4.2" tungstenite = "0.20.1" uriparse = "0.6.4" diff --git a/banking-bench/Cargo.toml b/banking-bench/Cargo.toml index 67ca53f88324c9..97485d0efa7d76 100644 --- a/banking-bench/Cargo.toml +++ b/banking-bench/Cargo.toml @@ -9,13 +9,14 @@ license = { workspace = true } edition = { workspace = true } [dependencies] +assert_matches = { workspace = true } clap = { version = "3.1.8", features = ["derive", "cargo"] } crossbeam-channel = { workspace = true } log = { workspace = true } rand = { workspace = true } rayon = { workspace = true } solana-client = { workspace = true } -solana-core = { workspace = true } +solana-core = { workspace = true, features = ["dev-context-only-utils"] } solana-gossip = { workspace = true } solana-ledger = { workspace = true } solana-logger = { workspace = true } @@ -26,6 +27,7 @@ solana-runtime = { workspace = true, features = ["dev-context-only-utils"] } solana-sdk = { workspace = true } solana-streamer = { workspace = true } solana-tpu-client = { workspace = true } +solana-unified-scheduler-pool = { workspace = true } solana-version = { workspace = true } [features] diff --git a/banking-bench/src/main.rs b/banking-bench/src/main.rs index c80e96005c8829..d08c59ce6c0e70 100644 --- a/banking-bench/src/main.rs +++ b/banking-bench/src/main.rs @@ -1,5 +1,6 @@ #![allow(clippy::arithmetic_side_effects)] use { + assert_matches::assert_matches, clap::{crate_description, crate_name, Arg, ArgEnum, Command}, crossbeam_channel::{unbounded, Receiver}, log::*, @@ -7,8 +8,10 @@ use { rayon::prelude::*, solana_client::connection_cache::ConnectionCache, solana_core::{ - banking_stage::BankingStage, - banking_trace::{BankingPacketBatch, BankingTracer, BANKING_TRACE_DIR_DEFAULT_BYTE_LIMIT}, + banking_stage::{update_bank_forks_and_poh_recorder_for_new_tpu_bank, BankingStage}, + banking_trace::{ + BankingPacketBatch, BankingTracer, Channels, BANKING_TRACE_DIR_DEFAULT_BYTE_LIMIT, + }, validator::BlockProductionMethod, }, solana_gossip::cluster_info::{ClusterInfo, Node}, @@ -29,6 +32,7 @@ use { hash::Hash, message::Message, pubkey::{self, Pubkey}, + scheduling::SchedulingMode, signature::{Keypair, Signature, Signer}, system_instruction, system_transaction, timing::timestamp, @@ -36,6 +40,7 @@ use { }, solana_streamer::socket::SocketAddrSpace, solana_tpu_client::tpu_client::DEFAULT_TPU_CONNECTION_POOL_SIZE, + solana_unified_scheduler_pool::{DefaultSchedulerPool, SupportedSchedulingMode}, std::{ sync::{atomic::Ordering, Arc, RwLock}, thread::sleep, @@ -347,7 +352,7 @@ fn main() { let (replay_vote_sender, _replay_vote_receiver) = unbounded(); let bank0 = Bank::new_for_benches(&genesis_config); let bank_forks = BankForks::new_rw_arc(bank0); - let mut bank = bank_forks.read().unwrap().working_bank(); + let mut bank = bank_forks.read().unwrap().working_bank_with_scheduler(); // set cost tracker limits to MAX so it will not filter out TXs bank.write_cost_tracker() @@ -440,9 +445,36 @@ fn main() { BANKING_TRACE_DIR_DEFAULT_BYTE_LIMIT, ))) .unwrap(); - let (non_vote_sender, non_vote_receiver) = banking_tracer.create_channel_non_vote(); - let (tpu_vote_sender, tpu_vote_receiver) = banking_tracer.create_channel_tpu_vote(); - let (gossip_vote_sender, gossip_vote_receiver) = banking_tracer.create_channel_gossip_vote(); + let prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); + let scheduler_pool = if matches!( + block_production_method, + BlockProductionMethod::UnifiedScheduler + ) { + let pool = DefaultSchedulerPool::new( + SupportedSchedulingMode::Either(SchedulingMode::BlockProduction), + None, + None, + None, + Some(replay_vote_sender.clone()), + prioritization_fee_cache.clone(), + poh_recorder.read().unwrap().new_recorder(), + ); + bank_forks + .write() + .unwrap() + .install_scheduler_pool(pool.clone()); + Some(pool) + } else { + None + }; + let Channels { + non_vote_sender, + non_vote_receiver, + tpu_vote_sender, + tpu_vote_receiver, + gossip_vote_sender, + gossip_vote_receiver, + } = banking_tracer.create_channels(scheduler_pool.as_ref()); let cluster_info = { let keypair = Arc::new(Keypair::new()); let node = Node::new_localhost_with_pubkey(&keypair.pubkey()); @@ -461,7 +493,7 @@ fn main() { ), }; let banking_stage = BankingStage::new_num_threads( - block_production_method, + block_production_method.clone(), &cluster_info, &poh_recorder, non_vote_receiver, @@ -473,10 +505,23 @@ fn main() { None, Arc::new(connection_cache), bank_forks.clone(), - &Arc::new(PrioritizationFeeCache::new(0u64)), + &prioritization_fee_cache, false, + scheduler_pool, ); + // This bench processes transactions, starting from the very first bank, so special-casing is + // needed for unified scheduler. + if matches!( + block_production_method, + BlockProductionMethod::UnifiedScheduler + ) { + bank = bank_forks + .write() + .unwrap() + .reinstall_block_production_scheduler_into_working_genesis_bank(); + } + // This is so that the signal_receiver does not go out of scope after the closure. // If it is dropped before poh_service, then poh_service will error when // calling send() on the channel. @@ -537,33 +582,31 @@ fn main() { tx_total_us += now.elapsed().as_micros() as u64; let mut poh_time = Measure::start("poh_time"); - poh_recorder + let cleared_bank = poh_recorder .write() .unwrap() .reset(bank.clone(), Some((bank.slot(), bank.slot() + 1))); + assert_matches!(cleared_bank, None); poh_time.stop(); let mut new_bank_time = Measure::start("new_bank"); + if let Some((result, _timings)) = bank.wait_for_completed_scheduler() { + assert_matches!(result, Ok(_)); + } let new_slot = bank.slot() + 1; - let new_bank = Bank::new_from_parent(bank, &collector, new_slot); + let new_bank = Bank::new_from_parent(bank.clone(), &collector, new_slot); new_bank_time.stop(); let mut insert_time = Measure::start("insert_time"); - bank_forks.write().unwrap().insert(new_bank); - bank = bank_forks.read().unwrap().working_bank(); + update_bank_forks_and_poh_recorder_for_new_tpu_bank( + &bank_forks, + &poh_recorder, + new_bank, + false, + ); + bank = bank_forks.read().unwrap().working_bank_with_scheduler(); insert_time.stop(); - // set cost tracker limits to MAX so it will not filter out TXs - bank.write_cost_tracker() - .unwrap() - .set_limits(u64::MAX, u64::MAX, u64::MAX); - - assert!(poh_recorder.read().unwrap().bank().is_none()); - poh_recorder - .write() - .unwrap() - .set_bank_for_test(bank.clone()); - assert!(poh_recorder.read().unwrap().bank().is_some()); debug!( "new_bank_time: {}us insert_time: {}us poh_time: {}us", new_bank_time.as_us(), diff --git a/core/Cargo.toml b/core/Cargo.toml index df42ec84657648..3858eadaf55159 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -17,6 +17,7 @@ codecov = { repository = "solana-labs/solana", branch = "master", service = "git ahash = { workspace = true } anyhow = { workspace = true } arrayvec = { workspace = true } +assert_matches = { workspace = true } base64 = { workspace = true } bincode = { workspace = true } bs58 = { workspace = true } @@ -24,6 +25,7 @@ bytes = { workspace = true } chrono = { workspace = true, features = ["default", "serde"] } crossbeam-channel = { workspace = true } dashmap = { workspace = true, features = ["rayon", "raw-api"] } +derive_more = { workspace = true } etcd-client = { workspace = true, features = ["tls"] } futures = { workspace = true } histogram = { workspace = true } @@ -101,7 +103,6 @@ tokio = { workspace = true, features = ["full"] } trees = { workspace = true } [dev-dependencies] -assert_matches = { workspace = true } fs_extra = { workspace = true } serde_json = { workspace = true } serial_test = { workspace = true } @@ -115,6 +116,7 @@ solana-poh = { workspace = true, features = ["dev-context-only-utils"] } solana-program-runtime = { workspace = true } solana-sdk = { workspace = true, features = ["dev-context-only-utils"] } solana-stake-program = { workspace = true } +solana-unified-scheduler-logic = { workspace = true } solana-unified-scheduler-pool = { workspace = true, features = [ "dev-context-only-utils", ] } diff --git a/core/benches/banking_stage.rs b/core/benches/banking_stage.rs index 0f449719ce34cb..51292d4d267674 100644 --- a/core/benches/banking_stage.rs +++ b/core/benches/banking_stage.rs @@ -2,7 +2,7 @@ #![feature(test)] use { - solana_core::validator::BlockProductionMethod, + solana_core::{banking_trace::Channels, validator::BlockProductionMethod}, solana_vote_program::{vote_state::TowerSync, vote_transaction::new_tower_sync_transaction}, }; @@ -211,9 +211,14 @@ fn bench_banking(bencher: &mut Bencher, tx_type: TransactionType) { genesis_config.ticks_per_slot = 10_000; let banking_tracer = BankingTracer::new_disabled(); - let (non_vote_sender, non_vote_receiver) = banking_tracer.create_channel_non_vote(); - let (tpu_vote_sender, tpu_vote_receiver) = banking_tracer.create_channel_tpu_vote(); - let (gossip_vote_sender, gossip_vote_receiver) = banking_tracer.create_channel_gossip_vote(); + let Channels { + non_vote_sender, + non_vote_receiver, + tpu_vote_sender, + tpu_vote_receiver, + gossip_vote_sender, + gossip_vote_receiver, + } = banking_tracer.create_channels(None); let mut bank = Bank::new_for_benches(&genesis_config); // Allow arbitrary transaction processing time for the purposes of this bench @@ -304,6 +309,7 @@ fn bench_banking(bencher: &mut Bencher, tx_type: TransactionType) { bank_forks, &Arc::new(PrioritizationFeeCache::new(0u64)), false, + None, ); let chunk_len = verified.len() / CHUNKS; diff --git a/core/benches/banking_trace.rs b/core/benches/banking_trace.rs index fb93deebc17ae2..34ab2aaf78f640 100644 --- a/core/benches/banking_trace.rs +++ b/core/benches/banking_trace.rs @@ -7,7 +7,7 @@ use { for_test::{ drop_and_clean_temp_dir_unless_suppressed, sample_packet_batch, terminate_tracer, }, - receiving_loop_with_minimized_sender_overhead, BankingPacketBatch, BankingTracer, + receiving_loop_with_minimized_sender_overhead, BankingPacketBatch, BankingTracer, Channels, TraceError, TracerThreadResult, BANKING_TRACE_DIR_DEFAULT_BYTE_LIMIT, }, std::{ @@ -35,7 +35,11 @@ fn black_box_packet_batch(packet_batch: BankingPacketBatch) -> TracerThreadResul fn bench_banking_tracer_main_thread_overhead_noop_baseline(bencher: &mut Bencher) { let exit = Arc::::default(); let tracer = BankingTracer::new_disabled(); - let (non_vote_sender, non_vote_receiver) = tracer.create_channel_non_vote(); + let Channels { + non_vote_sender, + non_vote_receiver, + .. + } = tracer.create_channels(None); let exit_for_dummy_thread = exit.clone(); let dummy_main_thread = thread::spawn(move || { @@ -64,7 +68,11 @@ fn bench_banking_tracer_main_thread_overhead_under_peak_write(bencher: &mut Benc BANKING_TRACE_DIR_DEFAULT_BYTE_LIMIT, ))) .unwrap(); - let (non_vote_sender, non_vote_receiver) = tracer.create_channel_non_vote(); + let Channels { + non_vote_sender, + non_vote_receiver, + .. + } = tracer.create_channels(None); let exit_for_dummy_thread = exit.clone(); let dummy_main_thread = thread::spawn(move || { @@ -101,7 +109,11 @@ fn bench_banking_tracer_main_thread_overhead_under_sustained_write(bencher: &mut 1024 * 1024, // cause more frequent trace file rotation ))) .unwrap(); - let (non_vote_sender, non_vote_receiver) = tracer.create_channel_non_vote(); + let Channels { + non_vote_sender, + non_vote_receiver, + .. + } = tracer.create_channels(None); let exit_for_dummy_thread = exit.clone(); let dummy_main_thread = thread::spawn(move || { @@ -142,7 +154,11 @@ fn bench_banking_tracer_background_thread_throughput(bencher: &mut Bencher) { let (tracer, tracer_thread) = BankingTracer::new(Some((&path, exit.clone(), 50 * 1024 * 1024))).unwrap(); - let (non_vote_sender, non_vote_receiver) = tracer.create_channel_non_vote(); + let Channels { + non_vote_sender, + non_vote_receiver, + .. + } = tracer.create_channels(None); let dummy_main_thread = thread::spawn(move || { receiving_loop_with_minimized_sender_overhead::<_, TraceError, 0>( diff --git a/core/benches/sigverify_stage.rs b/core/benches/sigverify_stage.rs index 3f11cc150574d3..5ca5a00ccaf9a1 100644 --- a/core/benches/sigverify_stage.rs +++ b/core/benches/sigverify_stage.rs @@ -185,8 +185,9 @@ fn bench_sigverify_stage(bencher: &mut Bencher, use_same_tx: bool) { if let Ok(message) = verified_r.recv_timeout(Duration::from_millis(10)) { let (verifieds, tracer_packet_stats) = (&message.0, message.1.as_ref().unwrap()); received += verifieds.iter().map(|batch| batch.len()).sum::(); - total_tracer_packets_received_in_sigverify_stage += - tracer_packet_stats.total_tracer_packets_received_in_sigverify_stage; + total_tracer_packets_received_in_sigverify_stage += tracer_packet_stats + .total_tracer_packets_received_in_sigverify_stage + .0; test::black_box(message); if total_tracer_packets_received_in_sigverify_stage >= sent_len { break; diff --git a/core/src/banking_simulation.rs b/core/src/banking_simulation.rs index 6e5113ded67336..dbd078d1b28737 100644 --- a/core/src/banking_simulation.rs +++ b/core/src/banking_simulation.rs @@ -1,13 +1,17 @@ #![cfg(feature = "dev-context-only-utils")] use { crate::{ - banking_stage::{BankingStage, LikeClusterInfo}, + banking_stage::{ + update_bank_forks_and_poh_recorder_for_new_tpu_bank, BankingStage, LikeClusterInfo, + }, banking_trace::{ - BankingPacketBatch, BankingTracer, ChannelLabel, TimedTracedEvent, TracedEvent, - TracedSender, TracerThread, BANKING_TRACE_DIR_DEFAULT_BYTE_LIMIT, BASENAME, + BankingPacketBatch, BankingTracer, ChannelLabel, Channels, TimedTracedEvent, + TracedEvent, TracedSender, TracerThread, BANKING_TRACE_DIR_DEFAULT_BYTE_LIMIT, + BASENAME, }, validator::BlockProductionMethod, }, + assert_matches::assert_matches, bincode::deserialize_from, crossbeam_channel::{unbounded, Sender}, itertools::Itertools, @@ -23,7 +27,7 @@ use { }, solana_net_utils::bind_to_localhost, solana_poh::{ - poh_recorder::{PohRecorder, GRACE_TICKS_FACTOR, MAX_GRACE_SLOTS}, + poh_recorder::{NewPohRecorder, PohRecorder, GRACE_TICKS_FACTOR, MAX_GRACE_SLOTS}, poh_service::{PohService, DEFAULT_HASHES_PER_BATCH, DEFAULT_PINNED_CPU_CORE}, }, solana_runtime::{ @@ -42,6 +46,7 @@ use { }, solana_streamer::socket::SocketAddrSpace, solana_turbine::broadcast_stage::{BroadcastStage, BroadcastStageType}, + solana_unified_scheduler_pool::DefaultSchedulerPool, std::{ collections::BTreeMap, fmt::Display, @@ -449,6 +454,9 @@ impl SimulatorLoop { info!("Bank::new_from_parent()!"); logger.log_jitter(&bank); + if let Some((result, _execute_timings)) = bank.wait_for_completed_scheduler() { + assert_matches!(result, Ok(())); + } bank.freeze(); let new_slot = if bank.slot() == self.parent_slot { info!("initial leader block!"); @@ -483,17 +491,17 @@ impl SimulatorLoop { logger.log_frozen_bank_cost(&bank); } self.retransmit_slots_sender.send(bank.slot()).unwrap(); - self.bank_forks.write().unwrap().insert(new_bank); + update_bank_forks_and_poh_recorder_for_new_tpu_bank( + &self.bank_forks, + &self.poh_recorder, + new_bank, + false, + ); bank = self .bank_forks .read() .unwrap() - .working_bank_with_scheduler() - .clone_with_scheduler(); - self.poh_recorder - .write() - .unwrap() - .set_bank(bank.clone_with_scheduler(), false); + .working_bank_with_scheduler(); } else { logger.log_ongoing_bank_cost(&bank); } @@ -672,15 +680,13 @@ impl BankingSimulator { bank_forks: Arc>, blockstore: Arc, block_production_method: BlockProductionMethod, + unified_scheduler_pool: Option>, + new_poh_recorder: Option, ) -> (SenderLoop, SimulatorLoop, SimulatorThreads) { let parent_slot = self.parent_slot().unwrap(); let mut packet_batches_by_time = self.banking_trace_events.packet_batches_by_time; let freeze_time_by_slot = self.banking_trace_events.freeze_time_by_slot; - let bank = bank_forks - .read() - .unwrap() - .working_bank_with_scheduler() - .clone_with_scheduler(); + let bank = bank_forks.read().unwrap().working_bank_with_scheduler(); let leader_schedule_cache = Arc::new(LeaderScheduleCache::new_from_bank(&bank)); assert_eq!(parent_slot, bank.slot()); @@ -693,7 +699,10 @@ impl BankingSimulator { simulated_leader, self.first_simulated_slot, ); - let exit = Arc::new(AtomicBool::default()); + let exit = new_poh_recorder + .as_ref() + .map(|(poh_recorder, ..)| poh_recorder.is_exited.clone()) + .unwrap_or_else(|| Arc::new(AtomicBool::default())); if let Some(end_slot) = blockstore .slot_meta_iterator(self.first_simulated_slot) @@ -711,20 +720,23 @@ impl BankingSimulator { info!("Poh is starting!"); - let (poh_recorder, entry_receiver, record_receiver) = PohRecorder::new_with_clear_signal( - bank.tick_height(), - bank.last_blockhash(), - bank.clone(), - None, - bank.ticks_per_slot(), - false, - blockstore.clone(), - blockstore.get_new_shred_signal(0), - &leader_schedule_cache, - &genesis_config.poh_config, - None, - exit.clone(), - ); + let (poh_recorder, entry_receiver, record_receiver) = + new_poh_recorder.unwrap_or_else(|| { + PohRecorder::new_with_clear_signal( + bank.tick_height(), + bank.last_blockhash(), + bank.clone(), + None, + bank.ticks_per_slot(), + false, + blockstore.clone(), + blockstore.get_new_shred_signal(0), + &leader_schedule_cache, + &genesis_config.poh_config, + None, + exit.clone(), + ) + }); let poh_recorder = Arc::new(RwLock::new(poh_recorder)); let poh_service = PohService::new( poh_recorder.clone(), @@ -758,9 +770,14 @@ impl BankingSimulator { BANKING_TRACE_DIR_DEFAULT_BYTE_LIMIT, ); - let (non_vote_sender, non_vote_receiver) = retracer.create_channel_non_vote(); - let (tpu_vote_sender, tpu_vote_receiver) = retracer.create_channel_tpu_vote(); - let (gossip_vote_sender, gossip_vote_receiver) = retracer.create_channel_gossip_vote(); + let Channels { + non_vote_sender, + non_vote_receiver, + tpu_vote_sender, + tpu_vote_receiver, + gossip_vote_sender, + gossip_vote_receiver, + } = retracer.create_channels(unified_scheduler_pool.as_ref()); let connection_cache = Arc::new(ConnectionCache::new("connection_cache_sim")); let (replay_vote_sender, _replay_vote_receiver) = unbounded(); @@ -815,6 +832,7 @@ impl BankingSimulator { bank_forks.clone(), prioritization_fee_cache, false, + unified_scheduler_pool, ); let (&_slot, &raw_base_event_time) = freeze_time_by_slot @@ -889,12 +907,16 @@ impl BankingSimulator { bank_forks: Arc>, blockstore: Arc, block_production_method: BlockProductionMethod, + unified_scheduler_pool: Option>, + new_poh_recorder: Option, ) -> Result<(), SimulateError> { let (sender_loop, simulator_loop, simulator_threads) = self.prepare_simulation( genesis_config, bank_forks, blockstore, block_production_method, + unified_scheduler_pool, + new_poh_recorder, ); sender_loop.log_starting(); diff --git a/core/src/banking_stage.rs b/core/src/banking_stage.rs index 49ccdb6ae15eff..b487f6fd160965 100644 --- a/core/src/banking_stage.rs +++ b/core/src/banking_stage.rs @@ -2,6 +2,8 @@ //! to construct a software pipeline. The stage uses all available CPU cores and //! can do its processing in parallel with signature verification on the GPU. +#[cfg(feature = "dev-context-only-utils")] +use qualifier_attr::qualifiers; use { self::{ committer::Committer, @@ -37,10 +39,11 @@ use { solana_perf::{data_budget::DataBudget, packet::PACKETS_PER_BATCH}, solana_poh::poh_recorder::{PohRecorder, TransactionRecorder}, solana_runtime::{ - bank_forks::BankForks, prioritization_fee_cache::PrioritizationFeeCache, + bank::Bank, bank_forks::BankForks, prioritization_fee_cache::PrioritizationFeeCache, vote_sender_types::ReplayVoteSender, }, - solana_sdk::{pubkey::Pubkey, timing::AtomicInterval}, + solana_sdk::{pubkey::Pubkey, scheduling::SchedulingMode, timing::AtomicInterval}, + solana_unified_scheduler_pool::{BankingStageAdapter, DefaultSchedulerPool}, std::{ cmp, env, ops::Deref, @@ -364,6 +367,7 @@ impl BankingStage { bank_forks: Arc>, prioritization_fee_cache: &Arc, enable_forwarding: bool, + unified_scheduler_pool: Option>, ) -> Self { Self::new_num_threads( block_production_method, @@ -380,6 +384,7 @@ impl BankingStage { bank_forks, prioritization_fee_cache, enable_forwarding, + unified_scheduler_pool, ) } @@ -399,9 +404,12 @@ impl BankingStage { bank_forks: Arc>, prioritization_fee_cache: &Arc, enable_forwarding: bool, + unified_scheduler_pool: Option>, ) -> Self { + use BlockProductionMethod::*; + match block_production_method { - BlockProductionMethod::CentralScheduler => Self::new_central_scheduler( + CentralScheduler => Self::new_central_scheduler( cluster_info, poh_recorder, non_vote_receiver, @@ -416,6 +424,16 @@ impl BankingStage { prioritization_fee_cache, enable_forwarding, ), + UnifiedScheduler => Self::new_unified_scheduler( + cluster_info, + poh_recorder, + non_vote_receiver, + tpu_vote_receiver, + gossip_vote_receiver, + num_threads, + bank_forks, + unified_scheduler_pool.unwrap(), + ), } } @@ -645,6 +663,69 @@ impl BankingStage { Self { bank_thread_hdls } } + pub fn new_unified_scheduler( + cluster_info: &impl LikeClusterInfo, + poh_recorder: &Arc>, + non_vote_receiver: BankingPacketReceiver, + tpu_vote_receiver: BankingPacketReceiver, + gossip_vote_receiver: BankingPacketReceiver, + _num_threads: u32, + bank_forks: Arc>, + unified_scheduler_pool: Arc, + ) -> Self { + assert!(non_vote_receiver.same_channel(&tpu_vote_receiver)); + assert!(non_vote_receiver.same_channel(&gossip_vote_receiver)); + drop((tpu_vote_receiver, gossip_vote_receiver)); + + let unified_receiver = non_vote_receiver; + let decision_maker = DecisionMaker::new(cluster_info.id(), poh_recorder.clone()); + let banking_stage_monitor = Box::new(decision_maker.clone()); + + unified_scheduler_pool.register_banking_stage( + unified_receiver, + 1, /* todo */ + banking_stage_monitor, + Box::new(move |adapter: Arc| { + let decision_maker = decision_maker.clone(); + let bank_forks = bank_forks.clone(); + + Box::new(move |batches, task_submitter| { + let decision = decision_maker.make_consume_or_forward_decision(); + if matches!(decision, BufferedPacketsDecision::Forward) { + return; + } + let bank = bank_forks.read().unwrap().root_bank(); + for batch in batches.0.iter() { + // over-provision nevertheless some of packets could be invalid. + let task_id_base = adapter.generate_task_ids(batch.len()); + let packets = PacketDeserializer::deserialize_packets_with_indexes(batch); + + for (packet, packet_index) in packets { + let Some((transaction, _deactivation_slot)) = packet + .build_sanitized_transaction( + bank.vote_only_bank(), + &bank, + bank.get_reserved_account_keys(), + ) + else { + continue; + }; + + let index = task_id_base + packet_index; + + let task = adapter.create_new_task(transaction, index); + task_submitter(task); + } + } + }) + }), + ); + + Self { + bank_thread_hdls: vec![], + } + } + fn spawn_thread_local_multi_iterator_thread( id: u32, packet_receiver: BankingPacketReceiver, @@ -809,11 +890,32 @@ impl BankingStage { } } +#[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))] +pub(crate) fn update_bank_forks_and_poh_recorder_for_new_tpu_bank( + bank_forks: &RwLock, + poh_recorder: &RwLock, + tpu_bank: Bank, + track_transaction_indexes: bool, +) { + // A write lock for the poh recorder must be grabbed for the entire duration of inserting new + // tpu bank into the bank forks. That's because any buffered transactions could immediately be + // executed after the bank forks update, when unified scheduler is enabled for block + // production. And then, the unified scheduler would be hit with false errors due to having no + // bank in the poh recorder otherwise. + let mut poh_recorder = poh_recorder.write().unwrap(); + + let tpu_bank = bank_forks + .write() + .unwrap() + .insert_with_scheduling_mode(SchedulingMode::BlockProduction, tpu_bank); + poh_recorder.set_bank(tpu_bank, track_transaction_indexes); +} + #[cfg(test)] mod tests { use { super::*, - crate::banking_trace::{BankingPacketBatch, BankingTracer}, + crate::banking_trace::{BankingPacketBatch, BankingTracer, Channels}, crossbeam_channel::{unbounded, Receiver}, itertools::Itertools, solana_entry::entry::{self, Entry, EntrySlice}, @@ -874,10 +976,14 @@ mod tests { let genesis_config = create_genesis_config(2).genesis_config; let (bank, bank_forks) = Bank::new_no_wallclock_throttle_for_tests(&genesis_config); let banking_tracer = BankingTracer::new_disabled(); - let (non_vote_sender, non_vote_receiver) = banking_tracer.create_channel_non_vote(); - let (tpu_vote_sender, tpu_vote_receiver) = banking_tracer.create_channel_tpu_vote(); - let (gossip_vote_sender, gossip_vote_receiver) = - banking_tracer.create_channel_gossip_vote(); + let Channels { + non_vote_sender, + non_vote_receiver, + tpu_vote_sender, + tpu_vote_receiver, + gossip_vote_sender, + gossip_vote_receiver, + } = banking_tracer.create_channels(None); let ledger_path = get_tmp_ledger_path_auto_delete!(); { let blockstore = Arc::new( @@ -904,6 +1010,7 @@ mod tests { bank_forks, &Arc::new(PrioritizationFeeCache::new(0u64)), false, + None, ); drop(non_vote_sender); drop(tpu_vote_sender); @@ -926,10 +1033,14 @@ mod tests { let (bank, bank_forks) = Bank::new_no_wallclock_throttle_for_tests(&genesis_config); let start_hash = bank.last_blockhash(); let banking_tracer = BankingTracer::new_disabled(); - let (non_vote_sender, non_vote_receiver) = banking_tracer.create_channel_non_vote(); - let (tpu_vote_sender, tpu_vote_receiver) = banking_tracer.create_channel_tpu_vote(); - let (gossip_vote_sender, gossip_vote_receiver) = - banking_tracer.create_channel_gossip_vote(); + let Channels { + non_vote_sender, + non_vote_receiver, + tpu_vote_sender, + tpu_vote_receiver, + gossip_vote_sender, + gossip_vote_receiver, + } = banking_tracer.create_channels(None); let ledger_path = get_tmp_ledger_path_auto_delete!(); { let blockstore = Arc::new( @@ -960,6 +1071,7 @@ mod tests { bank_forks, &Arc::new(PrioritizationFeeCache::new(0u64)), false, + None, ); trace!("sending bank"); drop(non_vote_sender); @@ -1004,10 +1116,14 @@ mod tests { let (bank, bank_forks) = Bank::new_no_wallclock_throttle_for_tests(&genesis_config); let start_hash = bank.last_blockhash(); let banking_tracer = BankingTracer::new_disabled(); - let (non_vote_sender, non_vote_receiver) = banking_tracer.create_channel_non_vote(); - let (tpu_vote_sender, tpu_vote_receiver) = banking_tracer.create_channel_tpu_vote(); - let (gossip_vote_sender, gossip_vote_receiver) = - banking_tracer.create_channel_gossip_vote(); + let Channels { + non_vote_sender, + non_vote_receiver, + tpu_vote_sender, + tpu_vote_receiver, + gossip_vote_sender, + gossip_vote_receiver, + } = banking_tracer.create_channels(None); let ledger_path = get_tmp_ledger_path_auto_delete!(); { let blockstore = Arc::new( @@ -1040,6 +1156,7 @@ mod tests { bank_forks.clone(), // keep a local-copy of bank-forks so worker threads do not lose weak access to bank-forks &Arc::new(PrioritizationFeeCache::new(0u64)), false, + None, ); // fund another account so we can send 2 good transactions in a single batch. @@ -1138,7 +1255,14 @@ mod tests { .. } = create_slow_genesis_config(2); let banking_tracer = BankingTracer::new_disabled(); - let (non_vote_sender, non_vote_receiver) = banking_tracer.create_channel_non_vote(); + let Channels { + non_vote_sender, + non_vote_receiver, + tpu_vote_sender, + tpu_vote_receiver, + gossip_vote_sender, + gossip_vote_receiver, + } = banking_tracer.create_channels(None); // Process a batch that includes a transaction that receives two lamports. let alice = Keypair::new(); @@ -1168,9 +1292,6 @@ mod tests { .send(BankingPacketBatch::new((packet_batches, None))) .unwrap(); - let (tpu_vote_sender, tpu_vote_receiver) = banking_tracer.create_channel_tpu_vote(); - let (gossip_vote_sender, gossip_vote_receiver) = - banking_tracer.create_channel_gossip_vote(); let ledger_path = get_tmp_ledger_path_auto_delete!(); { let (replay_vote_sender, _replay_vote_receiver) = unbounded(); @@ -1361,10 +1482,14 @@ mod tests { let (bank, bank_forks) = Bank::new_no_wallclock_throttle_for_tests(&genesis_config); let start_hash = bank.last_blockhash(); let banking_tracer = BankingTracer::new_disabled(); - let (non_vote_sender, non_vote_receiver) = banking_tracer.create_channel_non_vote(); - let (tpu_vote_sender, tpu_vote_receiver) = banking_tracer.create_channel_tpu_vote(); - let (gossip_vote_sender, gossip_vote_receiver) = - banking_tracer.create_channel_gossip_vote(); + let Channels { + non_vote_sender, + non_vote_receiver, + tpu_vote_sender, + tpu_vote_receiver, + gossip_vote_sender, + gossip_vote_receiver, + } = banking_tracer.create_channels(None); let ledger_path = get_tmp_ledger_path_auto_delete!(); { let blockstore = Arc::new( @@ -1397,6 +1522,7 @@ mod tests { bank_forks, &Arc::new(PrioritizationFeeCache::new(0u64)), false, + None, ); let keypairs = (0..100).map(|_| Keypair::new()).collect_vec(); diff --git a/core/src/banking_stage/decision_maker.rs b/core/src/banking_stage/decision_maker.rs index 1bd0b224fdf034..a1e0f9fff8cedb 100644 --- a/core/src/banking_stage/decision_maker.rs +++ b/core/src/banking_stage/decision_maker.rs @@ -7,7 +7,8 @@ use { }, pubkey::Pubkey, }, - std::sync::{Arc, RwLock}, + solana_unified_scheduler_pool::{BankingStageMonitor, BankingStageStatus}, + std::sync::{atomic::Ordering::Relaxed, Arc, RwLock}, }; #[derive(Debug, Clone)] @@ -28,9 +29,10 @@ impl BufferedPacketsDecision { } } -#[derive(Clone)] +#[derive(Clone, derive_more::Debug)] pub struct DecisionMaker { my_pubkey: Pubkey, + #[debug("{poh_recorder:p}")] poh_recorder: Arc>, } @@ -112,6 +114,21 @@ impl DecisionMaker { } } +impl BankingStageMonitor for DecisionMaker { + fn status(&self) -> BankingStageStatus { + if self.poh_recorder.read().unwrap().is_exited.load(Relaxed) { + BankingStageStatus::Exited + } else if matches!( + self.make_consume_or_forward_decision(), + BufferedPacketsDecision::Forward, + ) { + BankingStageStatus::Inactive + } else { + BankingStageStatus::Active + } + } +} + #[cfg(test)] mod tests { use { diff --git a/core/src/banking_stage/packet_deserializer.rs b/core/src/banking_stage/packet_deserializer.rs index 78fab3718252f4..b3ed73fb33ce20 100644 --- a/core/src/banking_stage/packet_deserializer.rs +++ b/core/src/banking_stage/packet_deserializer.rs @@ -5,12 +5,9 @@ use { immutable_deserialized_packet::{DeserializedPacketError, ImmutableDeserializedPacket}, packet_filter::PacketFilterFailure, }, - crate::{ - banking_trace::{BankingPacketBatch, BankingPacketReceiver}, - sigverify::SigverifyTracerPacketStats, - }, + crate::banking_trace::{BankingPacketBatch, BankingPacketReceiver}, crossbeam_channel::RecvTimeoutError, - solana_perf::packet::PacketBatch, + solana_perf::packet::{PacketBatch, SigverifyTracerPacketStats}, solana_sdk::saturating_add_assign, std::time::{Duration, Instant}, }; @@ -219,6 +216,18 @@ impl PacketDeserializer { } }) } + + pub(crate) fn deserialize_packets_with_indexes( + packet_batch: &PacketBatch, + ) -> impl Iterator + '_ { + let packet_indexes = PacketDeserializer::generate_packet_indexes(packet_batch); + packet_indexes.into_iter().filter_map(move |packet_index| { + let packet = packet_batch[packet_index].clone(); + ImmutableDeserializedPacket::new(packet) + .ok() + .map(|packet| (packet, packet_index)) + }) + } } #[cfg(test)] diff --git a/core/src/banking_stage/transaction_scheduler/scheduler_controller.rs b/core/src/banking_stage/transaction_scheduler/scheduler_controller.rs index 14a175b2018260..ffa9c09b3bf53a 100644 --- a/core/src/banking_stage/transaction_scheduler/scheduler_controller.rs +++ b/core/src/banking_stage/transaction_scheduler/scheduler_controller.rs @@ -444,7 +444,6 @@ mod tests { transaction_scheduler::receive_and_buffer::SanitizedTransactionReceiveAndBuffer, }, banking_trace::BankingPacketBatch, - sigverify::SigverifyTracerPacketStats, }, crossbeam_channel::{unbounded, Receiver, Sender}, itertools::Itertools, @@ -453,7 +452,9 @@ mod tests { blockstore::Blockstore, genesis_utils::GenesisConfigInfo, get_tmp_ledger_path_auto_delete, leader_schedule_cache::LeaderScheduleCache, }, - solana_perf::packet::{to_packet_batches, PacketBatch, NUM_PACKETS}, + solana_perf::packet::{ + to_packet_batches, PacketBatch, SigverifyTracerPacketStats, NUM_PACKETS, + }, solana_poh::poh_recorder::{PohRecorder, Record, WorkingBankEntry}, solana_runtime::bank::Bank, solana_runtime_transaction::runtime_transaction::RuntimeTransaction, diff --git a/core/src/banking_trace.rs b/core/src/banking_trace.rs index 6e0797c8c3842f..26bb5e486f9c64 100644 --- a/core/src/banking_trace.rs +++ b/core/src/banking_trace.rs @@ -1,11 +1,11 @@ use { - crate::sigverify::SigverifyTracerPacketStats, bincode::serialize_into, chrono::{DateTime, Local}, crossbeam_channel::{unbounded, Receiver, SendError, Sender, TryRecvError}, rolling_file::{RollingCondition, RollingConditionBasic, RollingFileAppender}, - solana_perf::packet::PacketBatch, + solana_perf::packet::{PacketBatch, SigverifyTracerPacketStats}, solana_sdk::{hash::Hash, slot_history::Slot}, + solana_unified_scheduler_pool::DefaultSchedulerPool, std::{ fs::{create_dir_all, remove_dir_all}, io::{self, Write}, @@ -65,7 +65,7 @@ pub struct BankingTracer { #[cfg_attr( feature = "frozen-abi", derive(AbiExample), - frozen_abi(digest = "6PCDw6YSEivfbwhbPmE4NAsXb88ZX6hkFnruP8B38nma") + frozen_abi(digest = "2skEFDxJCXuMq2LmRK7tJQk1Mzh6Xnouu4uJNixn3ezQ") )] #[derive(Serialize, Deserialize, Debug)] pub struct TimedTracedEvent(pub std::time::SystemTime, pub TracedEvent); @@ -178,6 +178,15 @@ pub fn receiving_loop_with_minimized_sender_overhead( Ok(()) } +pub struct Channels { + pub non_vote_sender: BankingPacketSender, + pub non_vote_receiver: BankingPacketReceiver, + pub tpu_vote_sender: BankingPacketSender, + pub tpu_vote_receiver: BankingPacketReceiver, + pub gossip_vote_sender: BankingPacketSender, + pub gossip_vote_receiver: BankingPacketReceiver, +} + impl BankingTracer { pub fn new( maybe_config: Option<(&PathBuf, Arc, DirByteLimit)>, @@ -220,22 +229,80 @@ impl BankingTracer { self.active_tracer.is_some() } + pub fn create_channels(&self, pool: Option<&Arc>) -> Channels { + if let Some(true) = pool.map(|pool| pool.block_production_supported()) { + let (non_vote_sender, non_vote_receiver) = self.create_channel_non_vote(); + let (tpu_vote_sender, tpu_vote_receiver) = + self.create_unified_channel_tpu_vote(&non_vote_sender, &non_vote_receiver); + let (gossip_vote_sender, gossip_vote_receiver) = + self.create_unified_channel_gossip_vote(&non_vote_sender, &non_vote_receiver); + + Channels { + non_vote_sender, + non_vote_receiver, + tpu_vote_sender, + tpu_vote_receiver, + gossip_vote_sender, + gossip_vote_receiver, + } + } else { + let (non_vote_sender, non_vote_receiver) = self.create_channel_non_vote(); + let (tpu_vote_sender, tpu_vote_receiver) = self.create_channel_tpu_vote(); + let (gossip_vote_sender, gossip_vote_receiver) = self.create_channel_gossip_vote(); + + Channels { + non_vote_sender, + non_vote_receiver, + tpu_vote_sender, + tpu_vote_receiver, + gossip_vote_sender, + gossip_vote_receiver, + } + } + } + fn create_channel(&self, label: ChannelLabel) -> (BankingPacketSender, BankingPacketReceiver) { Self::channel(label, self.active_tracer.as_ref().cloned()) } - pub fn create_channel_non_vote(&self) -> (BankingPacketSender, BankingPacketReceiver) { + fn create_channel_non_vote(&self) -> (BankingPacketSender, BankingPacketReceiver) { self.create_channel(ChannelLabel::NonVote) } - pub fn create_channel_tpu_vote(&self) -> (BankingPacketSender, BankingPacketReceiver) { + fn create_channel_tpu_vote(&self) -> (BankingPacketSender, BankingPacketReceiver) { self.create_channel(ChannelLabel::TpuVote) } - pub fn create_channel_gossip_vote(&self) -> (BankingPacketSender, BankingPacketReceiver) { + fn create_channel_gossip_vote(&self) -> (BankingPacketSender, BankingPacketReceiver) { self.create_channel(ChannelLabel::GossipVote) } + fn create_unified_channel_tpu_vote( + &self, + sender: &TracedSender, + receiver: &BankingPacketReceiver, + ) -> (BankingPacketSender, BankingPacketReceiver) { + Self::channel_inner( + ChannelLabel::TpuVote, + self.active_tracer.as_ref().cloned(), + sender.sender.clone(), + receiver.clone(), + ) + } + + fn create_unified_channel_gossip_vote( + &self, + sender: &TracedSender, + receiver: &BankingPacketReceiver, + ) -> (BankingPacketSender, BankingPacketReceiver) { + Self::channel_inner( + ChannelLabel::GossipVote, + self.active_tracer.as_ref().cloned(), + sender.sender.clone(), + receiver.clone(), + ) + } + pub fn hash_event(&self, slot: Slot, blockhash: &Hash, bank_hash: &Hash) { self.trace_event(|| { TimedTracedEvent( @@ -264,6 +331,15 @@ impl BankingTracer { active_tracer: Option, ) -> (TracedSender, Receiver) { let (sender, receiver) = unbounded(); + Self::channel_inner(label, active_tracer, sender, receiver) + } + + fn channel_inner( + label: ChannelLabel, + active_tracer: Option, + sender: Sender, + receiver: BankingPacketReceiver, + ) -> (TracedSender, Receiver) { (TracedSender::new(label, sender, active_tracer), receiver) } diff --git a/core/src/cluster_info_vote_listener.rs b/core/src/cluster_info_vote_listener.rs index 56869624812940..526d297dc21dca 100644 --- a/core/src/cluster_info_vote_listener.rs +++ b/core/src/cluster_info_vote_listener.rs @@ -194,6 +194,7 @@ impl ClusterInfoVoteListener { verified_packets_sender: BankingPacketSender, vote_tracker: Arc, bank_forks: Arc>, + mut root_bank_cache: RootBankCache, subscriptions: Arc, verified_vote_sender: VerifiedVoteSender, gossip_verified_vote_hash_sender: GossipVerifiedVoteHashSender, @@ -205,7 +206,6 @@ impl ClusterInfoVoteListener { let (verified_vote_transactions_sender, verified_vote_transactions_receiver) = unbounded(); let listen_thread = { let exit = exit.clone(); - let mut root_bank_cache = RootBankCache::new(bank_forks.clone()); Builder::new() .name("solCiVoteLstnr".to_string()) .spawn(move || { diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs index 9b998ca9a99549..681cb7051d4afd 100644 --- a/core/src/replay_stage.rs +++ b/core/src/replay_stage.rs @@ -2,6 +2,7 @@ use { crate::{ + banking_stage::update_bank_forks_and_poh_recorder_for_new_tpu_bank, banking_trace::BankingTracer, cache_block_meta_service::CacheBlockMetaSender, cluster_info_vote_listener::{ @@ -70,6 +71,7 @@ use { hash::Hash, pubkey::Pubkey, saturating_add_assign, + scheduling::SchedulingMode, signature::{Keypair, Signature, Signer}, timing::timestamp, transaction::Transaction, @@ -2220,11 +2222,12 @@ impl ReplayStage { // new()-ing of its child bank banking_tracer.hash_event(parent.slot(), &parent.last_blockhash(), &parent.hash()); - let tpu_bank = bank_forks.write().unwrap().insert(tpu_bank); - poh_recorder - .write() - .unwrap() - .set_bank(tpu_bank, track_transaction_indexes); + update_bank_forks_and_poh_recorder_for_new_tpu_bank( + bank_forks, + poh_recorder, + tpu_bank, + track_transaction_indexes, + ); true } else { error!("{} No next leader found", my_pubkey); @@ -2804,6 +2807,28 @@ impl ReplayStage { } } + fn wait_for_cleared_bank(bank: BankWithScheduler) { + if matches!( + bank.scheduling_mode(), + Some(SchedulingMode::BlockProduction) + ) { + info!("Reaping cleared tpu_bank: {}...", bank.slot()); + if let Some((result, _completed_execute_timings)) = bank.wait_for_completed_scheduler() + { + info!( + "Reaped aborted tpu_bank with unified scheduler: {} {:?}", + bank.slot(), + result + ); + } else { + info!( + "Skipped to reap a tpu_bank (seems unified scheduler is disabled): {}", + bank.slot() + ); + } + } + } + fn reset_poh_recorder( my_pubkey: &Pubkey, blockstore: &Blockstore, @@ -2822,7 +2847,10 @@ impl ReplayStage { GRACE_TICKS_FACTOR * MAX_GRACE_SLOTS, ); - poh_recorder.write().unwrap().reset(bank, next_leader_slot); + let cleared_bank = poh_recorder.write().unwrap().reset(bank, next_leader_slot); + if let Some(cleared_bank) = cleared_bank { + Self::wait_for_cleared_bank(cleared_bank); + } let next_leader_msg = if let Some(next_leader_slot) = next_leader_slot { format!("My next leader slot is {}", next_leader_slot.0) diff --git a/core/src/sigverify.rs b/core/src/sigverify.rs index 18984ecc4ef836..e384b1cb3eab89 100644 --- a/core/src/sigverify.rs +++ b/core/src/sigverify.rs @@ -12,49 +12,15 @@ use { banking_trace::{BankingPacketBatch, BankingPacketSender}, sigverify_stage::{SigVerifier, SigVerifyServiceError}, }, - solana_perf::{cuda_runtime::PinnedVec, packet::PacketBatch, recycler::Recycler, sigverify}, - solana_sdk::{packet::Packet, saturating_add_assign}, + solana_perf::{ + cuda_runtime::PinnedVec, + packet::{PacketBatch, SigverifyTracerPacketStats}, + recycler::Recycler, + sigverify, + }, + solana_sdk::packet::Packet, }; -#[cfg_attr(feature = "frozen-abi", derive(AbiExample))] -#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] -pub struct SigverifyTracerPacketStats { - pub total_removed_before_sigverify_stage: usize, - pub total_tracer_packets_received_in_sigverify_stage: usize, - pub total_tracer_packets_deduped: usize, - pub total_excess_tracer_packets: usize, - pub total_tracker_packets_passed_sigverify: usize, -} - -impl SigverifyTracerPacketStats { - pub fn is_default(&self) -> bool { - *self == SigverifyTracerPacketStats::default() - } - - pub fn aggregate(&mut self, other: &SigverifyTracerPacketStats) { - saturating_add_assign!( - self.total_removed_before_sigverify_stage, - other.total_removed_before_sigverify_stage - ); - saturating_add_assign!( - self.total_tracer_packets_received_in_sigverify_stage, - other.total_tracer_packets_received_in_sigverify_stage - ); - saturating_add_assign!( - self.total_tracer_packets_deduped, - other.total_tracer_packets_deduped - ); - saturating_add_assign!( - self.total_excess_tracer_packets, - other.total_excess_tracer_packets - ); - saturating_add_assign!( - self.total_tracker_packets_passed_sigverify, - other.total_tracker_packets_passed_sigverify - ); - } -} - pub struct TransactionSigVerifier { packet_sender: BankingPacketSender, tracer_packet_stats: SigverifyTracerPacketStats, diff --git a/core/src/sigverify_stage.rs b/core/src/sigverify_stage.rs index ac7d9889db0ed8..3059e4528a319b 100644 --- a/core/src/sigverify_stage.rs +++ b/core/src/sigverify_stage.rs @@ -579,10 +579,13 @@ mod tests { loop { if let Ok(message) = verified_r.recv() { let (verifieds, tracer_packet_stats) = (&message.0, message.1.as_ref().unwrap()); - total_tracer_packets_received_in_sigverify_stage += - tracer_packet_stats.total_tracer_packets_received_in_sigverify_stage; + total_tracer_packets_received_in_sigverify_stage += tracer_packet_stats + .total_tracer_packets_received_in_sigverify_stage + .0; assert_eq!( - tracer_packet_stats.total_tracer_packets_received_in_sigverify_stage + tracer_packet_stats + .total_tracer_packets_received_in_sigverify_stage + .0 % packets_per_batch, 0, ); @@ -594,27 +597,28 @@ mod tests { // Also have to account for the fact that deduper could be cleared periodically, // in which case the first transaction in the next batch won't be deduped assert!( - (tracer_packet_stats.total_tracer_packets_deduped + (tracer_packet_stats.total_tracer_packets_deduped.0 == tracer_packet_stats .total_tracer_packets_received_in_sigverify_stage + .0 - 1) || (tracer_packet_stats.total_tracer_packets_deduped == tracer_packet_stats .total_tracer_packets_received_in_sigverify_stage) ); assert!( - (tracer_packet_stats.total_tracker_packets_passed_sigverify == 1) - || (tracer_packet_stats.total_tracker_packets_passed_sigverify == 0) + (tracer_packet_stats.total_tracker_packets_passed_sigverify.0 == 1) + || (tracer_packet_stats.total_tracker_packets_passed_sigverify.0 == 0) ); } else { - assert_eq!(tracer_packet_stats.total_tracer_packets_deduped, 0); + assert_eq!(tracer_packet_stats.total_tracer_packets_deduped.0, 0); assert!( (tracer_packet_stats.total_tracker_packets_passed_sigverify == tracer_packet_stats .total_tracer_packets_received_in_sigverify_stage) ); } - assert_eq!(tracer_packet_stats.total_excess_tracer_packets, 0); + assert_eq!(tracer_packet_stats.total_excess_tracer_packets.0, 0); received += verifieds.iter().map(|batch| batch.len()).sum::(); } diff --git a/core/src/tpu.rs b/core/src/tpu.rs index 091a5901c2311e..837f3c1d5a4e26 100644 --- a/core/src/tpu.rs +++ b/core/src/tpu.rs @@ -5,7 +5,7 @@ pub use solana_sdk::net::DEFAULT_TPU_COALESCE; use { crate::{ banking_stage::BankingStage, - banking_trace::{BankingTracer, TracerThread}, + banking_trace::{BankingTracer, Channels, TracerThread}, cluster_info_vote_listener::{ ClusterInfoVoteListener, DuplicateConfirmedSlotsSender, GossipVerifiedVoteHashSender, VerifiedVoteSender, VoteTracker, @@ -33,6 +33,7 @@ use { solana_runtime::{ bank_forks::BankForks, prioritization_fee_cache::PrioritizationFeeCache, + root_bank_cache::RootBankCache, vote_sender_types::{ReplayVoteReceiver, ReplayVoteSender}, }, solana_sdk::{clock::Slot, pubkey::Pubkey, quic::NotifyKeyUpdate, signature::Keypair}, @@ -44,6 +45,7 @@ use { streamer::StakedNodes, }, solana_turbine::broadcast_stage::{BroadcastStage, BroadcastStageType}, + solana_unified_scheduler_pool::DefaultSchedulerPool, std::{ collections::HashMap, net::{SocketAddr, UdpSocket}, @@ -99,6 +101,7 @@ impl Tpu { shred_version: u16, vote_tracker: Arc, bank_forks: Arc>, + root_bank_cache: RootBankCache, verified_vote_sender: VerifiedVoteSender, gossip_verified_vote_hash_sender: GossipVerifiedVoteHashSender, replay_vote_receiver: ReplayVoteReceiver, @@ -120,6 +123,7 @@ impl Tpu { block_production_method: BlockProductionMethod, enable_block_production_forwarding: bool, _generator_config: Option, /* vestigial code for replay invalidator */ + unified_scheduler_pool: Option>, ) -> (Self, Vec>) { let TpuSockets { transactions: transactions_sockets, @@ -156,7 +160,14 @@ impl Tpu { shared_staked_nodes_overrides, ); - let (non_vote_sender, non_vote_receiver) = banking_tracer.create_channel_non_vote(); + let Channels { + non_vote_sender, + non_vote_receiver, + tpu_vote_sender, + tpu_vote_receiver, + gossip_vote_sender, + gossip_vote_receiver, + } = banking_tracer.create_channels(unified_scheduler_pool.as_ref()); // Streamer for Votes: let SpawnServerResult { @@ -235,8 +246,6 @@ impl Tpu { SigVerifyStage::new(packet_receiver, verifier, "solSigVerTpu", "tpu-verifier") }; - let (tpu_vote_sender, tpu_vote_receiver) = banking_tracer.create_channel_tpu_vote(); - let vote_sigverify_stage = { let verifier = TransactionSigVerifier::new_reject_non_vote(tpu_vote_sender); SigVerifyStage::new( @@ -247,14 +256,13 @@ impl Tpu { ) }; - let (gossip_vote_sender, gossip_vote_receiver) = - banking_tracer.create_channel_gossip_vote(); let cluster_info_vote_listener = ClusterInfoVoteListener::new( exit.clone(), cluster_info.clone(), gossip_vote_sender, vote_tracker, bank_forks.clone(), + root_bank_cache, subscriptions.clone(), verified_vote_sender, gossip_verified_vote_hash_sender, @@ -278,6 +286,7 @@ impl Tpu { bank_forks.clone(), prioritization_fee_cache, enable_block_production_forwarding, + unified_scheduler_pool, ); let (entry_receiver, tpu_entry_notifier) = diff --git a/core/src/tracer_packet_stats.rs b/core/src/tracer_packet_stats.rs index 2269b35cc702fb..a9ff49c4d701e3 100644 --- a/core/src/tracer_packet_stats.rs +++ b/core/src/tracer_packet_stats.rs @@ -1,5 +1,5 @@ use { - crate::sigverify::SigverifyTracerPacketStats, + solana_perf::packet::SigverifyTracerPacketStats, solana_sdk::{pubkey::Pubkey, saturating_add_assign, timing::timestamp}, std::collections::HashSet, }; @@ -128,14 +128,14 @@ impl TracerPacketStats { "total_removed_before_sigverify", modifiable_tracer_packet_stats .sigverify_tracer_packet_stats - .total_removed_before_sigverify_stage as i64, + .total_removed_before_sigverify_stage.0 as i64, i64 ), ( "total_tracer_packets_received_in_sigverify", modifiable_tracer_packet_stats .sigverify_tracer_packet_stats - .total_tracer_packets_received_in_sigverify_stage + .total_tracer_packets_received_in_sigverify_stage.0 as i64, i64 ), @@ -143,21 +143,21 @@ impl TracerPacketStats { "total_tracer_packets_deduped_in_sigverify", modifiable_tracer_packet_stats .sigverify_tracer_packet_stats - .total_tracer_packets_deduped as i64, + .total_tracer_packets_deduped.0 as i64, i64 ), ( "total_excess_tracer_packets_discarded_in_sigverify", modifiable_tracer_packet_stats .sigverify_tracer_packet_stats - .total_excess_tracer_packets as i64, + .total_excess_tracer_packets.0 as i64, i64 ), ( "total_tracker_packets_passed_sigverify", modifiable_tracer_packet_stats .sigverify_tracer_packet_stats - .total_tracker_packets_passed_sigverify as i64, + .total_tracker_packets_passed_sigverify.0 as i64, i64 ), ( diff --git a/core/src/validator.rs b/core/src/validator.rs index e2d05f3f09ede7..e894b267b6679b 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -105,6 +105,7 @@ use { bank_forks::BankForks, commitment::BlockCommitmentCache, prioritization_fee_cache::PrioritizationFeeCache, + root_bank_cache::RootBankCache, runtime_config::RuntimeConfig, snapshot_archive_info::SnapshotArchiveInfoGetter, snapshot_bank_utils::{self, DISABLED_SNAPSHOT_ARCHIVE_INTERVAL}, @@ -120,6 +121,7 @@ use { hard_forks::HardForks, hash::Hash, pubkey::Pubkey, + scheduling::SchedulingMode, shred_version::compute_shred_version, signature::{Keypair, Signer}, timing::timestamp, @@ -127,7 +129,7 @@ use { solana_send_transaction_service::send_transaction_service, solana_streamer::{socket::SocketAddrSpace, streamer::StakedNodes}, solana_turbine::{self, broadcast_stage::BroadcastStageType}, - solana_unified_scheduler_pool::DefaultSchedulerPool, + solana_unified_scheduler_pool::{DefaultSchedulerPool, SupportedSchedulingMode}, solana_vote_program::vote_state, solana_wen_restart::wen_restart::{wait_for_wen_restart, WenRestartConfig}, std::{ @@ -184,11 +186,14 @@ impl BlockVerificationMethod { } } -#[derive(Clone, EnumString, EnumVariantNames, Default, IntoStaticStr, Display)] +#[derive( + Clone, EnumCount, EnumIter, EnumString, EnumVariantNames, Default, IntoStaticStr, Display, +)] #[strum(serialize_all = "kebab-case")] pub enum BlockProductionMethod { - #[default] CentralScheduler, + #[default] + UnifiedScheduler, } impl BlockProductionMethod { @@ -208,6 +213,23 @@ impl BlockProductionMethod { } } +pub fn supported_scheduling_mode( + (verification, production): (&BlockVerificationMethod, &BlockProductionMethod), +) -> SupportedSchedulingMode { + match (verification, production) { + (BlockVerificationMethod::UnifiedScheduler, BlockProductionMethod::UnifiedScheduler) => { + SupportedSchedulingMode::Both + } + (BlockVerificationMethod::UnifiedScheduler, _) => { + SupportedSchedulingMode::Either(SchedulingMode::BlockVerification) + } + (_, BlockProductionMethod::UnifiedScheduler) => { + SupportedSchedulingMode::Either(SchedulingMode::BlockProduction) + } + _ => unreachable!("seems unified scheduler is disabled"), + } +} + /// Configuration for the block generator invalidator for replay. #[derive(Clone, Debug)] pub struct GeneratorConfig { @@ -866,32 +888,61 @@ impl Validator { // (by both replay stage and banking stage) let prioritization_fee_cache = Arc::new(PrioritizationFeeCache::default()); - match &config.block_verification_method { - BlockVerificationMethod::BlockstoreProcessor => { - info!("no scheduler pool is installed for block verification..."); - if let Some(count) = config.unified_scheduler_handler_threads { - warn!( - "--unified-scheduler-handler-threads={count} is ignored because unified \ - scheduler isn't enabled" - ); - } - } - BlockVerificationMethod::UnifiedScheduler => { - let scheduler_pool = DefaultSchedulerPool::new_dyn( + let leader_schedule_cache = Arc::new(leader_schedule_cache); + let startup_verification_complete; + let (poh_recorder, entry_receiver, record_receiver) = { + let bank = &bank_forks.read().unwrap().working_bank(); + startup_verification_complete = Arc::clone(bank.get_startup_verification_complete()); + PohRecorder::new_with_clear_signal( + bank.tick_height(), + bank.last_blockhash(), + bank.clone(), + None, + bank.ticks_per_slot(), + config.delay_leader_block_for_pending_fork, + blockstore.clone(), + blockstore.get_new_shred_signal(0), + &leader_schedule_cache, + &genesis_config.poh_config, + Some(poh_timing_point_sender), + exit.clone(), + ) + }; + let poh_recorder = Arc::new(RwLock::new(poh_recorder)); + + let unified_scheduler_pool = match ( + &config.block_verification_method, + &config.block_production_method, + ) { + methods @ (BlockVerificationMethod::UnifiedScheduler, _) + | methods @ (_, BlockProductionMethod::UnifiedScheduler) => { + let pool = DefaultSchedulerPool::new( + supported_scheduling_mode(methods), config.unified_scheduler_handler_threads, config.runtime_config.log_messages_bytes_limit, transaction_status_sender.clone(), Some(replay_vote_sender.clone()), prioritization_fee_cache.clone(), + poh_recorder.read().unwrap().new_recorder(), ); bank_forks .write() .unwrap() - .install_scheduler_pool(scheduler_pool); + .install_scheduler_pool(pool.clone()); + Some(pool) } - } + _ => { + info!("no scheduler pool is installed for block verification/production..."); + if let Some(count) = config.unified_scheduler_handler_threads { + warn!( + "--unified-scheduler-handler-threads={count} is ignored because unified \ + scheduler isn't enabled" + ); + } + None + } + }; - let leader_schedule_cache = Arc::new(leader_schedule_cache); let entry_notification_sender = entry_notifier_service .as_ref() .map(|service| service.sender()); @@ -967,27 +1018,6 @@ impl Validator { let max_slots = Arc::new(MaxSlots::default()); - let startup_verification_complete; - let (poh_recorder, entry_receiver, record_receiver) = { - let bank = &bank_forks.read().unwrap().working_bank(); - startup_verification_complete = Arc::clone(bank.get_startup_verification_complete()); - PohRecorder::new_with_clear_signal( - bank.tick_height(), - bank.last_blockhash(), - bank.clone(), - None, - bank.ticks_per_slot(), - config.delay_leader_block_for_pending_fork, - blockstore.clone(), - blockstore.get_new_shred_signal(0), - &leader_schedule_cache, - &genesis_config.poh_config, - Some(poh_timing_point_sender), - exit.clone(), - ) - }; - let poh_recorder = Arc::new(RwLock::new(poh_recorder)); - let staked_nodes = Arc::new(RwLock::new(StakedNodes::default())); let connection_cache = match use_quic { @@ -1367,6 +1397,7 @@ impl Validator { let cluster_slots = Arc::new(crate::cluster_slots_service::cluster_slots::ClusterSlots::default()); + let root_bank_cache = RootBankCache::new(bank_forks.clone()); let tvu = Tvu::new( vote_account, authorized_voter_keypairs, @@ -1474,6 +1505,7 @@ impl Validator { node.info.shred_version(), vote_tracker, bank_forks.clone(), + root_bank_cache, verified_vote_sender, gossip_verified_vote_hash_sender, replay_vote_receiver, @@ -1495,6 +1527,7 @@ impl Validator { config.block_production_method.clone(), config.enable_block_production_forwarding, config.generator_config.clone(), + unified_scheduler_pool, ); datapoint_info!( diff --git a/core/tests/unified_scheduler.rs b/core/tests/unified_scheduler.rs index 75795f2f6c01ee..9a8b66fc04e64e 100644 --- a/core/tests/unified_scheduler.rs +++ b/core/tests/unified_scheduler.rs @@ -17,16 +17,13 @@ use { solana_ledger::genesis_utils::create_genesis_config, solana_runtime::{ accounts_background_service::AbsRequestSender, bank::Bank, bank_forks::BankForks, - genesis_utils::GenesisConfigInfo, prioritization_fee_cache::PrioritizationFeeCache, + genesis_utils::GenesisConfigInfo, installed_scheduler_pool::SchedulingContext, + prioritization_fee_cache::PrioritizationFeeCache, }, solana_runtime_transaction::runtime_transaction::RuntimeTransaction, - solana_sdk::{ - hash::Hash, - pubkey::Pubkey, - system_transaction, - transaction::{Result, SanitizedTransaction}, - }, + solana_sdk::{hash::Hash, pubkey::Pubkey, system_transaction, transaction::Result}, solana_timings::ExecuteTimings, + solana_unified_scheduler_logic::Task, solana_unified_scheduler_pool::{ DefaultTaskHandler, HandlerContext, PooledScheduler, SchedulerPool, TaskHandler, }, @@ -48,9 +45,8 @@ fn test_scheduler_waited_by_drop_bank_service() { fn handle( result: &mut Result<()>, timings: &mut ExecuteTimings, - bank: &Arc, - transaction: &RuntimeTransaction, - index: usize, + scheduling_context: &SchedulingContext, + task: &Task, handler_context: &HandlerContext, ) { info!("Stalling at StallingHandler::handle()..."); @@ -59,7 +55,7 @@ fn test_scheduler_waited_by_drop_bank_service() { std::thread::sleep(std::time::Duration::from_secs(3)); info!("Now entering into DefaultTaskHandler::handle()..."); - DefaultTaskHandler::handle(result, timings, bank, transaction, index, handler_context); + DefaultTaskHandler::handle(result, timings, scheduling_context, task, handler_context); } } @@ -73,7 +69,7 @@ fn test_scheduler_waited_by_drop_bank_service() { let genesis_bank = Bank::new_for_tests(&genesis_config); let bank_forks = BankForks::new_rw_arc(genesis_bank); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool_raw = SchedulerPool::, _>::new( + let pool_raw = SchedulerPool::, _>::new_for_verification( None, None, None, diff --git a/ledger-tool/Cargo.toml b/ledger-tool/Cargo.toml index d8e63900bd4474..899f549a0dcf96 100644 --- a/ledger-tool/Cargo.toml +++ b/ledger-tool/Cargo.toml @@ -42,6 +42,7 @@ solana-ledger = { workspace = true, features = ["dev-context-only-utils"] } solana-log-collector = { workspace = true } solana-logger = { workspace = true } solana-measure = { workspace = true } +solana-poh = { workspace = true } solana-program-runtime = { workspace = true } solana-rpc = { workspace = true } solana-runtime = { workspace = true, features = ["dev-context-only-utils"] } diff --git a/ledger-tool/src/ledger_utils.rs b/ledger-tool/src/ledger_utils.rs index 03856acbbf756c..89814bedc5ef4a 100644 --- a/ledger-tool/src/ledger_utils.rs +++ b/ledger-tool/src/ledger_utils.rs @@ -10,7 +10,8 @@ use { solana_core::{ accounts_hash_verifier::AccountsHashVerifier, rewards_recorder_service::RewardsRecorderService, - snapshot_packager_service::PendingSnapshotPackages, validator::BlockVerificationMethod, + snapshot_packager_service::PendingSnapshotPackages, + validator::{supported_scheduling_mode, BlockProductionMethod, BlockVerificationMethod}, }, solana_geyser_plugin_manager::geyser_plugin_service::{ GeyserPluginService, GeyserPluginServiceError, @@ -27,6 +28,7 @@ use { use_snapshot_archives_at_startup::UseSnapshotArchivesAtStartup, }, solana_measure::measure_time, + solana_poh::poh_recorder::{NewPohRecorder, PohRecorder}, solana_rpc::transaction_status_service::TransactionStatusService, solana_runtime::{ accounts_background_service::{ @@ -66,6 +68,8 @@ pub struct LoadAndProcessLedgerOutput { // not. It is safe to let ABS continue in the background, and ABS will stop // if/when it finally checks the exit flag pub accounts_background_service: AccountsBackgroundService, + pub unified_scheduler_pool: Option>, + pub new_poh_recorder: Option, } const PROCESS_SLOTS_HELP_STRING: &str = @@ -350,43 +354,86 @@ pub fn load_and_process_ledger( exit.clone(), ) .map_err(LoadAndProcessLedgerError::LoadBankForks)?; + let leader_schedule_cache = Arc::new(leader_schedule_cache); let block_verification_method = value_t!( arg_matches, "block_verification_method", BlockVerificationMethod ) .unwrap_or_default(); + let block_production_method = value_t!( + arg_matches, + "block_production_method", + BlockProductionMethod + ) + .inspect(|method| { + if matches!(method, BlockProductionMethod::UnifiedScheduler) + && !arg_matches.is_present("enable_experimental_block_production_method") + { + error!( + "Currently, the unified-scheduler method is experimental for block-production. \ + Explicitly pass --enable-experimental-block-production-method to supress this error" + ); + } + }) + .unwrap_or_default(); info!( - "Using: block-verification-method: {}", - block_verification_method, + "Using: block-verification-method: {}, block-production-method: {}", + block_verification_method, block_production_method ); let unified_scheduler_handler_threads = value_t!(arg_matches, "unified_scheduler_handler_threads", usize).ok(); - match block_verification_method { - BlockVerificationMethod::BlockstoreProcessor => { - info!("no scheduler pool is installed for block verification..."); - if let Some(count) = unified_scheduler_handler_threads { - warn!( - "--unified-scheduler-handler-threads={count} is ignored because unified \ - scheduler isn't enabled" + let (unified_scheduler_pool, new_poh_recorder) = + match (&block_verification_method, &block_production_method) { + methods @ (BlockVerificationMethod::UnifiedScheduler, _) + | methods @ (_, BlockProductionMethod::UnifiedScheduler) => { + let no_replay_vote_sender = None; + let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); + + let exit = Arc::new(AtomicBool::new(false)); + let poh_bank = bank_forks.read().unwrap().working_bank(); + let new_poh_recorder = PohRecorder::new_with_clear_signal( + poh_bank.tick_height(), + poh_bank.last_blockhash(), + poh_bank.clone(), + None, + poh_bank.ticks_per_slot(), + false, + blockstore.clone(), + blockstore.get_new_shred_signal(0), + &leader_schedule_cache, + &genesis_config.poh_config, + None, + exit.clone(), ); - } - } - BlockVerificationMethod::UnifiedScheduler => { - let no_replay_vote_sender = None; - let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - bank_forks - .write() - .unwrap() - .install_scheduler_pool(DefaultSchedulerPool::new_dyn( + drop(poh_bank); + + let pool = DefaultSchedulerPool::new( + supported_scheduling_mode(methods), unified_scheduler_handler_threads, process_options.runtime_config.log_messages_bytes_limit, transaction_status_sender.clone(), no_replay_vote_sender, ignored_prioritization_fee_cache, - )); - } - } + new_poh_recorder.0.new_recorder(), + ); + bank_forks + .write() + .unwrap() + .install_scheduler_pool(pool.clone()); + (Some(pool), Some(new_poh_recorder)) + } + _ => { + info!("no scheduler pool is installed for block verification/production..."); + if let Some(count) = unified_scheduler_handler_threads { + warn!( + "--unified-scheduler-handler-threads={count} is ignored because unified \ + scheduler isn't enabled" + ); + } + (None, None) + } + }; let pending_snapshot_packages = Arc::new(Mutex::new(PendingSnapshotPackages::default())); let (accounts_package_sender, accounts_package_receiver) = crossbeam_channel::unbounded(); @@ -436,6 +483,8 @@ pub fn load_and_process_ledger( bank_forks, starting_snapshot_hashes, accounts_background_service, + unified_scheduler_pool, + new_poh_recorder, }) .map_err(LoadAndProcessLedgerError::ProcessBlockstoreFromRoot); diff --git a/ledger-tool/src/main.rs b/ledger-tool/src/main.rs index 4580c0b7731abe..30471c9d5f84d6 100644 --- a/ledger-tool/src/main.rs +++ b/ledger-tool/src/main.rs @@ -991,6 +991,15 @@ fn main() { .global(true) .help(DefaultSchedulerPool::cli_message()), ) + .arg( + Arg::with_name("enable_experimental_block_production_method") + .long("enable-experimental-block-production-method") + .takes_value(false) + .help( + "Accept unified-scheduler to be used as an experimental block \ + production method", + ), + ) .arg( Arg::with_name("output_format") .long("output") @@ -2068,6 +2077,7 @@ fn main() { bank_forks, starting_snapshot_hashes, accounts_background_service, + .. } = load_and_process_ledger_or_exit( arg_matches, &genesis_config, @@ -2500,14 +2510,18 @@ fn main() { AccessType::Primary, // needed for purging already existing simulated block shreds... )); let genesis_config = open_genesis_config_by(&ledger_path, arg_matches); - let LoadAndProcessLedgerOutput { bank_forks, .. } = - load_and_process_ledger_or_exit( - arg_matches, - &genesis_config, - blockstore.clone(), - process_options, - None, // transaction status sender - ); + let LoadAndProcessLedgerOutput { + bank_forks, + unified_scheduler_pool, + new_poh_recorder, + .. + } = load_and_process_ledger_or_exit( + arg_matches, + &genesis_config, + blockstore.clone(), + process_options, + None, // transaction status sender + ); let block_production_method = value_t!( arg_matches, @@ -2516,13 +2530,13 @@ fn main() { ) .unwrap_or_default(); - info!("Using: block-production-method: {block_production_method}"); - match simulator.start( genesis_config, bank_forks, blockstore, block_production_method, + unified_scheduler_pool, + new_poh_recorder, ) { Ok(()) => println!("Ok"), Err(error) => { diff --git a/ledger/benches/blockstore_processor.rs b/ledger/benches/blockstore_processor.rs index 44f65db1d54fd4..711c5381b63b8c 100644 --- a/ledger/benches/blockstore_processor.rs +++ b/ledger/benches/blockstore_processor.rs @@ -162,6 +162,7 @@ fn bench_execute_batch( &mut timing, None, &prioritization_fee_cache, + None:: Option>>, ); } }); diff --git a/ledger/src/blockstore_processor.rs b/ledger/src/blockstore_processor.rs index 2a514286b6ed3a..499fd9eadf655f 100644 --- a/ledger/src/blockstore_processor.rs +++ b/ledger/src/blockstore_processor.rs @@ -110,6 +110,7 @@ fn first_err(results: &[Result<()>]) -> Result<()> { fn get_first_error( batch: &TransactionBatch, commit_results: &[TransactionCommitResult], + is_unified_scheduler_for_block_production: bool, ) -> Option<(Result<()>, Signature)> { let mut first_err = None; for (commit_result, transaction) in commit_results.iter().zip(batch.sanitized_transactions()) { @@ -117,18 +118,20 @@ fn get_first_error( if first_err.is_none() { first_err = Some((Err(err.clone()), *transaction.signature())); } - warn!( - "Unexpected validator error: {:?}, transaction: {:?}", - err, transaction - ); - datapoint_error!( - "validator_process_entry_error", - ( - "error", - format!("error: {err:?}, transaction: {transaction:?}"), - String - ) - ); + if !is_unified_scheduler_for_block_production { + warn!( + "Unexpected validator error: {:?}, transaction: {:?}", + err, transaction + ); + datapoint_error!( + "validator_process_entry_error", + ( + "error", + format!("error: {err:?}, transaction: {transaction:?}"), + String + ) + ); + } } } first_err @@ -150,12 +153,14 @@ pub fn execute_batch( timings: &mut ExecuteTimings, log_messages_bytes_limit: Option, prioritization_fee_cache: &PrioritizationFeeCache, + pre_commit_callback: Option Option>>, ) -> Result<()> { let TransactionBatchWithIndexes { batch, transaction_indexes, } = batch; let record_token_balances = transaction_status_sender.is_some(); + let mut transaction_indexes = transaction_indexes.to_vec(); let mut mint_decimals: HashMap = HashMap::new(); @@ -165,14 +170,32 @@ pub fn execute_batch( vec![] }; - let (commit_results, balances) = batch.bank().load_execute_and_commit_transactions( + let is_unified_scheduler_for_block_production = pre_commit_callback.is_some(); + let pre_commit_callback = pre_commit_callback.map(|original_callback| { + || { + if let Some(maybe_index) = original_callback() { + if let Some(index) = maybe_index { + assert!(transaction_indexes.is_empty()); + transaction_indexes.push(index); + } + true + } else { + false + } + } + }); + + let Some((commit_results, balances)) = batch.bank().do_load_execute_and_commit_transactions( batch, MAX_PROCESSING_AGE, transaction_status_sender.is_some(), ExecutionRecordingConfig::new_single_setting(transaction_status_sender.is_some()), timings, log_messages_bytes_limit, - ); + pre_commit_callback, + ) else { + return Err(TransactionError::CommitFailed); + }; bank_utils::find_and_send_votes( batch.sanitized_transactions(), @@ -201,7 +224,11 @@ pub fn execute_batch( .filter_map(|(commit_result, tx)| commit_result.was_committed().then_some(tx)) .collect_vec(); - let first_err = get_first_error(batch, &commit_results); + let first_err = get_first_error( + batch, + &commit_results, + is_unified_scheduler_for_block_production, + ); if let Some(transaction_status_sender) = transaction_status_sender { let transactions: Vec = batch @@ -224,7 +251,7 @@ pub fn execute_batch( commit_results, balances, token_balances, - transaction_indexes.to_vec(), + transaction_indexes, ); } @@ -322,6 +349,7 @@ fn execute_batches_internal( &mut timings, log_messages_bytes_limit, prioritization_fee_cache, + None:: Option>>, )); let thread_index = replay_tx_thread_pool.current_thread_index().unwrap(); @@ -4427,7 +4455,7 @@ pub mod tests { &mut ExecuteTimings::default(), None, ); - let (err, signature) = get_first_error(&batch, &commit_results).unwrap(); + let (err, signature) = get_first_error(&batch, &commit_results, false).unwrap(); assert_eq!(err.unwrap_err(), TransactionError::AccountNotFound); assert_eq!(signature, account_not_found_sig); } @@ -4992,16 +5020,17 @@ pub mod tests { .. } = create_genesis_config_with_leader(500, &dummy_leader_pubkey, 100); let bank = Arc::new(Bank::new_for_tests(&genesis_config)); - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::for_verification(bank.clone()); let txs = create_test_transactions(&mint_keypair, &genesis_config.hash()); let mut mocked_scheduler = MockInstalledScheduler::new(); let seq = Arc::new(Mutex::new(mockall::Sequence::new())); let seq_cloned = seq.clone(); + // Used for assertions in BankWithScheduler::{new, schedule_transaction_executions} mocked_scheduler .expect_context() - .times(1) + .times(2) .in_sequence(&mut seq.lock().unwrap()) .return_const(context); if should_succeed { diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs index 7f847d848a0ada..3989ba5f6495c3 100644 --- a/local-cluster/tests/local_cluster.rs +++ b/local-cluster/tests/local_cluster.rs @@ -16,7 +16,7 @@ use { }, optimistic_confirmation_verifier::OptimisticConfirmationVerifier, replay_stage::DUPLICATE_THRESHOLD, - validator::{BlockVerificationMethod, ValidatorConfig}, + validator::{BlockProductionMethod, BlockVerificationMethod, ValidatorConfig}, }, solana_download_utils::download_snapshot_archive, solana_entry::entry::create_ticks, @@ -5799,6 +5799,43 @@ fn test_randomly_mixed_block_verification_methods_between_bootstrap_and_not() { ); } +#[test] +#[serial] +fn test_randomly_mixed_block_production_methods_between_bootstrap_and_not() { + // tailored logging just to see two block production methods are working correctly + solana_logger::setup_with_default( + "solana_metrics::metrics=warn,\ + solana_core=warn,\ + solana_runtime::installed_scheduler_pool=trace,\ + solana_ledger::blockstore_processor=debug,\ + info", + ); + + let num_nodes = BlockVerificationMethod::COUNT; + let mut config = ClusterConfig::new_with_equal_stakes( + num_nodes, + DEFAULT_CLUSTER_LAMPORTS, + DEFAULT_NODE_STAKE, + ); + + // Overwrite block_production_method with shuffled variants + let mut methods = BlockProductionMethod::iter().collect::>(); + methods.shuffle(&mut rand::thread_rng()); + for (validator_config, method) in config.validator_configs.iter_mut().zip_eq(methods) { + validator_config.block_production_method = method; + } + + let local = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified); + cluster_tests::spend_and_verify_all_nodes( + &local.entry_point_info, + &local.funding_keypair, + num_nodes, + HashSet::new(), + SocketAddrSpace::Unspecified, + &local.connection_cache, + ); +} + /// Forks previous marked invalid should be marked as such in fork choice on restart #[test] #[ignore] diff --git a/perf/Cargo.toml b/perf/Cargo.toml index 4af056343ba218..16b28dbab2d28f 100644 --- a/perf/Cargo.toml +++ b/perf/Cargo.toml @@ -13,6 +13,7 @@ edition = { workspace = true } ahash = { workspace = true } bincode = { workspace = true } bv = { workspace = true, features = ["serde"] } +crossbeam-channel = { workspace = true } curve25519-dalek = { workspace = true } dlopen2 = { workspace = true } fnv = { workspace = true } diff --git a/perf/src/packet.rs b/perf/src/packet.rs index 73c29bc53788a9..a5a8ae5dda9d9b 100644 --- a/perf/src/packet.rs +++ b/perf/src/packet.rs @@ -8,8 +8,10 @@ use { std::{ io::Read, net::SocketAddr, + num::Saturating, ops::{Index, IndexMut}, slice::{Iter, IterMut, SliceIndex}, + sync::Arc, }, }; @@ -226,6 +228,38 @@ pub fn to_packet_batches(items: &[T], chunk_size: usize) -> Vec, Option)>; +pub type BankingPacketReceiver = crossbeam_channel::Receiver< + std::sync::Arc<( + Vec, + std::option::Option, + )>, +>; +#[cfg_attr(feature = "frozen-abi", derive(AbiExample))] +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct SigverifyTracerPacketStats { + pub total_removed_before_sigverify_stage: Saturating, + pub total_tracer_packets_received_in_sigverify_stage: Saturating, + pub total_tracer_packets_deduped: Saturating, + pub total_excess_tracer_packets: Saturating, + pub total_tracker_packets_passed_sigverify: Saturating, +} + +impl SigverifyTracerPacketStats { + pub fn is_default(&self) -> bool { + *self == SigverifyTracerPacketStats::default() + } + + pub fn aggregate(&mut self, other: &SigverifyTracerPacketStats) { + self.total_removed_before_sigverify_stage += other.total_removed_before_sigverify_stage; + self.total_tracer_packets_received_in_sigverify_stage += + other.total_tracer_packets_received_in_sigverify_stage; + self.total_tracer_packets_deduped += other.total_tracer_packets_deduped; + self.total_excess_tracer_packets += other.total_excess_tracer_packets; + self.total_tracker_packets_passed_sigverify += other.total_tracker_packets_passed_sigverify; + } +} + #[cfg(test)] fn to_packet_batches_for_tests(items: &[T]) -> Vec { to_packet_batches(items, NUM_PACKETS) diff --git a/poh/src/poh_recorder.rs b/poh/src/poh_recorder.rs index 8b95ecec039d64..91788dc3bb434c 100644 --- a/poh/src/poh_recorder.rs +++ b/poh/src/poh_recorder.rs @@ -140,7 +140,7 @@ pub struct RecordTransactionsSummary { pub starting_transaction_index: Option, } -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct TransactionRecorder { // shared by all users of PohRecorder pub record_sender: Sender, @@ -155,6 +155,13 @@ impl TransactionRecorder { } } + pub fn new_dummy() -> Self { + Self { + record_sender: crossbeam_channel::unbounded().0, + is_exited: Arc::new(AtomicBool::default()), + } + } + /// Hashes `transactions` and sends to PoH service for recording. Waits for response up to 1s. /// Panics on unexpected (non-`MaxHeightReached`) errors. pub fn record_transactions( @@ -313,8 +320,11 @@ pub struct PohRecorder { pub is_exited: Arc, } +pub type NewPohRecorder = (PohRecorder, Receiver, Receiver); + impl PohRecorder { - fn clear_bank(&mut self) { + fn clear_bank(&mut self) -> Option { + let mut cleared_bank = None; if let Some(WorkingBank { bank, start, .. }) = self.working_bank.take() { self.leader_bank_notifier.set_completed(bank.slot()); let next_leader_slot = self.leader_schedule_cache.next_leader_slot( @@ -340,6 +350,7 @@ impl PohRecorder { ("slot", bank.slot(), i64), ("elapsed", start.elapsed().as_millis(), i64), ); + cleared_bank = Some(bank); } if let Some(ref signal) = self.clear_bank_signal { @@ -353,6 +364,7 @@ impl PohRecorder { } } } + cleared_bank } pub fn would_be_leader(&self, within_next_n_ticks: u64) -> bool { @@ -661,8 +673,12 @@ impl PohRecorder { } // synchronize PoH with a bank - pub fn reset(&mut self, reset_bank: Arc, next_leader_slot: Option<(Slot, Slot)>) { - self.clear_bank(); + pub fn reset( + &mut self, + reset_bank: Arc, + next_leader_slot: Option<(Slot, Slot)>, + ) -> Option { + let cleared_bank = self.clear_bank(); self.reset_poh(reset_bank, true); if let Some(ref sender) = self.poh_timing_point_sender { @@ -683,6 +699,7 @@ impl PohRecorder { self.leader_first_tick_height_including_grace_ticks = leader_first_tick_height_including_grace_ticks; self.leader_last_tick_height = leader_last_tick_height; + cleared_bank } pub fn set_bank(&mut self, bank: BankWithScheduler, track_transaction_indexes: bool) { @@ -1028,7 +1045,7 @@ impl PohRecorder { poh_config: &PohConfig, poh_timing_point_sender: Option, is_exited: Arc, - ) -> (Self, Receiver, Receiver) { + ) -> NewPohRecorder { let tick_number = 0; let poh = Arc::new(Mutex::new(Poh::new_with_slot_info( last_entry_hash, @@ -1098,7 +1115,7 @@ impl PohRecorder { leader_schedule_cache: &Arc, poh_config: &PohConfig, is_exited: Arc, - ) -> (Self, Receiver, Receiver) { + ) -> NewPohRecorder { let delay_leader_block_for_pending_fork = false; Self::new_with_clear_signal( tick_height, diff --git a/programs/sbf/Cargo.lock b/programs/sbf/Cargo.lock index 4aabc16f3c955e..597732a8967921 100644 --- a/programs/sbf/Cargo.lock +++ b/programs/sbf/Cargo.lock @@ -1109,6 +1109,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.9.3" @@ -1387,13 +1396,35 @@ version = "0.99.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" dependencies = [ - "convert_case", + "convert_case 0.4.0", "proc-macro2", "quote", "rustc_version", "syn 1.0.109", ] +[[package]] +name = "derive_more" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" +dependencies = [ + "convert_case 0.6.0", + "proc-macro2", + "quote", + "syn 2.0.87", + "unicode-xid", +] + [[package]] name = "dialoguer" version = "0.10.4" @@ -1502,6 +1533,12 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" +[[package]] +name = "dyn-clone" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" + [[package]] name = "eager" version = "0.1.0" @@ -2665,7 +2702,7 @@ version = "18.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2b99d4207e2a04fb4581746903c2bb7eb376f88de9c699d0f3e10feeac0cd3a" dependencies = [ - "derive_more", + "derive_more 0.99.17", "futures 0.3.31", "jsonrpc-core", "jsonrpc-pubsub", @@ -5368,6 +5405,7 @@ dependencies = [ "ahash 0.8.11", "anyhow", "arrayvec", + "assert_matches", "base64 0.22.1", "bincode", "bs58", @@ -5375,6 +5413,7 @@ dependencies = [ "chrono", "crossbeam-channel", "dashmap", + "derive_more 1.0.0", "etcd-client", "futures 0.3.31", "histogram", @@ -6053,6 +6092,7 @@ dependencies = [ "bincode", "bv", "caps", + "crossbeam-channel", "curve25519-dalek 4.1.3", "dlopen2", "fnv", @@ -6592,6 +6632,7 @@ dependencies = [ "ahash 0.8.11", "aquamarine", "arrayref", + "assert_matches", "base64 0.22.1", "bincode", "blake3", @@ -7979,16 +8020,20 @@ dependencies = [ "crossbeam-channel", "dashmap", "derive-where", + "derive_more 1.0.0", + "dyn-clone", "log", - "qualifier_attr", "scopeguard", "solana-ledger", + "solana-perf", + "solana-poh", "solana-runtime", "solana-runtime-transaction", "solana-sdk", "solana-timings", "solana-unified-scheduler-logic", "static_assertions", + "trait-set", "vec_extract_if_polyfill", ] @@ -9206,6 +9251,17 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "trait-set" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b79e2e9c9ab44c6d7c20d5976961b47e8f49ac199154daa514b77cd1ab536625" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "trees" version = "0.4.2" @@ -9281,6 +9337,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "unicode-width" version = "0.1.8" diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index b7eefcea5a76b2..f3fd96ef7e9b15 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -13,6 +13,7 @@ edition = { workspace = true } ahash = { workspace = true } aquamarine = { workspace = true } arrayref = { workspace = true } +assert_matches = { workspace = true } base64 = { workspace = true } bincode = { workspace = true } blake3 = { workspace = true } @@ -104,7 +105,6 @@ name = "solana_runtime" [dev-dependencies] agave-transaction-view = { workspace = true } -assert_matches = { workspace = true } ed25519-dalek = { workspace = true } libsecp256k1 = { workspace = true } memoffset = { workspace = true } diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index 68f735b3d011c4..957ee60d834fe7 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -256,7 +256,7 @@ struct RentMetrics { pub type BankStatusCache = StatusCache>; #[cfg_attr( feature = "frozen-abi", - frozen_abi(digest = "BHg4qpwegtaJypLUqAdjQYzYeLfEGf6tA4U5cREbHMHi") + frozen_abi(digest = "Fj6ATu6Rr5ossAykzbRSkCsuUzjdAZbYo5JaqfR1A72G") )] pub type BankSlotDelta = SlotDelta>; @@ -5029,6 +5029,29 @@ impl Bank { timings: &mut ExecuteTimings, log_messages_bytes_limit: Option, ) -> (Vec, TransactionBalancesSet) { + self.do_load_execute_and_commit_transactions( + batch, + max_age, + collect_balances, + recording_config, + timings, + log_messages_bytes_limit, + None:: bool>, + ) + .unwrap() + } + + #[must_use] + pub fn do_load_execute_and_commit_transactions( + &self, + batch: &TransactionBatch, + max_age: usize, + collect_balances: bool, + recording_config: ExecutionRecordingConfig, + timings: &mut ExecuteTimings, + log_messages_bytes_limit: Option, + pre_commit_callback: Option bool>, + ) -> Option<(Vec, TransactionBalancesSet)> { let pre_balances = if collect_balances { self.collect_balances(batch) } else { @@ -5054,6 +5077,15 @@ impl Bank { }, ); + if let Some(pre_commit_callback) = pre_commit_callback { + if let Some(e) = processing_results.first() { + assert_eq!(processing_results.len(), 1); + if e.is_ok() && !pre_commit_callback() { + return None; + } + } + } + let commit_results = self.commit_transactions( batch.sanitized_transactions(), processing_results, @@ -5065,10 +5097,10 @@ impl Bank { } else { vec![] }; - ( + Some(( commit_results, TransactionBalancesSet::new(pre_balances, post_balances), - ) + )) } /// Process a Transaction. This is used for unit tests and simply calls the vector diff --git a/runtime/src/bank_forks.rs b/runtime/src/bank_forks.rs index 83c2e0ab3fd675..4ff20a1503faa3 100644 --- a/runtime/src/bank_forks.rs +++ b/runtime/src/bank_forks.rs @@ -17,6 +17,7 @@ use { solana_sdk::{ clock::{BankId, Slot}, hash::Hash, + scheduling::SchedulingMode, }, std::{ collections::{hash_map::Entry, HashMap, HashSet}, @@ -226,18 +227,22 @@ impl BankForks { ); } - pub fn insert(&mut self, mut bank: Bank) -> BankWithScheduler { + pub fn insert(&mut self, bank: Bank) -> BankWithScheduler { + self.insert_with_scheduling_mode(SchedulingMode::BlockVerification, bank) + } + + pub fn insert_with_scheduling_mode( + &mut self, + mode: SchedulingMode, + mut bank: Bank, + ) -> BankWithScheduler { if self.root.load(Ordering::Relaxed) < self.highest_slot_at_startup { bank.set_check_program_modification_slot(true); } let bank = Arc::new(bank); let bank = if let Some(scheduler_pool) = &self.scheduler_pool { - let context = SchedulingContext::new(bank.clone()); - let scheduler = scheduler_pool.take_scheduler(context); - let bank_with_scheduler = BankWithScheduler::new(bank, Some(scheduler)); - scheduler_pool.register_timeout_listener(bank_with_scheduler.create_timeout_listener()); - bank_with_scheduler + Self::install_scheduler_into_bank(scheduler_pool, mode, bank, false) } else { BankWithScheduler::new_without_scheduler(bank) }; @@ -251,6 +256,42 @@ impl BankForks { bank } + fn install_scheduler_into_bank( + scheduler_pool: &InstalledSchedulerPoolArc, + mode: SchedulingMode, + bank: Arc, + is_reinstall: bool, + ) -> BankWithScheduler { + trace!( + "Inserting bank (slot: {}) with scheduler (mode: {:?}, reinstall: {:?})", + bank.slot(), + mode, + is_reinstall, + ); + let context = SchedulingContext::new(mode, Some(bank.clone())); + let Some(scheduler) = scheduler_pool.take_scheduler(context) else { + return BankWithScheduler::new_without_scheduler(bank); + }; + let bank_with_scheduler = BankWithScheduler::new(bank, Some(scheduler)); + scheduler_pool.register_timeout_listener(bank_with_scheduler.create_timeout_listener()); + bank_with_scheduler + } + + #[cfg(feature = "dev-context-only-utils")] + pub fn reinstall_block_production_scheduler_into_working_genesis_bank( + &mut self, + ) -> BankWithScheduler { + let bank = self.working_bank(); + assert!(self.banks.len() == 1 && bank.slot() == 0 && !bank.is_frozen()); + let pool = self.scheduler_pool.as_ref().unwrap(); + let mode = SchedulingMode::BlockProduction; + let bank = Self::install_scheduler_into_bank(pool, mode, bank, true); + self.banks + .insert(bank.slot(), bank.clone_with_scheduler()) + .expect("some removed bank"); + bank + } + pub fn insert_from_ledger(&mut self, bank: Bank) -> BankWithScheduler { self.highest_slot_at_startup = std::cmp::max(self.highest_slot_at_startup, bank.slot()); self.insert(bank) @@ -284,8 +325,8 @@ impl BankForks { self[self.highest_slot()].clone() } - pub fn working_bank_with_scheduler(&self) -> &BankWithScheduler { - &self.banks[&self.highest_slot()] + pub fn working_bank_with_scheduler(&self) -> BankWithScheduler { + self.banks[&self.highest_slot()].clone_with_scheduler() } /// Register to be notified when a bank has been dumped (due to duplicate block handling) diff --git a/runtime/src/installed_scheduler_pool.rs b/runtime/src/installed_scheduler_pool.rs index 9aa4a20e09c558..4cc58bcfbe818e 100644 --- a/runtime/src/installed_scheduler_pool.rs +++ b/runtime/src/installed_scheduler_pool.rs @@ -22,11 +22,13 @@ use { crate::bank::Bank, + assert_matches::assert_matches, log::*, solana_runtime_transaction::runtime_transaction::RuntimeTransaction, solana_sdk::{ clock::Slot, hash::Hash, + scheduling::SchedulingMode, transaction::{Result, SanitizedTransaction, TransactionError}, }, solana_timings::ExecuteTimings, @@ -46,7 +48,7 @@ pub fn initialized_result_with_timings() -> ResultWithTimings { } pub trait InstalledSchedulerPool: Send + Sync + Debug { - fn take_scheduler(&self, context: SchedulingContext) -> InstalledSchedulerBox { + fn take_scheduler(&self, context: SchedulingContext) -> Option { self.take_resumed_scheduler(context, initialized_result_with_timings()) } @@ -54,7 +56,7 @@ pub trait InstalledSchedulerPool: Send + Sync + Debug { &self, context: SchedulingContext, result_with_timings: ResultWithTimings, - ) -> InstalledSchedulerBox; + ) -> Option; fn register_timeout_listener(&self, timeout_listener: TimeoutListener); } @@ -227,21 +229,30 @@ pub type SchedulerId = u64; /// `SchedulingContext`s. #[derive(Clone, Debug)] pub struct SchedulingContext { - // mode: SchedulingMode, // this will be added later. - bank: Arc, + mode: SchedulingMode, + bank: Option>, } impl SchedulingContext { - pub fn new(bank: Arc) -> Self { - Self { bank } + pub fn new(mode: SchedulingMode, bank: Option>) -> Self { + Self { mode, bank } + } + + #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))] + fn for_verification(bank: Arc) -> Self { + Self::new(SchedulingMode::BlockVerification, Some(bank)) + } + + pub fn mode(&self) -> SchedulingMode { + self.mode } pub fn bank(&self) -> &Arc { - &self.bank + self.bank.as_ref().unwrap() } - pub fn slot(&self) -> Slot { - self.bank().slot() + pub fn slot(&self) -> Option { + self.bank.as_ref().map(|bank| bank.slot()) } } @@ -301,7 +312,7 @@ pub enum SchedulerStatus { /// Scheduler is idling for long time, returning scheduler back to the pool. /// This will be immediately (i.e. transaparently) transitioned to Active as soon as there's /// new transaction to be executed. - Stale(InstalledSchedulerPoolArc, ResultWithTimings), + Stale(InstalledSchedulerPoolArc, SchedulingMode, ResultWithTimings), } impl SchedulerStatus { @@ -312,13 +323,23 @@ impl SchedulerStatus { } } + fn scheduling_mode(&self) -> Option { + match self { + SchedulerStatus::Unavailable => None, + SchedulerStatus::Active(scheduler) => Some(scheduler.context().mode()), + SchedulerStatus::Stale(_, mode, _) => Some(*mode), + } + } + fn transition_from_stale_to_active( &mut self, f: impl FnOnce(InstalledSchedulerPoolArc, ResultWithTimings) -> InstalledSchedulerBox, ) { - let Self::Stale(pool, result_with_timings) = mem::replace(self, Self::Unavailable) else { + let Self::Stale(pool, mode, result_with_timings) = mem::replace(self, Self::Unavailable) + else { panic!("transition to Active failed: {self:?}"); }; + assert_matches!(mode, SchedulingMode::BlockVerification); *self = Self::Active(f(pool, result_with_timings)); } @@ -332,8 +353,9 @@ impl SchedulerStatus { let Self::Active(scheduler) = mem::replace(self, Self::Unavailable) else { unreachable!("not active: {self:?}"); }; + let mode = scheduler.context().mode; let (pool, result_with_timings) = f(scheduler); - *self = Self::Stale(pool, result_with_timings); + *self = Self::Stale(pool, mode, result_with_timings); } fn transition_from_active_to_unavailable(&mut self) -> InstalledSchedulerBox { @@ -344,7 +366,8 @@ impl SchedulerStatus { } fn transition_from_stale_to_unavailable(&mut self) -> ResultWithTimings { - let Self::Stale(_pool, result_with_timings) = mem::replace(self, Self::Unavailable) else { + let Self::Stale(_pool, _mode, result_with_timings) = mem::replace(self, Self::Unavailable) + else { panic!("transition to Unavailable failed: {self:?}"); }; result_with_timings @@ -454,6 +477,10 @@ impl BankWithScheduler { ); let schedule_result: ScheduleResult = self.inner.with_active_scheduler(|scheduler| { + assert_matches!( + scheduler.context().mode(), + SchedulingMode::BlockVerification + ); for (sanitized_transaction, index) in transactions_with_indexes { scheduler.schedule_execution(sanitized_transaction, index)?; } @@ -507,6 +534,10 @@ impl BankWithScheduler { ) } + pub fn scheduling_mode(&self) -> Option { + self.inner.scheduler.read().unwrap().scheduling_mode() + } + pub const fn no_scheduler_available() -> InstalledSchedulerRwLock { RwLock::new(SchedulerStatus::Unavailable) } @@ -523,22 +554,26 @@ impl BankWithSchedulerInner { // This is the fast path, needing single read-lock most of time. f(scheduler) } - SchedulerStatus::Stale(_pool, (result, _timings)) if result.is_err() => { + SchedulerStatus::Stale(_pool, mode, (result, _timings)) if result.is_err() => { + assert_matches!(mode, SchedulingMode::BlockVerification); trace!( "with_active_scheduler: bank (slot: {}) has a stale aborted scheduler...", self.bank.slot(), ); Err(SchedulerAborted) } - SchedulerStatus::Stale(pool, _result_with_timings) => { + SchedulerStatus::Stale(pool, mode, _result_with_timings) => { + assert_matches!(mode, SchedulingMode::BlockVerification); let pool = pool.clone(); drop(scheduler); - let context = SchedulingContext::new(self.bank.clone()); + let context = SchedulingContext::for_verification(self.bank.clone()); let mut scheduler = self.scheduler.write().unwrap(); trace!("with_active_scheduler: {:?}", scheduler); scheduler.transition_from_stale_to_active(|pool, result_with_timings| { - let scheduler = pool.take_resumed_scheduler(context, result_with_timings); + let scheduler = pool + .take_resumed_scheduler(context, result_with_timings) + .expect("successful retaking"); info!( "with_active_scheduler: bank (slot: {}) got active, taking scheduler (id: {})", self.bank.slot(), @@ -571,23 +606,24 @@ impl BankWithSchedulerInner { return; }; + let mut id = None; scheduler.maybe_transition_from_active_to_stale(|scheduler| { // The scheduler hasn't still been wait_for_termination()-ed after awhile... // Return the installed scheduler back to the scheduler pool as soon as the // scheduler gets idle after executing all currently-scheduled transactions. - let id = scheduler.id(); + id = Some(scheduler.id()); let (result_with_timings, uninstalled_scheduler) = scheduler.wait_for_termination(false); uninstalled_scheduler.return_to_pool(); info!( - "timeout_listener: bank (slot: {}) got stale, returning scheduler (id: {})", + "timeout_listener: bank (slot: {}) got stale, returned scheduler (id: {:?})", bank.bank.slot(), id, ); (pool, result_with_timings) }); - trace!("timeout_listener: {:?}", scheduler); + trace!("timeout_listener: {:?}", id); }) } @@ -597,7 +633,8 @@ impl BankWithSchedulerInner { let mut scheduler = self.scheduler.write().unwrap(); match &mut *scheduler { SchedulerStatus::Active(scheduler) => scheduler.recover_error_after_abort(), - SchedulerStatus::Stale(_pool, (result, _timings)) if result.is_err() => { + SchedulerStatus::Stale(_pool, mode, (result, _timings)) if result.is_err() => { + assert_matches!(mode, SchedulingMode::BlockVerification); result.clone().unwrap_err() } _ => unreachable!("no error in {:?}", self.scheduler), @@ -639,12 +676,12 @@ impl BankWithSchedulerInner { uninstalled_scheduler.return_to_pool(); (false, Some(result_with_timings)) } - SchedulerStatus::Stale(_pool, _result_with_timings) if reason.is_paused() => { + SchedulerStatus::Stale(_pool, _mode, _result_with_timings) if reason.is_paused() => { // Do nothing for pauses because the scheduler termination is guaranteed to be // called later. (true, None) } - SchedulerStatus::Stale(_pool, _result_with_timings) => { + SchedulerStatus::Stale(_pool, _mode, _result_with_timings) => { let result_with_timings = scheduler.transition_from_stale_to_unavailable(); (true, Some(result_with_timings)) } @@ -711,7 +748,6 @@ mod tests { bank::test_utils::goto_end_of_slot_with_scheduler, genesis_utils::{create_genesis_config, GenesisConfigInfo}, }, - assert_matches::assert_matches, mockall::Sequence, solana_sdk::system_transaction, std::sync::Mutex, @@ -721,14 +757,16 @@ mod tests { bank: Arc, is_dropped_flags: impl Iterator, f: Option, + extra_context_use: usize, ) -> InstalledSchedulerBox { let mut mock = MockInstalledScheduler::new(); let seq = Arc::new(Mutex::new(Sequence::new())); + // Could be used for assertions in BankWithScheduler::{new, schedule_transaction_executions} mock.expect_context() - .times(1) + .times(1 + extra_context_use) .in_sequence(&mut seq.lock().unwrap()) - .return_const(SchedulingContext::new(bank)); + .return_const(SchedulingContext::for_verification(bank)); for wait_reason in is_dropped_flags { let seq_cloned = seq.clone(); @@ -765,6 +803,7 @@ mod tests { bank, is_dropped_flags, None:: ()>, + 0, ) } @@ -826,6 +865,7 @@ mod tests { .times(1) .returning(|| ()); }), + 0, )), ); goto_end_of_slot_with_scheduler(&bank); @@ -867,6 +907,7 @@ mod tests { .returning(|| TransactionError::InsufficientFundsForFee); } }), + 1, ); let bank = BankWithScheduler::new(bank, Some(mocked_scheduler)); diff --git a/sdk/frozen-abi/src/abi_example.rs b/sdk/frozen-abi/src/abi_example.rs index 63b3c1d68c28d6..46eae5a5ee4b85 100644 --- a/sdk/frozen-abi/src/abi_example.rs +++ b/sdk/frozen-abi/src/abi_example.rs @@ -189,6 +189,13 @@ example_impls! { i32, 0 } example_impls! { i64, 0 } example_impls! { i128, 0 } +impl AbiExample for std::num::Saturating { + fn example() -> Self { + info!("AbiExample for (Saturating): {}", type_name::()); + std::num::Saturating(T::example()) + } +} + example_impls! { f32, 0.0f32 } example_impls! { f64, 0.0f64 } example_impls! { String, String::new() } diff --git a/sdk/src/lib.rs b/sdk/src/lib.rs index 752ad612a5c7b5..6d230318bbd9ca 100644 --- a/sdk/src/lib.rs +++ b/sdk/src/lib.rs @@ -92,6 +92,7 @@ pub mod reward_type { pub use solana_reward_info::RewardType; } pub mod rpc_port; +pub mod scheduling; pub mod shred_version; pub mod signature; pub mod signer; diff --git a/sdk/src/scheduling.rs b/sdk/src/scheduling.rs new file mode 100644 index 00000000000000..ebab21a912f292 --- /dev/null +++ b/sdk/src/scheduling.rs @@ -0,0 +1,8 @@ +//! Primitive types relevant to transaction scheduling +#![cfg(feature = "full")] + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum SchedulingMode { + BlockVerification, + BlockProduction, +} diff --git a/sdk/transaction-error/src/lib.rs b/sdk/transaction-error/src/lib.rs index 433a48b0122e31..db08f4fe6ed11f 100644 --- a/sdk/transaction-error/src/lib.rs +++ b/sdk/transaction-error/src/lib.rs @@ -137,6 +137,9 @@ pub enum TransactionError { /// Program cache hit max limit. ProgramCacheHitMaxLimit, + + /// Commit failed internally. + CommitFailed, } impl std::error::Error for TransactionError {} @@ -220,6 +223,8 @@ impl fmt::Display for TransactionError { => f.write_str("Sum of account balances before and after transaction do not match"), Self::ProgramCacheHitMaxLimit => f.write_str("Program cache hit max limit"), + Self::CommitFailed + => f.write_str("CommitFailed"), } } } diff --git a/storage-proto/proto/transaction_by_addr.proto b/storage-proto/proto/transaction_by_addr.proto index d0fa74a2104707..c4025dbafe8922 100644 --- a/storage-proto/proto/transaction_by_addr.proto +++ b/storage-proto/proto/transaction_by_addr.proto @@ -63,6 +63,7 @@ enum TransactionErrorType { PROGRAM_EXECUTION_TEMPORARILY_RESTRICTED = 35; UNBALANCED_TRANSACTION = 36; PROGRAM_CACHE_HIT_MAX_LIMIT = 37; + COMMIT_FAILED = 38; } message InstructionError { diff --git a/storage-proto/src/convert.rs b/storage-proto/src/convert.rs index 6a6e451b4858f1..55a54c3d06d54c 100644 --- a/storage-proto/src/convert.rs +++ b/storage-proto/src/convert.rs @@ -852,6 +852,7 @@ impl TryFrom for TransactionError { 34 => TransactionError::ResanitizationNeeded, 36 => TransactionError::UnbalancedTransaction, 37 => TransactionError::ProgramCacheHitMaxLimit, + 38 => TransactionError::CommitFailed, _ => return Err("Invalid TransactionError"), }) } @@ -973,6 +974,9 @@ impl From for tx_by_addr::TransactionError { TransactionError::ProgramCacheHitMaxLimit => { tx_by_addr::TransactionErrorType::ProgramCacheHitMaxLimit } + TransactionError::CommitFailed => { + tx_by_addr::TransactionErrorType::CommitFailed + } } as i32, instruction_error: match transaction_error { TransactionError::InstructionError(index, ref instruction_error) => { diff --git a/svm/examples/Cargo.lock b/svm/examples/Cargo.lock index 7bd58e61e953d3..5a65d16330bde9 100644 --- a/svm/examples/Cargo.lock +++ b/svm/examples/Cargo.lock @@ -1023,6 +1023,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -1297,13 +1306,35 @@ version = "0.99.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" dependencies = [ - "convert_case", + "convert_case 0.4.0", "proc-macro2", "quote", "rustc_version", "syn 2.0.87", ] +[[package]] +name = "derive_more" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" +dependencies = [ + "derive_more-impl", +] + +[[package]] +name = "derive_more-impl" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" +dependencies = [ + "convert_case 0.6.0", + "proc-macro2", + "quote", + "syn 2.0.87", + "unicode-xid", +] + [[package]] name = "dialoguer" version = "0.10.4" @@ -1412,6 +1443,12 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" +[[package]] +name = "dyn-clone" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" + [[package]] name = "eager" version = "0.1.0" @@ -2621,7 +2658,7 @@ version = "18.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2b99d4207e2a04fb4581746903c2bb7eb376f88de9c699d0f3e10feeac0cd3a" dependencies = [ - "derive_more", + "derive_more 0.99.18", "futures 0.3.31", "jsonrpc-core", "jsonrpc-pubsub", @@ -5219,6 +5256,7 @@ dependencies = [ "ahash 0.8.11", "anyhow", "arrayvec", + "assert_matches", "base64 0.22.1", "bincode", "bs58", @@ -5226,6 +5264,7 @@ dependencies = [ "chrono", "crossbeam-channel", "dashmap", + "derive_more 1.0.0", "etcd-client", "futures 0.3.31", "histogram", @@ -5873,6 +5912,7 @@ dependencies = [ "bincode", "bv", "caps", + "crossbeam-channel", "curve25519-dalek 4.1.3", "dlopen2", "fnv", @@ -6412,6 +6452,7 @@ dependencies = [ "ahash 0.8.11", "aquamarine", "arrayref", + "assert_matches", "base64 0.22.1", "bincode", "blake3", @@ -7324,16 +7365,20 @@ dependencies = [ "crossbeam-channel", "dashmap", "derive-where", + "derive_more 1.0.0", + "dyn-clone", "log", - "qualifier_attr", "scopeguard", "solana-ledger", + "solana-perf", + "solana-poh", "solana-runtime", "solana-runtime-transaction", "solana-sdk", "solana-timings", "solana-unified-scheduler-logic", "static_assertions", + "trait-set", "vec_extract_if_polyfill", ] @@ -8520,6 +8565,17 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "trait-set" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b79e2e9c9ab44c6d7c20d5976961b47e8f49ac199154daa514b77cd1ab536625" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "trees" version = "0.4.2" @@ -8592,6 +8648,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "unicode-width" version = "0.1.14" diff --git a/unified-scheduler-logic/src/lib.rs b/unified-scheduler-logic/src/lib.rs index 2e8caca3b85b8b..390802a9601c18 100644 --- a/unified-scheduler-logic/src/lib.rs +++ b/unified-scheduler-logic/src/lib.rs @@ -450,6 +450,10 @@ impl TaskInner { .with_borrow_mut(token, |usage_count| usage_count.decrement_self().is_zero()); did_unblock.then_some(self) } + + pub fn into_transaction(self: Task) -> RuntimeTransaction { + Task::into_inner(self).unwrap().transaction + } } /// [`Task`]'s per-address context to lock a [usage_queue](UsageQueue) with [certain kind of @@ -618,23 +622,37 @@ const_assert_eq!(mem::size_of::(), 8); pub struct SchedulingStateMachine { unblocked_task_queue: VecDeque, active_task_count: ShortCounter, + executing_task_count: ShortCounter, + max_executing_task_count: u32, handled_task_count: ShortCounter, unblocked_task_count: ShortCounter, total_task_count: ShortCounter, count_token: BlockedUsageCountToken, usage_queue_token: UsageQueueToken, } -const_assert_eq!(mem::size_of::(), 48); +const_assert_eq!(mem::size_of::(), 56); impl SchedulingStateMachine { pub fn has_no_active_task(&self) -> bool { self.active_task_count.is_zero() } + pub fn has_no_executing_task(&self) -> bool { + self.executing_task_count.current() == 0 + } + pub fn has_unblocked_task(&self) -> bool { !self.unblocked_task_queue.is_empty() } + pub fn has_runnable_task(&mut self) -> bool { + self.is_task_runnable() && self.has_unblocked_task() + } + + pub fn is_task_runnable(&self) -> bool { + self.executing_task_count.current() < self.max_executing_task_count + } + pub fn unblocked_task_queue_count(&self) -> usize { self.unblocked_task_queue.len() } @@ -663,14 +681,28 @@ impl SchedulingStateMachine { /// Note that this function takes ownership of the task to allow for future optimizations. #[must_use] pub fn schedule_task(&mut self, task: Task) -> Option { + self.do_schedule_task(task, false) + } + + pub fn do_schedule_task(&mut self, task: Task, force_buffer_mode: bool) -> Option { self.total_task_count.increment_self(); self.active_task_count.increment_self(); - self.try_lock_usage_queues(task) + self.try_lock_usage_queues(task).and_then(|task| { + if self.is_task_runnable() && !force_buffer_mode { + self.executing_task_count.increment_self(); + Some(task) + } else { + self.unblocked_task_count.increment_self(); + self.unblocked_task_queue.push_back(task); + None + } + }) } #[must_use] pub fn schedule_next_unblocked_task(&mut self) -> Option { self.unblocked_task_queue.pop_front().inspect(|_| { + self.executing_task_count.increment_self(); self.unblocked_task_count.increment_self(); }) } @@ -686,6 +718,7 @@ impl SchedulingStateMachine { /// tasks inside `SchedulingStateMachine` to provide an offloading-based optimization /// opportunity for callers. pub fn deschedule_task(&mut self, task: &Task) { + self.executing_task_count.decrement_self(); self.active_task_count.decrement_self(); self.handled_task_count.increment_self(); self.unlock_usage_queues(task); @@ -772,6 +805,14 @@ impl SchedulingStateMachine { transaction: RuntimeTransaction, index: usize, usage_queue_loader: &mut impl FnMut(Pubkey) -> UsageQueue, + ) -> Task { + Self::do_create_task(transaction, index, usage_queue_loader) + } + + pub fn do_create_task( + transaction: RuntimeTransaction, + index: usize, + usage_queue_loader: &mut impl FnMut(Pubkey) -> UsageQueue, ) -> Task { // It's crucial for tasks to be validated with // `account_locks::validate_account_locks()` prior to the creation. @@ -841,11 +882,14 @@ impl SchedulingStateMachine { /// other slots. pub fn reinitialize(&mut self) { assert!(self.has_no_active_task()); + assert_eq!(self.executing_task_count.current(), 0); assert_eq!(self.unblocked_task_queue.len(), 0); // nice trick to ensure all fields are handled here if new one is added. let Self { unblocked_task_queue: _, active_task_count, + executing_task_count: _, + max_executing_task_count: _, handled_task_count, unblocked_task_count, total_task_count, @@ -865,12 +909,16 @@ impl SchedulingStateMachine { /// # Safety /// Call this exactly once for each thread. See [`TokenCell`] for details. #[must_use] - pub unsafe fn exclusively_initialize_current_thread_for_scheduling() -> Self { + pub unsafe fn exclusively_initialize_current_thread_for_scheduling( + max_executing_task_count: u32, + ) -> Self { Self { // It's very unlikely this is desired to be configurable, like // `UsageQueueInner::blocked_usages_from_tasks`'s cap. unblocked_task_queue: VecDeque::with_capacity(1024), active_task_count: ShortCounter::zero(), + executing_task_count: ShortCounter::zero(), + max_executing_task_count, handled_task_count: ShortCounter::zero(), unblocked_task_count: ShortCounter::zero(), total_task_count: ShortCounter::zero(), @@ -878,6 +926,11 @@ impl SchedulingStateMachine { usage_queue_token: unsafe { UsageQueueToken::assume_exclusive_mutating_thread() }, } } + + #[cfg(test)] + unsafe fn exclusively_initialize_current_thread_for_scheduling_for_test() -> Self { + Self::exclusively_initialize_current_thread_for_scheduling(200) + } } #[cfg(test)] @@ -941,7 +994,7 @@ mod tests { #[test] fn test_scheduling_state_machine_creation() { let state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; assert_eq!(state_machine.active_task_count(), 0); assert_eq!(state_machine.total_task_count(), 0); @@ -951,7 +1004,7 @@ mod tests { #[test] fn test_scheduling_state_machine_good_reinitialization() { let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; state_machine.total_task_count.increment_self(); assert_eq!(state_machine.total_task_count(), 1); @@ -963,7 +1016,7 @@ mod tests { #[should_panic(expected = "assertion failed: self.has_no_active_task()")] fn test_scheduling_state_machine_bad_reinitialization() { let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; let address_loader = &mut create_address_loader(None); let task = SchedulingStateMachine::create_task(simplest_transaction(), 3, address_loader); @@ -988,7 +1041,7 @@ mod tests { let task = SchedulingStateMachine::create_task(sanitized, 3, address_loader); let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; let task = state_machine.schedule_task(task).unwrap(); assert_eq!(state_machine.active_task_count(), 1); @@ -1008,7 +1061,7 @@ mod tests { let task3 = SchedulingStateMachine::create_task(sanitized.clone(), 103, address_loader); let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; assert_matches!( state_machine @@ -1060,7 +1113,7 @@ mod tests { let task3 = SchedulingStateMachine::create_task(sanitized.clone(), 103, address_loader); let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; assert_matches!( state_machine @@ -1110,7 +1163,7 @@ mod tests { let task2 = SchedulingStateMachine::create_task(sanitized2, 102, address_loader); let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; // both of read-only tasks should be immediately runnable assert_matches!( @@ -1151,7 +1204,7 @@ mod tests { let task3 = SchedulingStateMachine::create_task(sanitized3, 103, address_loader); let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; assert_matches!( state_machine @@ -1202,7 +1255,7 @@ mod tests { let task3 = SchedulingStateMachine::create_task(sanitized3, 103, address_loader); let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; assert_matches!( state_machine @@ -1244,7 +1297,7 @@ mod tests { let task2 = SchedulingStateMachine::create_task(sanitized2, 102, address_loader); let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; assert_matches!( state_machine @@ -1280,7 +1333,7 @@ mod tests { let task4 = SchedulingStateMachine::create_task(sanitized4, 104, address_loader); let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; assert_matches!( state_machine @@ -1336,7 +1389,7 @@ mod tests { let task2 = SchedulingStateMachine::create_task(sanitized2, 102, address_loader); let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; assert_matches!( state_machine @@ -1376,7 +1429,7 @@ mod tests { #[should_panic(expected = "internal error: entered unreachable code")] fn test_unreachable_unlock_conditions1() { let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; let usage_queue = UsageQueue::default(); usage_queue @@ -1390,7 +1443,7 @@ mod tests { #[should_panic(expected = "internal error: entered unreachable code")] fn test_unreachable_unlock_conditions2() { let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; let usage_queue = UsageQueue::default(); usage_queue @@ -1405,7 +1458,7 @@ mod tests { #[should_panic(expected = "internal error: entered unreachable code")] fn test_unreachable_unlock_conditions3() { let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling_for_test() }; let usage_queue = UsageQueue::default(); usage_queue diff --git a/unified-scheduler-pool/Cargo.toml b/unified-scheduler-pool/Cargo.toml index e1b17308633798..17fd58dbd7ab97 100644 --- a/unified-scheduler-pool/Cargo.toml +++ b/unified-scheduler-pool/Cargo.toml @@ -14,16 +14,20 @@ assert_matches = { workspace = true } crossbeam-channel = { workspace = true } dashmap = { workspace = true } derive-where = { workspace = true } +derive_more = { workspace = true } +dyn-clone = { workspace = true } log = { workspace = true } -qualifier_attr = { workspace = true } scopeguard = { workspace = true } solana-ledger = { workspace = true } +solana-perf = { workspace = true } +solana-poh = { workspace = true } solana-runtime = { workspace = true } solana-runtime-transaction = { workspace = true } solana-sdk = { workspace = true } solana-timings = { workspace = true } solana-unified-scheduler-logic = { workspace = true } static_assertions = { workspace = true } +trait-set = { workspace = true } vec_extract_if_polyfill = { workspace = true } [dev-dependencies] @@ -31,6 +35,8 @@ assert_matches = { workspace = true } lazy_static = { workspace = true } solana-logger = { workspace = true } solana-runtime = { workspace = true, features = ["dev-context-only-utils"] } +# See order-crates-for-publishing.py for using this unusual `path = "."` +solana-unified-scheduler-pool = { path = ".", features = ["dev-context-only-utils"] } [features] dev-context-only-utils = [] diff --git a/unified-scheduler-pool/src/lib.rs b/unified-scheduler-pool/src/lib.rs index 61690f4ff3917f..c4d59995c30629 100644 --- a/unified-scheduler-pool/src/lib.rs +++ b/unified-scheduler-pool/src/lib.rs @@ -8,25 +8,25 @@ //! and commits any side-effects (i.e. on-chain state changes) into the associated `Bank` via //! `solana-ledger`'s helper function called `execute_batch()`. -#[cfg(feature = "dev-context-only-utils")] -use qualifier_attr::qualifiers; use { assert_matches::assert_matches, crossbeam_channel::{self, never, select_biased, Receiver, RecvError, SendError, Sender}, dashmap::DashMap, derive_where::derive_where, + dyn_clone::{clone_trait_object, DynClone}, log::*, scopeguard::defer, solana_ledger::blockstore_processor::{ execute_batch, TransactionBatchWithIndexes, TransactionStatusSender, }, + solana_perf::packet::{BankingPacketBatch, BankingPacketReceiver}, + solana_poh::poh_recorder::TransactionRecorder, solana_runtime::{ - bank::Bank, installed_scheduler_pool::{ initialized_result_with_timings, InstalledScheduler, InstalledSchedulerBox, - InstalledSchedulerPool, InstalledSchedulerPoolArc, ResultWithTimings, ScheduleResult, - SchedulerAborted, SchedulerId, SchedulingContext, TimeoutListener, - UninstalledScheduler, UninstalledSchedulerBox, + InstalledSchedulerPool, ResultWithTimings, ScheduleResult, SchedulerAborted, + SchedulerId, SchedulingContext, TimeoutListener, UninstalledScheduler, + UninstalledSchedulerBox, }, prioritization_fee_cache::PrioritizationFeeCache, vote_sender_types::ReplayVoteSender, @@ -34,6 +34,7 @@ use { solana_runtime_transaction::runtime_transaction::RuntimeTransaction, solana_sdk::{ pubkey::Pubkey, + scheduling::SchedulingMode, transaction::{Result, SanitizedTransaction, TransactionError}, }, solana_timings::ExecuteTimings, @@ -44,15 +45,23 @@ use { marker::PhantomData, mem, sync::{ - atomic::{AtomicU64, Ordering::Relaxed}, - Arc, Mutex, OnceLock, Weak, + atomic::{AtomicU64, AtomicUsize, Ordering::Relaxed}, + Arc, Condvar, Mutex, MutexGuard, OnceLock, Weak, }, thread::{self, sleep, JoinHandle}, time::{Duration, Instant}, }, + trait_set::trait_set, vec_extract_if_polyfill::MakeExtractIf, }; +#[derive(Clone)] +pub struct BankingStageContext { + adapter: Arc, + banking_packet_receiver: BankingPacketReceiver, + on_banking_packet_receive: Box, +} + mod sleepless_testing; use crate::sleepless_testing::BuilderTracked; @@ -70,15 +79,40 @@ enum CheckPoint { type AtomicSchedulerId = AtomicU64; +#[derive(Debug)] +pub enum SupportedSchedulingMode { + Either(SchedulingMode), + Both, +} + +impl SupportedSchedulingMode { + fn is_supported(&self, requested_mode: SchedulingMode) -> bool { + match (self, requested_mode) { + (Self::Both, _) => true, + (Self::Either(ref supported), ref requested) if supported == requested => true, + _ => false, + } + } + + #[cfg(feature = "dev-context-only-utils")] + fn block_verification_only() -> Self { + Self::Either(SchedulingMode::BlockVerification) + } +} + // SchedulerPool must be accessed as a dyn trait from solana-runtime, because SchedulerPool // contains some internal fields, whose types aren't available in solana-runtime (currently // TransactionStatusSender; also, PohRecorder in the future)... #[derive(Debug)] pub struct SchedulerPool, TH: TaskHandler> { + supported_scheduling_mode: SupportedSchedulingMode, scheduler_inners: Mutex>, + block_production_scheduler_inner: Mutex<(Option, Option)>, + block_production_scheduler_condvar: Condvar, + block_production_scheduler_respawner: Mutex>, trashed_scheduler_inners: Mutex>, timeout_listeners: Mutex>, - handler_count: usize, + block_verification_handler_count: usize, handler_context: HandlerContext, // weak_self could be elided by changing InstalledScheduler::take_scheduler()'s receiver to // Arc from &Self, because SchedulerPool is used as in the form of Arc @@ -102,14 +136,15 @@ pub struct HandlerContext { transaction_status_sender: Option, replay_vote_sender: Option, prioritization_fee_cache: Arc, + transaction_recorder: TransactionRecorder, } pub type DefaultSchedulerPool = SchedulerPool, DefaultTaskHandler>; -const DEFAULT_POOL_CLEANER_INTERVAL: Duration = Duration::from_secs(10); +const DEFAULT_POOL_CLEANER_INTERVAL: Duration = Duration::from_secs(5); const DEFAULT_MAX_POOLING_DURATION: Duration = Duration::from_secs(180); -const DEFAULT_TIMEOUT_DURATION: Duration = Duration::from_secs(12); +const DEFAULT_TIMEOUT_DURATION: Duration = Duration::from_secs(5); // Rough estimate of max UsageQueueLoader size in bytes: // UsageFromTask * UsageQueue's capacity * DEFAULT_MAX_USAGE_QUEUE_COUNT // 16 bytes * 128 items * 262_144 entries == 512 MiB @@ -126,27 +161,47 @@ const DEFAULT_TIMEOUT_DURATION: Duration = Duration::from_secs(12); // because UsageQueueLoader won't grow that much to begin with. const DEFAULT_MAX_USAGE_QUEUE_COUNT: usize = 262_144; +trait_set! { + pub trait BatchConverter = + DynClone + (for<'a> Fn(BankingPacketBatch, &'a dyn Fn(Task))) + Send + 'static; +} + +clone_trait_object!(BatchConverter); + +type BatchConverterCreator = + Box) -> Box) + Send>; + +#[derive(derive_more::Debug)] +struct BlockProductionSchedulerRespawner { + handler_count: usize, + #[debug("{on_spawn_block_production_scheduler:p}")] + on_spawn_block_production_scheduler: BatchConverterCreator, + banking_packet_receiver: BankingPacketReceiver, + banking_stage_monitor: Box, +} + impl SchedulerPool where S: SpawnableScheduler, TH: TaskHandler, { - // Some internal impl and test code want an actual concrete type, NOT the - // `dyn InstalledSchedulerPool`. So don't merge this into `Self::new_dyn()`. - #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))] - fn new( + pub fn new( + supported_scheduling_mode: SupportedSchedulingMode, handler_count: Option, log_messages_bytes_limit: Option, transaction_status_sender: Option, replay_vote_sender: Option, prioritization_fee_cache: Arc, + transaction_recorder: TransactionRecorder, ) -> Arc { Self::do_new( + supported_scheduling_mode, handler_count, log_messages_bytes_limit, transaction_status_sender, replay_vote_sender, prioritization_fee_cache, + transaction_recorder, DEFAULT_POOL_CLEANER_INTERVAL, DEFAULT_MAX_POOLING_DURATION, DEFAULT_MAX_USAGE_QUEUE_COUNT, @@ -154,30 +209,62 @@ where ) } + #[cfg(feature = "dev-context-only-utils")] + pub fn new_for_verification( + handler_count: Option, + log_messages_bytes_limit: Option, + transaction_status_sender: Option, + replay_vote_sender: Option, + prioritization_fee_cache: Arc, + ) -> Arc { + Self::new( + SupportedSchedulingMode::block_verification_only(), + handler_count, + log_messages_bytes_limit, + transaction_status_sender, + replay_vote_sender, + prioritization_fee_cache, + TransactionRecorder::new_dummy(), + ) + } + + #[allow(clippy::too_many_arguments)] fn do_new( + supported_scheduling_mode: SupportedSchedulingMode, handler_count: Option, log_messages_bytes_limit: Option, transaction_status_sender: Option, replay_vote_sender: Option, prioritization_fee_cache: Arc, + mut transaction_recorder: TransactionRecorder, pool_cleaner_interval: Duration, max_pooling_duration: Duration, max_usage_queue_count: usize, timeout_duration: Duration, ) -> Arc { let handler_count = handler_count.unwrap_or(Self::default_handler_count()); - assert!(handler_count >= 1); + let bp_is_supported = + supported_scheduling_mode.is_supported(SchedulingMode::BlockProduction); + + if !bp_is_supported { + transaction_recorder = TransactionRecorder::new_dummy(); + } let scheduler_pool = Arc::new_cyclic(|weak_self| Self { + supported_scheduling_mode, scheduler_inners: Mutex::default(), + block_production_scheduler_inner: Mutex::default(), + block_production_scheduler_condvar: Condvar::default(), + block_production_scheduler_respawner: Mutex::default(), trashed_scheduler_inners: Mutex::default(), timeout_listeners: Mutex::default(), - handler_count, + block_verification_handler_count: handler_count, handler_context: HandlerContext { log_messages_bytes_limit, transaction_status_sender, replay_vote_sender, prioritization_fee_cache, + transaction_recorder, }, weak_self: weak_self.clone(), next_scheduler_id: AtomicSchedulerId::default(), @@ -188,8 +275,10 @@ where let cleaner_main_loop = { let weak_scheduler_pool = Arc::downgrade(&scheduler_pool); + let mut exiting = false; move || loop { sleep(pool_cleaner_interval); + trace!("Scheduler pool cleaner: start!!!",); let Some(scheduler_pool) = weak_scheduler_pool.upgrade() else { break; @@ -222,6 +311,12 @@ where idle_inner_count }; + let banking_stage_status = scheduler_pool.banking_stage_status(); + if !exiting && matches!(banking_stage_status, Some(BankingStageStatus::Exited)) { + exiting = true; + scheduler_pool.unregister_banking_stage(); + } + let trashed_inner_count = { let Ok(mut trashed_scheduler_inners) = scheduler_pool.trashed_scheduler_inners.lock() @@ -236,7 +331,7 @@ where trashed_inner_count }; - let triggered_timeout_listener_count = { + let (triggered_timeout_listener_count, active_timeout_listener_count) = { // Pre-allocate rather large capacity to avoid reallocation inside the lock. let mut expired_listeners = Vec::with_capacity(128); let Ok(mut timeout_listeners) = scheduler_pool.timeout_listeners.lock() else { @@ -248,24 +343,59 @@ where now.duration_since(*registered_at) > timeout_duration }, )); + let not_expired_count = timeout_listeners.len(); drop(timeout_listeners); - let count = expired_listeners.len(); + let expired_count = expired_listeners.len(); for (timeout_listener, _registered_at) in expired_listeners { timeout_listener.trigger(scheduler_pool.clone()); } - count + (expired_count, not_expired_count) }; + if matches!(banking_stage_status, Some(BankingStageStatus::Inactive)) { + let mut id_and_inner = scheduler_pool + .block_production_scheduler_inner + .lock() + .unwrap(); + if let Some(pooled) = &id_and_inner.1 { + if pooled.is_overgrown() { + let pooled = id_and_inner.1.take().unwrap(); + assert_eq!(Some(pooled.id()), id_and_inner.0.take()); + scheduler_pool.spawn_block_production_scheduler(&mut id_and_inner); + drop(id_and_inner); + drop(pooled); + } else { + pooled.reset(); + } + } + } + info!( - "Scheduler pool cleaner: dropped {} idle inners, {} trashed inners, triggered {} timeout listeners", - idle_inner_count, trashed_inner_count, triggered_timeout_listener_count, + "Scheduler pool cleaner: dropped {} idle inners, {} trashed inners, triggered {} timeout listeners, (exit: {:?})", + idle_inner_count, trashed_inner_count, triggered_timeout_listener_count, exiting, ); sleepless_testing::at(CheckPoint::IdleSchedulerCleaned(idle_inner_count)); sleepless_testing::at(CheckPoint::TrashedSchedulerCleaned(trashed_inner_count)); sleepless_testing::at(CheckPoint::TimeoutListenerTriggered( triggered_timeout_listener_count, )); + + if exiting && active_timeout_listener_count == 0 { + // Wait a bit to ensure the replay stage has gone. + sleep(Duration::from_secs(1)); + + let mut id_and_inner = scheduler_pool + .block_production_scheduler_inner + .lock() + .unwrap(); + if let Some(pooled) = id_and_inner.1.take() { + assert_eq!(Some(pooled.id()), id_and_inner.0.take()); + drop(id_and_inner); + drop(pooled); + } + break; + } } }; @@ -278,24 +408,6 @@ where scheduler_pool } - // This apparently-meaningless wrapper is handy, because some callers explicitly want - // `dyn InstalledSchedulerPool` to be returned for type inference convenience. - pub fn new_dyn( - handler_count: Option, - log_messages_bytes_limit: Option, - transaction_status_sender: Option, - replay_vote_sender: Option, - prioritization_fee_cache: Arc, - ) -> InstalledSchedulerPoolArc { - Self::new( - handler_count, - log_messages_bytes_limit, - transaction_status_sender, - replay_vote_sender, - prioritization_fee_cache, - ) - } - // See a comment at the weak_self field for justification of this method's existence. fn self_arc(&self) -> Arc { self.weak_self @@ -309,21 +421,49 @@ where // This fn needs to return immediately due to being part of the blocking // `::wait_for_termination()` call. - fn return_scheduler(&self, scheduler: S::Inner, should_trash: bool) { + fn return_scheduler(&self, mut scheduler: S::Inner, should_trash: bool) { + let id = scheduler.id(); + debug!("return_scheduler(): id: {id} should_trash: {should_trash}"); + let mut id_and_inner = self.block_production_scheduler_inner.lock().unwrap(); + let is_block_production_scheduler_returned = Some(id) == id_and_inner.0.as_ref().copied(); + if should_trash { + if is_block_production_scheduler_returned { + // Abort this trashed scheduler to stop receiving BankingPacketBatch anymore... + scheduler.ensure_abort(); + } // Delay drop()-ing this trashed returned scheduler inner by stashing it in // self.trashed_scheduler_inners, which is periodically drained by the `solScCleaner` // thread. Dropping it could take long time (in fact, - // PooledSchedulerInner::usage_queue_loader can contain many entries to drop). + // TaskCreator::usage_queue_loader() can contain many entries to drop). self.trashed_scheduler_inners .lock() .expect("not poisoned") .push(scheduler); + + if is_block_production_scheduler_returned && self.should_respawn() { + info!("respawning scheduler after being trashed..."); + assert_eq!(id_and_inner.0.take(), Some(id)); + self.spawn_block_production_scheduler(&mut id_and_inner); + info!("respawned scheduler after being trashed."); + } + drop(id_and_inner); } else { - self.scheduler_inners - .lock() - .expect("not poisoned") - .push((scheduler, Instant::now())); + drop(id_and_inner); + if !is_block_production_scheduler_returned { + self.scheduler_inners + .lock() + .expect("not poisoned") + .push((scheduler, Instant::now())); + } else { + assert!(self + .block_production_scheduler_inner + .lock() + .unwrap() + .1 + .replace(scheduler) + .is_none()); + } } } @@ -339,13 +479,35 @@ where ) -> S { assert_matches!(result_with_timings, (Ok(_), _)); - // pop is intentional for filo, expecting relatively warmed-up scheduler due to having been - // returned recently - if let Some((inner, _pooled_at)) = self.scheduler_inners.lock().expect("not poisoned").pop() - { - S::from_inner(inner, context, result_with_timings) + if matches!(context.mode(), SchedulingMode::BlockVerification) { + // pop is intentional for filo, expecting relatively warmed-up scheduler due to having been + // returned recently + if let Some((inner, _pooled_at)) = + self.scheduler_inners.lock().expect("not poisoned").pop() + { + S::from_inner(inner, context, result_with_timings) + } else { + S::spawn( + self.block_verification_handler_count, + self.self_arc(), + context, + result_with_timings, + None, + ) + } } else { - S::spawn(self.self_arc(), context, result_with_timings) + let mut id_and_inner = self + .block_production_scheduler_inner + .lock() + .expect("not poisoned"); + id_and_inner = self + .block_production_scheduler_condvar + .wait_while(id_and_inner, |id_and_inner| id_and_inner.0.is_none()) + .unwrap(); + let Some(inner) = id_and_inner.1.take() else { + panic!("double take: {:?}, {:?}", context.slot(), context.mode()); + }; + S::from_inner(inner, context, result_with_timings) } } @@ -354,6 +516,96 @@ where self.scheduler_inners.lock().expect("not poisoned").len() } + pub fn block_production_supported(&self) -> bool { + self.supported_scheduling_mode + .is_supported(SchedulingMode::BlockProduction) + } + + pub fn register_banking_stage( + &self, + banking_packet_receiver: BankingPacketReceiver, + handler_count: usize, + banking_stage_monitor: Box, + on_spawn_block_production_scheduler: BatchConverterCreator, + ) { + *self.block_production_scheduler_respawner.lock().unwrap() = + Some(BlockProductionSchedulerRespawner { + handler_count, + banking_packet_receiver, + on_spawn_block_production_scheduler, + banking_stage_monitor, + }); + self.spawn_block_production_scheduler( + &mut self.block_production_scheduler_inner.lock().unwrap(), + ); + } + + fn unregister_banking_stage(&self) { + assert!(self + .block_production_scheduler_respawner + .lock() + .unwrap() + .take() + .is_some()); + } + + fn banking_stage_status(&self) -> Option { + self.block_production_scheduler_respawner + .lock() + .unwrap() + .as_ref() + .map(|respawner| respawner.banking_stage_monitor.status()) + } + + fn should_respawn(&self) -> bool { + !matches!( + self.banking_stage_status(), + None | Some(BankingStageStatus::Exited) + ) + } + + fn spawn_block_production_scheduler( + &self, + id_and_inner: &mut MutexGuard<'_, (Option, Option)>, + ) { + trace!("spawn block production scheduler: start!"); + let (handler_count, banking_stage_context) = { + let mut respawner_write = self.block_production_scheduler_respawner.lock().unwrap(); + let BlockProductionSchedulerRespawner { + handler_count, + banking_packet_receiver, + on_spawn_block_production_scheduler, + banking_stage_monitor: _, + } = &mut *respawner_write.as_mut().unwrap(); + + let adapter = Arc::new(BankingStageAdapter::default()); + + ( + *handler_count, + BankingStageContext { + banking_packet_receiver: banking_packet_receiver.clone(), + on_banking_packet_receive: on_spawn_block_production_scheduler(adapter.clone()), + adapter, + }, + ) + }; + + let scheduler = S::spawn( + handler_count, + self.self_arc(), + SchedulingContext::new(SchedulingMode::BlockProduction, None), + initialized_result_with_timings(), + Some(banking_stage_context), + ); + let ((Ok(_result), _timings), inner) = scheduler.into_inner() else { + panic!() + }; + assert!(id_and_inner.0.replace(inner.id()).is_none()); + assert!(id_and_inner.1.replace(inner).is_none()); + self.block_production_scheduler_condvar.notify_all(); + trace!("spawn block production scheduler: end!"); + } + pub fn default_handler_count() -> usize { Self::calculate_default_handler_count( thread::available_parallelism() @@ -395,8 +647,14 @@ where &self, context: SchedulingContext, result_with_timings: ResultWithTimings, - ) -> InstalledSchedulerBox { - Box::new(self.do_take_resumed_scheduler(context, result_with_timings)) + ) -> Option { + if !self.supported_scheduling_mode.is_supported(context.mode()) { + return None; + } + + Some(Box::new( + self.do_take_resumed_scheduler(context, result_with_timings), + )) } fn register_timeout_listener(&self, timeout_listener: TimeoutListener) { @@ -411,9 +669,8 @@ pub trait TaskHandler: Send + Sync + Debug + Sized + 'static { fn handle( result: &mut Result<()>, timings: &mut ExecuteTimings, - bank: &Arc, - transaction: &RuntimeTransaction, - index: usize, + scheduling_context: &SchedulingContext, + task: &Task, handler_context: &HandlerContext, ); } @@ -425,17 +682,44 @@ impl TaskHandler for DefaultTaskHandler { fn handle( result: &mut Result<()>, timings: &mut ExecuteTimings, - bank: &Arc, - transaction: &RuntimeTransaction, - index: usize, + scheduling_context: &SchedulingContext, + task: &Task, handler_context: &HandlerContext, ) { // scheduler must properly prevent conflicting tx executions. thus, task handler isn't // responsible for locking. + let bank = scheduling_context.bank(); + let transaction = task.transaction(); + let index = task.task_index(); + let batch = bank.prepare_unlocked_batch_from_single_tx(transaction); + let transaction_indexes = match scheduling_context.mode() { + SchedulingMode::BlockVerification => vec![index], + SchedulingMode::BlockProduction => { + if handler_context.transaction_status_sender.is_some() { + // will be filled inside execute_batch() + Vec::with_capacity(1) + } else { + vec![] + } + } + }; let batch_with_indexes = TransactionBatchWithIndexes { batch, - transaction_indexes: vec![index], + transaction_indexes, + }; + + let pre_commit_callback = match scheduling_context.mode() { + SchedulingMode::BlockVerification => None, + SchedulingMode::BlockProduction => Some(|| { + let summary = handler_context + .transaction_recorder + .record_transactions(bank.slot(), vec![transaction.to_versioned_transaction()]); + summary + .result + .ok() + .map(|()| summary.starting_transaction_index) + }), }; *result = execute_batch( @@ -446,7 +730,9 @@ impl TaskHandler for DefaultTaskHandler { timings, handler_context.log_messages_bytes_limit, &handler_context.prioritization_fee_cache, + pre_commit_callback, ); + sleepless_testing::at(CheckPoint::TaskHandled(index)); } } @@ -463,6 +749,10 @@ impl ExecutedTask { result_with_timings: initialized_result_with_timings(), }) } + + fn into_inner(self) -> Task { + self.task + } } // A very tiny generic message type to signal about opening and closing of subchannels, which are @@ -475,6 +765,8 @@ enum SubchanneledPayload { Payload(P1), OpenSubchannel(P2), CloseSubchannel, + Disconnect, + Reset, } type NewTaskPayload = SubchanneledPayload>; @@ -666,10 +958,37 @@ pub struct PooledScheduler { context: SchedulingContext, } +#[derive(Debug)] +enum TaskCreator { + BlockVerification { + usage_queue_loader: UsageQueueLoader, + }, + BlockProduction { + banking_stage_adapter: Arc, + }, +} + +impl TaskCreator { + fn usage_queue_loader(&self) -> &UsageQueueLoader { + use TaskCreator::*; + + match self { + BlockVerification { usage_queue_loader } => usage_queue_loader, + BlockProduction { + banking_stage_adapter, + } => &banking_stage_adapter.usage_queue_loader, + } + } + + fn is_overgrown(&self, max_usage_queue_count: usize) -> bool { + self.usage_queue_loader().count() > max_usage_queue_count + } +} + #[derive(Debug)] pub struct PooledSchedulerInner, TH: TaskHandler> { thread_manager: ThreadManager, - usage_queue_loader: UsageQueueLoader, + task_creator: TaskCreator, } impl Drop for ThreadManager @@ -700,7 +1019,7 @@ where // Ensure to initiate thread shutdown via disconnected new_task_receiver by replacing the // current new_task_sender with a random one... - self.new_task_sender = crossbeam_channel::unbounded().0; + self.disconnect_new_task_sender(); self.ensure_join_threads(true); assert_matches!(self.session_result_with_timings, Some((Ok(_), _))); @@ -709,13 +1028,9 @@ where impl PooledSchedulerInner where - S: SpawnableScheduler, + S: SpawnableScheduler, TH: TaskHandler, { - fn id(&self) -> SchedulerId { - self.thread_manager.scheduler_id - } - fn is_trashed(&self) -> bool { self.is_aborted() || self.is_overgrown() } @@ -739,10 +1054,6 @@ where // scheduler to the pool, considering is_trashed() is checked immediately before that. self.thread_manager.are_threads_joined() } - - fn is_overgrown(&self) -> bool { - self.usage_queue_loader.count() > self.thread_manager.pool.max_usage_queue_count - } } // This type manages the OS threads for scheduling and executing transactions. The term @@ -754,7 +1065,7 @@ where struct ThreadManager, TH: TaskHandler> { scheduler_id: SchedulerId, pool: Arc>, - new_task_sender: Sender, + new_task_sender: Arc>, new_task_receiver: Option>, session_result_sender: Sender, session_result_receiver: Receiver, @@ -770,23 +1081,22 @@ impl, TH: TaskHandler> ThreadManager { fn new(pool: Arc>) -> Self { let (new_task_sender, new_task_receiver) = crossbeam_channel::unbounded(); let (session_result_sender, session_result_receiver) = crossbeam_channel::unbounded(); - let handler_count = pool.handler_count; Self { scheduler_id: pool.new_scheduler_id(), pool, - new_task_sender, + new_task_sender: Arc::new(new_task_sender), new_task_receiver: Some(new_task_receiver), session_result_sender, session_result_receiver, session_result_with_timings: None, scheduler_thread: None, - handler_threads: Vec::with_capacity(handler_count), + handler_threads: vec![], } } fn execute_task_with_handler( - bank: &Arc, + scheduling_context: &SchedulingContext, executed_task: &mut Box, handler_context: &HandlerContext, ) { @@ -794,29 +1104,45 @@ impl, TH: TaskHandler> ThreadManager { TH::handle( &mut executed_task.result_with_timings.0, &mut executed_task.result_with_timings.1, - bank, - executed_task.task.transaction(), - executed_task.task.task_index(), + scheduling_context, + &executed_task.task, handler_context, ); } #[must_use] fn accumulate_result_with_timings( + context: &SchedulingContext, (result, timings): &mut ResultWithTimings, executed_task: HandlerResult, - ) -> Option> { + ) -> Option<(Box, bool)> { let Ok(executed_task) = executed_task else { return None; }; timings.accumulate(&executed_task.result_with_timings.1); - match executed_task.result_with_timings.0 { - Ok(()) => Some(executed_task), - Err(error) => { - error!("error is detected while accumulating....: {error:?}"); - *result = Err(error); - None - } + match context.mode() { + SchedulingMode::BlockVerification => match executed_task.result_with_timings.0 { + Ok(()) => Some((executed_task, false)), + Err(error) => { + error!("error is detected while accumulating....: {error:?}"); + *result = Err(error); + None + } + }, + SchedulingMode::BlockProduction => match executed_task.result_with_timings.0 { + Ok(()) => Some((executed_task, false)), + Err(TransactionError::CommitFailed) + | Err(TransactionError::WouldExceedMaxBlockCostLimit) + | Err(TransactionError::WouldExceedMaxVoteCostLimit) + | Err(TransactionError::WouldExceedMaxAccountCostLimit) + | Err(TransactionError::WouldExceedAccountDataBlockLimit) => { + Some((executed_task, true)) + } + Err(ref error) => { + debug!("error is detected while accumulating....: {error:?}"); + Some((executed_task, false)) + } + }, } } @@ -838,9 +1164,18 @@ impl, TH: TaskHandler> ThreadManager { // for type safety. fn start_threads( &mut self, - context: SchedulingContext, + handler_count: usize, + mut context: SchedulingContext, mut result_with_timings: ResultWithTimings, + banking_stage_context: Option, ) { + assert!(handler_count >= 1); + + let postfix = match context.mode() { + SchedulingMode::BlockVerification => "V", + SchedulingMode::BlockProduction => "P", + }; + // Firstly, setup bi-directional messaging between the scheduler and handlers to pass // around tasks, by creating 2 channels (one for to-be-handled tasks from the scheduler to // the handlers and the other for finished tasks from the handlers to the scheduler). @@ -918,7 +1253,7 @@ impl, TH: TaskHandler> ThreadManager { // prioritization further. Consequently, this also contributes to alleviate the known // heuristic's caveat for the first task of linearized runs, which is described above. let (mut runnable_task_sender, runnable_task_receiver) = - chained_channel::unbounded::(context); + chained_channel::unbounded::(context.clone()); // Create two handler-to-scheduler channels to prioritize the finishing of blocked tasks, // because it is more likely that a blocked task will have more blocked tasks behind it, // which should be scheduled while minimizing the delay to clear buffered linearized runs @@ -938,7 +1273,7 @@ impl, TH: TaskHandler> ThreadManager { // 5. the handler thread reply back to the scheduler thread as an executed task. // 6. the scheduler thread post-processes the executed task. let scheduler_main_loop = { - let handler_count = self.pool.handler_count; + let banking_stage_context = banking_stage_context.clone(); let session_result_sender = self.session_result_sender.clone(); // Taking new_task_receiver here is important to ensure there's a single receiver. In // this way, the replay stage will get .send() failures reliably, after this scheduler @@ -949,6 +1284,13 @@ impl, TH: TaskHandler> ThreadManager { .expect("no 2nd start_threads()"); let mut session_ending = false; + let (mut session_pausing, mut is_finished) = + if matches!(context.mode(), SchedulingMode::BlockProduction) { + (true, true) + } else { + (false, false) + }; + let mut session_resetting = false; // Now, this is the main loop for the scheduler thread, which is a special beast. // @@ -999,7 +1341,9 @@ impl, TH: TaskHandler> ThreadManager { }; let mut state_machine = unsafe { - SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling() + SchedulingStateMachine::exclusively_initialize_current_thread_for_scheduling( + handler_count.checked_mul(2).unwrap().try_into().unwrap(), + ) }; // The following loop maintains and updates ResultWithTimings as its @@ -1009,14 +1353,13 @@ impl, TH: TaskHandler> ThreadManager { // 2. Subsequent result_with_timings are propagated explicitly from // the new_task_receiver.recv() invocation located at the end of loop. 'nonaborted_main_loop: loop { - let mut is_finished = false; while !is_finished { // ALL recv selectors are eager-evaluated ALWAYS by current crossbeam impl, // which isn't great and is inconsistent with `if`s in the Rust's match // arm. So, eagerly binding the result to a variable unconditionally here // makes no perf. difference... let dummy_unblocked_task_receiver = - dummy_receiver(state_machine.has_unblocked_task()); + dummy_receiver(state_machine.has_runnable_task() && !session_pausing); // There's something special called dummy_unblocked_task_receiver here. // This odd pattern was needed to react to newly unblocked tasks from @@ -1030,13 +1373,28 @@ impl, TH: TaskHandler> ThreadManager { // to measure _actual_ cpu usage easily with the select approach. select_biased! { recv(finished_blocked_task_receiver) -> executed_task => { - let Some(executed_task) = Self::accumulate_result_with_timings( + let Ok(executed_task) = executed_task else { + assert_matches!(context.mode(), SchedulingMode::BlockProduction); + break 'nonaborted_main_loop; + }; + + let Some((executed_task, should_pause)) = Self::accumulate_result_with_timings( + &context, &mut result_with_timings, - executed_task.expect("alive handler") + executed_task, ) else { break 'nonaborted_main_loop; }; state_machine.deschedule_task(&executed_task.task); + if should_pause && !session_ending { + let task = banking_stage_context.as_ref().unwrap().adapter.recreate_task( + executed_task.into_inner(), + ); + state_machine.do_schedule_task(task, true); + } + if should_pause && !session_pausing { + session_pausing = true; + } }, recv(dummy_unblocked_task_receiver) -> dummy => { assert_matches!(dummy, Err(RecvError)); @@ -1047,21 +1405,40 @@ impl, TH: TaskHandler> ThreadManager { runnable_task_sender.send_payload(task).unwrap(); }, recv(new_task_receiver) -> message => { - assert!(!session_ending); + assert!(context.mode() == SchedulingMode::BlockProduction || !session_ending); match message { Ok(NewTaskPayload::Payload(task)) => { + if session_ending { + continue; + } sleepless_testing::at(CheckPoint::NewTask(task.task_index())); if let Some(task) = state_machine.schedule_task(task) { runnable_task_sender.send_aux_payload(task).unwrap(); } } Ok(NewTaskPayload::CloseSubchannel) => { - session_ending = true; + match context.mode() { + SchedulingMode::BlockVerification => { + session_ending = true; + }, + SchedulingMode::BlockProduction => { + if !session_pausing { + session_pausing = true; + } else { + info!("ignoring duplicate close subch"); + continue; + } + }, + } + } + Ok(NewTaskPayload::Reset) => { + session_pausing = true; + session_resetting = true; } Ok(NewTaskPayload::OpenSubchannel(_context_and_result_with_timings)) => unreachable!(), - Err(RecvError) => { + Ok(NewTaskPayload::Disconnect) | Err(RecvError) => { // Mostly likely is that this scheduler is dropped for pruned blocks of // abandoned forks... // This short-circuiting is tested with test_scheduler_drop_short_circuiting. @@ -1070,48 +1447,93 @@ impl, TH: TaskHandler> ThreadManager { } }, recv(finished_idle_task_receiver) -> executed_task => { - let Some(executed_task) = Self::accumulate_result_with_timings( + let Some((executed_task, should_pause)) = Self::accumulate_result_with_timings( + &context, &mut result_with_timings, - executed_task.expect("alive handler") + executed_task.expect("alive handler"), ) else { break 'nonaborted_main_loop; }; state_machine.deschedule_task(&executed_task.task); + if should_pause && !session_ending { + let task = banking_stage_context.as_ref().unwrap().adapter.recreate_task( + executed_task.into_inner(), + ); + state_machine.do_schedule_task(task, true); + } + if should_pause && !session_pausing { + session_pausing = true; + } }, }; - is_finished = session_ending && state_machine.has_no_active_task(); + is_finished = session_ending && state_machine.has_no_active_task() + || session_pausing && state_machine.has_no_executing_task(); } + assert!(mem::replace(&mut is_finished, false)); // Finalize the current session after asserting it's explicitly requested so. - assert!(session_ending); + assert!(session_ending || session_pausing); // Send result first because this is blocking the replay code-path. session_result_sender .send(result_with_timings) .expect("always outlived receiver"); - state_machine.reinitialize(); - session_ending = false; // Prepare for the new session. - match new_task_receiver.recv() { - Ok(NewTaskPayload::OpenSubchannel(context_and_result_with_timings)) => { - let (new_context, new_result_with_timings) = - *context_and_result_with_timings; - // We just received subsequent (= not initial) session and about to - // enter into the preceding `while(!is_finished) {...}` loop again. - // Before that, propagate new SchedulingContext to handler threads - runnable_task_sender - .send_chained_channel(new_context, handler_count) - .unwrap(); - result_with_timings = new_result_with_timings; + loop { + if session_resetting { + while let Some(task) = state_machine.schedule_next_unblocked_task() { + state_machine.deschedule_task(&task); + drop(task); + } + session_resetting = false; } - Err(_) => { - // This unusual condition must be triggered by ThreadManager::drop(). - // Initialize result_with_timings with a harmless value... - result_with_timings = initialized_result_with_timings(); - break 'nonaborted_main_loop; + match new_task_receiver.recv() { + Ok(NewTaskPayload::OpenSubchannel(context_and_result_with_timings)) => { + let (new_context, new_result_with_timings) = + *context_and_result_with_timings; + // We just received subsequent (= not initial) session and about to + // enter into the preceding `while(!is_finished) {...}` loop again. + // Before that, propagate new SchedulingContext to handler threads + assert_eq!(context.mode(), new_context.mode()); + + if session_ending { + state_machine.reinitialize(); + session_ending = false; + } else { + session_pausing = false; + } + + runnable_task_sender + .send_chained_channel(new_context.clone(), handler_count) + .unwrap(); + context = new_context; + result_with_timings = new_result_with_timings; + break; + } + Ok(NewTaskPayload::CloseSubchannel) + if matches!(context.mode(), SchedulingMode::BlockProduction) => + { + info!("ignoring duplicate CloseSubchannel..."); + } + Ok(NewTaskPayload::Reset) + if matches!(context.mode(), SchedulingMode::BlockProduction) => + { + session_resetting = true; + } + Ok(NewTaskPayload::Payload(task)) + if matches!(context.mode(), SchedulingMode::BlockProduction) => + { + assert!(state_machine.do_schedule_task(task, true).is_none()); + } + Ok(NewTaskPayload::Disconnect) | Err(_) => { + // This unusual condition must be triggered by ThreadManager::drop(). + // Initialize result_with_timings with a harmless value... + result_with_timings = initialized_result_with_timings(); + break 'nonaborted_main_loop; + } + Ok(_) => unreachable!(), } - Ok(_) => unreachable!(), } } @@ -1139,6 +1561,9 @@ impl, TH: TaskHandler> ThreadManager { }; let handler_main_loop = || { + let banking_stage_context = banking_stage_context.clone(); + let new_task_sender = Arc::downgrade(&self.new_task_sender); + let pool = self.pool.clone(); let mut runnable_task_receiver = runnable_task_receiver.clone(); let finished_blocked_task_sender = finished_blocked_task_sender.clone(); @@ -1152,69 +1577,101 @@ impl, TH: TaskHandler> ThreadManager { // 2. Subsequent contexts are propagated explicitly inside `.after_select()` as part of // `select_biased!`, which are sent from `.send_chained_channel()` in the scheduler // thread for all-but-initial sessions. - move || loop { - let (task, sender) = select_biased! { - recv(runnable_task_receiver.for_select()) -> message => { - let Ok(message) = message else { - break; - }; - if let Some(task) = runnable_task_receiver.after_select(message) { - (task, &finished_blocked_task_sender) - } else { + move || { + let banking_packet_receiver = if let Some(b) = banking_stage_context.as_ref() { + &b.banking_packet_receiver + } else { + &never() + }; + + loop { + let (task, sender) = select_biased! { + recv(runnable_task_receiver.for_select()) -> message => { + let Ok(message) = message else { + break; + }; + if let Some(task) = runnable_task_receiver.after_select(message) { + (task, &finished_blocked_task_sender) + } else { + continue; + } + }, + recv(runnable_task_receiver.aux_for_select()) -> task => { + if let Ok(task) = task { + (task, &finished_idle_task_sender) + } else { + runnable_task_receiver.never_receive_from_aux(); + continue; + } + }, + recv(banking_packet_receiver) -> banking_packet => { + let Some(new_task_sender) = new_task_sender.upgrade() else { + info!("dead new_task_sender"); + break; + }; + + let Ok(banking_packet) = banking_packet else { + info!("disconnected banking_packet_receiver"); + let current_thread = thread::current(); + if new_task_sender.send(NewTaskPayload::Disconnect).is_ok() { + info!("notified a disconnect from {:?}", current_thread); + } else { + // It seems that the scheduler thread has been aborted already... + warn!("failed to notify a disconnect from {:?}", current_thread); + } + break; + }; + (banking_stage_context.as_ref().unwrap().on_banking_packet_receive)(banking_packet, &move |task| { + new_task_sender + .send(NewTaskPayload::Payload(task)) + .unwrap(); + }); continue; + }, + }; + defer! { + if !thread::panicking() { + return; } - }, - recv(runnable_task_receiver.aux_for_select()) -> task => { - if let Ok(task) = task { - (task, &finished_idle_task_sender) + + // The scheduler thread can't detect panics in handler threads with + // disconnected channel errors, unless all of them has died. So, send an + // explicit Err promptly. + let current_thread = thread::current(); + error!("handler thread is panicking: {:?}", current_thread); + if sender.send(Err(HandlerPanicked)).is_ok() { + info!("notified a panic from {:?}", current_thread); } else { - runnable_task_receiver.never_receive_from_aux(); - continue; + // It seems that the scheduler thread has been aborted already... + warn!("failed to notify a panic from {:?}", current_thread); } - }, - }; - defer! { - if !thread::panicking() { - return; } - - // The scheduler thread can't detect panics in handler threads with - // disconnected channel errors, unless all of them has died. So, send an - // explicit Err promptly. - let current_thread = thread::current(); - error!("handler thread is panicking: {:?}", current_thread); - if sender.send(Err(HandlerPanicked)).is_ok() { - info!("notified a panic from {:?}", current_thread); - } else { - // It seems that the scheduler thread has been aborted already... - warn!("failed to notify a panic from {:?}", current_thread); + let mut task = ExecutedTask::new_boxed(task); + Self::execute_task_with_handler( + runnable_task_receiver.context(), + &mut task, + &pool.handler_context, + ); + if sender.send(Ok(task)).is_err() { + warn!("handler_thread: scheduler thread aborted..."); + break; } } - let mut task = ExecutedTask::new_boxed(task); - Self::execute_task_with_handler( - runnable_task_receiver.context().bank(), - &mut task, - &pool.handler_context, - ); - if sender.send(Ok(task)).is_err() { - warn!("handler_thread: scheduler thread aborted..."); - break; - } } }; self.scheduler_thread = Some( thread::Builder::new() - .name("solScheduler".to_owned()) + .name(format!("solSchedule{postfix}")) .spawn_tracked(scheduler_main_loop) .unwrap(), ); - self.handler_threads = (0..self.pool.handler_count) + self.handler_threads = (0..handler_count) .map({ |thx| { thread::Builder::new() - .name(format!("solScHandler{:02}", thx)) + .name(format!("solScHandle{postfix}{:02}", thx)) .spawn_tracked(handler_main_loop()) .unwrap() } @@ -1266,17 +1723,8 @@ impl, TH: TaskHandler> ThreadManager { }; } - fn ensure_join_threads_after_abort( - &mut self, - should_receive_aborted_session_result: bool, - ) -> TransactionError { + fn ensure_join_threads_after_abort(&mut self, should_receive_aborted_session_result: bool) { self.ensure_join_threads(should_receive_aborted_session_result); - self.session_result_with_timings - .as_mut() - .unwrap() - .0 - .clone() - .unwrap_err() } fn are_threads_joined(&self) -> bool { @@ -1290,7 +1738,7 @@ impl, TH: TaskHandler> ThreadManager { } } - fn end_session(&mut self) { + fn do_end_session(&mut self, nonblocking: bool) { if self.are_threads_joined() { assert!(self.session_result_with_timings.is_some()); debug!("end_session(): skipping; already joined the aborted threads.."); @@ -1314,6 +1762,10 @@ impl, TH: TaskHandler> ThreadManager { return; } + if nonblocking { + return; + } + // Even if abort is detected, it's guaranteed that the scheduler thread puts the last // message into the session_result_sender before terminating. let result_with_timings = self.session_result_receiver.recv().unwrap(); @@ -1325,6 +1777,10 @@ impl, TH: TaskHandler> ThreadManager { debug!("end_session(): ended session at {:?}...", thread::current()); } + fn end_session(&mut self) { + self.do_end_session(false) + } + fn start_session( &mut self, context: SchedulingContext, @@ -1339,10 +1795,21 @@ impl, TH: TaskHandler> ThreadManager { )))) .expect("no new session after aborted"); } + + fn disconnect_new_task_sender(&mut self) { + self.new_task_sender = Arc::new(crossbeam_channel::unbounded().0); + } +} + +pub trait SchedulerInner { + fn id(&self) -> SchedulerId; + fn is_overgrown(&self) -> bool; + fn reset(&self); + fn ensure_abort(&mut self); } pub trait SpawnableScheduler: InstalledScheduler { - type Inner: Debug + Send + Sync; + type Inner: SchedulerInner + Debug + Send + Sync; fn into_inner(self) -> (ResultWithTimings, Self::Inner); @@ -1353,9 +1820,11 @@ pub trait SpawnableScheduler: InstalledScheduler { ) -> Self; fn spawn( + handler_count: usize, pool: Arc>, context: SchedulingContext, result_with_timings: ResultWithTimings, + banking_stage_context: Option, ) -> Self where Self: Sized; @@ -1385,21 +1854,82 @@ impl SpawnableScheduler for PooledScheduler { } fn spawn( + handler_count: usize, pool: Arc>, context: SchedulingContext, result_with_timings: ResultWithTimings, + banking_stage_context: Option, ) -> Self { + info!("spawning new scheduler for slot: {:?}", context.slot()); + let task_creator = match context.mode() { + SchedulingMode::BlockVerification => TaskCreator::BlockVerification { + usage_queue_loader: UsageQueueLoader::default(), + }, + SchedulingMode::BlockProduction => TaskCreator::BlockProduction { + banking_stage_adapter: banking_stage_context.as_ref().unwrap().adapter.clone(), + }, + }; let mut inner = Self::Inner { thread_manager: ThreadManager::new(pool), - usage_queue_loader: UsageQueueLoader::default(), + task_creator, }; - inner - .thread_manager - .start_threads(context.clone(), result_with_timings); + inner.thread_manager.start_threads( + handler_count, + context.clone(), + result_with_timings, + banking_stage_context, + ); Self { inner, context } } } +#[derive(Debug)] +pub enum BankingStageStatus { + Active, + Inactive, + Exited, +} + +pub trait BankingStageMonitor: Send + Debug { + fn status(&self) -> BankingStageStatus; +} + +#[derive(Debug, Default)] +pub struct BankingStageAdapter { + usage_queue_loader: UsageQueueLoader, + next_task_id: AtomicUsize, +} + +impl BankingStageAdapter { + pub fn generate_task_ids(&self, count: usize) -> usize { + self.next_task_id.fetch_add(count, Relaxed) + } + + fn do_create_task( + &self, + transaction: RuntimeTransaction, + index: usize, + ) -> Task { + SchedulingStateMachine::do_create_task(transaction, index, &mut |pubkey| { + self.usage_queue_loader.load(pubkey) + }) + } + + pub fn create_new_task( + &self, + transaction: RuntimeTransaction, + index: usize, + ) -> Task { + self.do_create_task(transaction, index) + } + + fn recreate_task(&self, task: Task) -> Task { + let new_index = self.generate_task_ids(1); + let transaction = task.into_transaction(); + self.do_create_task(transaction, new_index) + } +} + impl InstalledScheduler for PooledScheduler { fn id(&self) -> SchedulerId { self.inner.id() @@ -1414,8 +1944,9 @@ impl InstalledScheduler for PooledScheduler { transaction: RuntimeTransaction, index: usize, ) -> ScheduleResult { + assert_matches!(self.context().mode(), SchedulingMode::BlockVerification); let task = SchedulingStateMachine::create_task(transaction, index, &mut |pubkey| { - self.inner.usage_queue_loader.load(pubkey) + self.inner.task_creator.usage_queue_loader().load(pubkey) }); self.inner.thread_manager.send_task(task) } @@ -1423,7 +1954,15 @@ impl InstalledScheduler for PooledScheduler { fn recover_error_after_abort(&mut self) -> TransactionError { self.inner .thread_manager - .ensure_join_threads_after_abort(true) + .ensure_join_threads_after_abort(true); + self.inner + .thread_manager + .session_result_with_timings + .as_mut() + .unwrap() + .0 + .clone() + .unwrap_err() } fn wait_for_termination( @@ -1435,13 +1974,16 @@ impl InstalledScheduler for PooledScheduler { } fn pause_for_recent_blockhash(&mut self) { - self.inner.thread_manager.end_session(); + // this fn is called from poh thread, while it's being locked. so, we can't wait scheduler + // termination here to avoid deadlock. just async signaling is enough + let nonblocking = matches!(self.context().mode(), SchedulingMode::BlockProduction); + self.inner.thread_manager.do_end_session(nonblocking); } } impl UninstalledScheduler for PooledSchedulerInner where - S: SpawnableScheduler>, + S: SpawnableScheduler, TH: TaskHandler, { fn return_to_pool(self: Box) { @@ -1458,6 +2000,38 @@ where } } +impl SchedulerInner for PooledSchedulerInner +where + S: SpawnableScheduler, + TH: TaskHandler, +{ + fn id(&self) -> SchedulerId { + self.thread_manager.scheduler_id + } + + fn is_overgrown(&self) -> bool { + self.task_creator + .is_overgrown(self.thread_manager.pool.max_usage_queue_count) + } + + fn reset(&self) { + if let Err(a) = self + .thread_manager + .new_task_sender + .send(NewTaskPayload::Reset) + { + warn!("failed to send a reset due to error: {a:?}"); + } + } + + fn ensure_abort(&mut self) { + if self.thread_manager.are_threads_joined() { + return; + } + self.thread_manager.disconnect_new_task_sender() + } +} + #[cfg(test)] mod tests { use { @@ -1468,7 +2042,9 @@ mod tests { bank::Bank, bank_forks::BankForks, genesis_utils::{create_genesis_config, GenesisConfigInfo}, - installed_scheduler_pool::{BankWithScheduler, SchedulingContext}, + installed_scheduler_pool::{ + BankWithScheduler, InstalledSchedulerPoolArc, SchedulingContext, + }, prioritization_fee_cache::PrioritizationFeeCache, }, solana_sdk::{ @@ -1485,6 +2061,58 @@ mod tests { }, }; + impl SchedulerPool + where + S: SpawnableScheduler, + TH: TaskHandler, + { + fn do_new_for_verification( + handler_count: Option, + log_messages_bytes_limit: Option, + transaction_status_sender: Option, + replay_vote_sender: Option, + prioritization_fee_cache: Arc, + pool_cleaner_interval: Duration, + max_pooling_duration: Duration, + max_usage_queue_count: usize, + timeout_duration: Duration, + ) -> Arc { + Self::do_new( + SupportedSchedulingMode::block_verification_only(), + handler_count, + log_messages_bytes_limit, + transaction_status_sender, + replay_vote_sender, + prioritization_fee_cache, + TransactionRecorder::new_dummy(), + pool_cleaner_interval, + max_pooling_duration, + max_usage_queue_count, + timeout_duration, + ) + } + + // This apparently-meaningless wrapper is handy, because some callers explicitly want + // `dyn InstalledSchedulerPool` to be returned for type inference convenience. + fn new_dyn_for_verification( + handler_count: Option, + log_messages_bytes_limit: Option, + transaction_status_sender: Option, + replay_vote_sender: Option, + prioritization_fee_cache: Arc, + ) -> InstalledSchedulerPoolArc { + Self::new( + SupportedSchedulingMode::block_verification_only(), + handler_count, + log_messages_bytes_limit, + transaction_status_sender, + replay_vote_sender, + prioritization_fee_cache, + TransactionRecorder::new_dummy(), + ) + } + } + #[derive(Debug)] enum TestCheckPoint { BeforeNewTask, @@ -1505,8 +2133,13 @@ mod tests { solana_logger::setup(); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = - DefaultSchedulerPool::new_dyn(None, None, None, None, ignored_prioritization_fee_cache); + let pool = DefaultSchedulerPool::new_dyn_for_verification( + None, + None, + None, + None, + ignored_prioritization_fee_cache, + ); // this indirectly proves that there should be circular link because there's only one Arc // at this moment now @@ -1521,11 +2154,16 @@ mod tests { solana_logger::setup(); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = - DefaultSchedulerPool::new_dyn(None, None, None, None, ignored_prioritization_fee_cache); + let pool = DefaultSchedulerPool::new_dyn_for_verification( + None, + None, + None, + None, + ignored_prioritization_fee_cache, + ); let bank = Arc::new(Bank::default_for_tests()); - let context = SchedulingContext::new(bank); - let scheduler = pool.take_scheduler(context); + let context = SchedulingContext::for_verification(bank); + let scheduler = pool.take_scheduler(context).unwrap(); let debug = format!("{scheduler:#?}"); assert!(!debug.is_empty()); @@ -1546,7 +2184,7 @@ mod tests { ]); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool_raw = DefaultSchedulerPool::do_new( + let pool_raw = DefaultSchedulerPool::do_new_for_verification( None, None, None, @@ -1559,7 +2197,7 @@ mod tests { ); let pool = pool_raw.clone(); let bank = Arc::new(Bank::default_for_tests()); - let context1 = SchedulingContext::new(bank); + let context1 = SchedulingContext::for_verification(bank); let context2 = context1.clone(); let old_scheduler = pool.do_take_scheduler(context1); @@ -1611,7 +2249,7 @@ mod tests { let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); const REDUCED_MAX_USAGE_QUEUE_COUNT: usize = 1; - let pool_raw = DefaultSchedulerPool::do_new( + let pool_raw = DefaultSchedulerPool::do_new_for_verification( None, None, None, @@ -1624,7 +2262,7 @@ mod tests { ); let pool = pool_raw.clone(); let bank = Arc::new(Bank::default_for_tests()); - let context1 = SchedulingContext::new(bank); + let context1 = SchedulingContext::for_verification(bank); let context2 = context1.clone(); let small_scheduler = pool.do_take_scheduler(context1); @@ -1632,14 +2270,16 @@ mod tests { for _ in 0..REDUCED_MAX_USAGE_QUEUE_COUNT { small_scheduler .inner - .usage_queue_loader + .task_creator + .usage_queue_loader() .load(Pubkey::new_unique()); } let big_scheduler = pool.do_take_scheduler(context2); for _ in 0..REDUCED_MAX_USAGE_QUEUE_COUNT + 1 { big_scheduler .inner - .usage_queue_loader + .task_creator + .usage_queue_loader() .load(Pubkey::new_unique()); } @@ -1686,7 +2326,7 @@ mod tests { ]); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool_raw = DefaultSchedulerPool::do_new( + let pool_raw = DefaultSchedulerPool::do_new_for_verification( None, None, None, @@ -1699,8 +2339,8 @@ mod tests { ); let pool = pool_raw.clone(); let bank = Arc::new(Bank::default_for_tests()); - let context = SchedulingContext::new(bank.clone()); - let scheduler = pool.take_scheduler(context); + let context = SchedulingContext::for_verification(bank.clone()); + let scheduler = pool.take_scheduler(context).unwrap(); let bank = BankWithScheduler::new(bank, Some(scheduler)); pool.register_timeout_listener(bank.create_timeout_listener()); assert_eq!(pool_raw.scheduler_inners.lock().unwrap().len(), 0); @@ -1734,17 +2374,18 @@ mod tests { ]); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool_raw = SchedulerPool::, _>::do_new( - None, - None, - None, - None, - ignored_prioritization_fee_cache, - SHORTENED_POOL_CLEANER_INTERVAL, - DEFAULT_MAX_POOLING_DURATION, - DEFAULT_MAX_USAGE_QUEUE_COUNT, - SHORTENED_TIMEOUT_DURATION, - ); + let pool_raw = + SchedulerPool::, _>::do_new_for_verification( + None, + None, + None, + None, + ignored_prioritization_fee_cache, + SHORTENED_POOL_CLEANER_INTERVAL, + DEFAULT_MAX_POOLING_DURATION, + DEFAULT_MAX_USAGE_QUEUE_COUNT, + SHORTENED_TIMEOUT_DURATION, + ); #[derive(Debug)] struct ExecuteTimingCounter; @@ -1752,9 +2393,8 @@ mod tests { fn handle( _result: &mut Result<()>, timings: &mut ExecuteTimings, - _bank: &Arc, - _transaction: &RuntimeTransaction, - _index: usize, + _bank: &SchedulingContext, + _task: &Task, _handler_context: &HandlerContext, ) { timings.metrics[ExecuteTimingType::CheckUs] += 123; @@ -1770,9 +2410,9 @@ mod tests { let bank = Bank::new_for_tests(&genesis_config); let (bank, _bank_forks) = setup_dummy_fork_graph(bank); - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::for_verification(bank.clone()); - let scheduler = pool.take_scheduler(context); + let scheduler = pool.take_scheduler(context).unwrap(); let bank = BankWithScheduler::new(bank, Some(scheduler)); pool.register_timeout_listener(bank.create_timeout_listener()); @@ -1821,7 +2461,7 @@ mod tests { ]); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool_raw = DefaultSchedulerPool::do_new( + let pool_raw = DefaultSchedulerPool::do_new_for_verification( None, None, None, @@ -1838,9 +2478,9 @@ mod tests { let bank = Bank::new_for_tests(&genesis_config); let (bank, _bank_forks) = setup_dummy_fork_graph(bank); - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::for_verification(bank.clone()); - let scheduler = pool.take_scheduler(context); + let scheduler = pool.take_scheduler(context).unwrap(); let bank = BankWithScheduler::new(bank, Some(scheduler)); pool.register_timeout_listener(bank.create_timeout_listener()); @@ -1868,7 +2508,7 @@ mod tests { ]); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool_raw = SchedulerPool::, _>::do_new( + let pool_raw = SchedulerPool::, _>::do_new_for_verification( None, None, None, @@ -1890,9 +2530,9 @@ mod tests { let bank = Bank::new_for_tests(&genesis_config); let (bank, _bank_forks) = setup_dummy_fork_graph(bank); - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::for_verification(bank.clone()); - let scheduler = pool.take_scheduler(context); + let scheduler = pool.take_scheduler(context).unwrap(); let bank = BankWithScheduler::new(bank, Some(scheduler)); pool.register_timeout_listener(bank.create_timeout_listener()); @@ -1935,9 +2575,8 @@ mod tests { fn handle( result: &mut Result<()>, _timings: &mut ExecuteTimings, - _bank: &Arc, - _transaction: &RuntimeTransaction, - _index: usize, + _bank: &SchedulingContext, + _task: &Task, _handler_context: &HandlerContext, ) { *result = Err(TransactionError::AccountNotFound); @@ -1971,14 +2610,14 @@ mod tests { let bank = Bank::new_for_tests(&genesis_config); let (bank, _bank_forks) = setup_dummy_fork_graph(bank); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = SchedulerPool::, _>::new( + let pool = SchedulerPool::, _>::new_for_verification( None, None, None, None, ignored_prioritization_fee_cache, ); - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::for_verification(bank.clone()); let scheduler = pool.do_take_scheduler(context); scheduler.schedule_execution(tx, 0).unwrap(); @@ -2046,9 +2685,8 @@ mod tests { fn handle( _result: &mut Result<()>, _timings: &mut ExecuteTimings, - _bank: &Arc, - _transaction: &RuntimeTransaction, - _index: usize, + _bank: &SchedulingContext, + _task: &Task, _handler_context: &HandlerContext, ) { *TASK_COUNT.lock().unwrap() += 1; @@ -2064,14 +2702,14 @@ mod tests { let bank = Bank::new_for_tests(&genesis_config); let (bank, _bank_forks) = setup_dummy_fork_graph(bank); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = SchedulerPool::, _>::new( + let pool = SchedulerPool::, _>::new_for_verification( None, None, None, None, ignored_prioritization_fee_cache, ); - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::for_verification(bank.clone()); let scheduler = pool.do_take_scheduler(context); // This test is racy. @@ -2104,10 +2742,15 @@ mod tests { solana_logger::setup(); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = - DefaultSchedulerPool::new(None, None, None, None, ignored_prioritization_fee_cache); + let pool = DefaultSchedulerPool::new_for_verification( + None, + None, + None, + None, + ignored_prioritization_fee_cache, + ); let bank = Arc::new(Bank::default_for_tests()); - let context = &SchedulingContext::new(bank); + let context = &SchedulingContext::for_verification(bank); let scheduler1 = pool.do_take_scheduler(context.clone()); let scheduler_id1 = scheduler1.id(); @@ -2133,10 +2776,15 @@ mod tests { solana_logger::setup(); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = - DefaultSchedulerPool::new(None, None, None, None, ignored_prioritization_fee_cache); + let pool = DefaultSchedulerPool::new_for_verification( + None, + None, + None, + None, + ignored_prioritization_fee_cache, + ); let bank = Arc::new(Bank::default_for_tests()); - let context = &SchedulingContext::new(bank); + let context = &SchedulingContext::for_verification(bank); let mut scheduler = pool.do_take_scheduler(context.clone()); // should never panic. @@ -2152,20 +2800,25 @@ mod tests { solana_logger::setup(); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = - DefaultSchedulerPool::new(None, None, None, None, ignored_prioritization_fee_cache); + let pool = DefaultSchedulerPool::new_for_verification( + None, + None, + None, + None, + ignored_prioritization_fee_cache, + ); let old_bank = &Arc::new(Bank::default_for_tests()); let new_bank = &Arc::new(Bank::default_for_tests()); assert!(!Arc::ptr_eq(old_bank, new_bank)); - let old_context = &SchedulingContext::new(old_bank.clone()); - let new_context = &SchedulingContext::new(new_bank.clone()); + let old_context = &SchedulingContext::for_verification(old_bank.clone()); + let new_context = &SchedulingContext::for_verification(new_bank.clone()); let scheduler = pool.do_take_scheduler(old_context.clone()); let scheduler_id = scheduler.id(); pool.return_scheduler(scheduler.into_inner().1, false); - let scheduler = pool.take_scheduler(new_context.clone()); + let scheduler = pool.take_scheduler(new_context.clone()).unwrap(); assert_eq!(scheduler_id, scheduler.id()); assert!(Arc::ptr_eq(scheduler.context().bank(), new_bank)); } @@ -2178,8 +2831,13 @@ mod tests { let bank_forks = BankForks::new_rw_arc(bank); let mut bank_forks = bank_forks.write().unwrap(); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = - DefaultSchedulerPool::new_dyn(None, None, None, None, ignored_prioritization_fee_cache); + let pool = DefaultSchedulerPool::new_dyn_for_verification( + None, + None, + None, + None, + ignored_prioritization_fee_cache, + ); bank_forks.install_scheduler_pool(pool); } @@ -2192,8 +2850,13 @@ mod tests { let child_bank = Bank::new_from_parent(bank, &Pubkey::default(), 1); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = - DefaultSchedulerPool::new_dyn(None, None, None, None, ignored_prioritization_fee_cache); + let pool = DefaultSchedulerPool::new_dyn_for_verification( + None, + None, + None, + None, + ignored_prioritization_fee_cache, + ); let bank = Bank::default_for_tests(); let bank_forks = BankForks::new_rw_arc(bank); @@ -2242,12 +2905,17 @@ mod tests { let bank = Bank::new_for_tests(&genesis_config); let (bank, _bank_forks) = setup_dummy_fork_graph(bank); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = - DefaultSchedulerPool::new_dyn(None, None, None, None, ignored_prioritization_fee_cache); - let context = SchedulingContext::new(bank.clone()); + let pool = DefaultSchedulerPool::new_dyn_for_verification( + None, + None, + None, + None, + ignored_prioritization_fee_cache, + ); + let context = SchedulingContext::for_verification(bank.clone()); assert_eq!(bank.transaction_count(), 0); - let scheduler = pool.take_scheduler(context); + let scheduler = pool.take_scheduler(context).unwrap(); scheduler.schedule_execution(tx0, 0).unwrap(); let bank = BankWithScheduler::new(bank, Some(scheduler)); assert_matches!(bank.wait_for_completed_scheduler(), Some((Ok(()), _))); @@ -2277,7 +2945,7 @@ mod tests { let (bank, _bank_forks) = setup_dummy_fork_graph(bank); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool_raw = DefaultSchedulerPool::do_new( + let pool_raw = DefaultSchedulerPool::do_new_for_verification( None, None, None, @@ -2289,8 +2957,8 @@ mod tests { DEFAULT_TIMEOUT_DURATION, ); let pool = pool_raw.clone(); - let context = SchedulingContext::new(bank.clone()); - let scheduler = pool.take_scheduler(context); + let context = SchedulingContext::for_verification(bank.clone()); + let scheduler = pool.take_scheduler(context).unwrap(); let unfunded_keypair = Keypair::new(); let bad_tx = RuntimeTransaction::from_transaction_for_tests(system_transaction::transfer( @@ -2383,11 +3051,11 @@ mod tests { fn handle( _result: &mut Result<()>, _timings: &mut ExecuteTimings, - _bank: &Arc, - _transaction: &RuntimeTransaction, - index: usize, + _bank: &SchedulingContext, + task: &Task, _handler_context: &HandlerContext, ) { + let index = task.task_index(); if index == 0 { sleepless_testing::at(PanickingHanlderCheckPoint::BeforeNotifiedPanic); } else if index == 1 { @@ -2410,16 +3078,16 @@ mod tests { const TX_COUNT: usize = 2; let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = SchedulerPool::, _>::new_dyn( + let pool = SchedulerPool::, _>::new_dyn_for_verification( Some(TX_COUNT), // fix to use exactly 2 handlers None, None, None, ignored_prioritization_fee_cache, ); - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::for_verification(bank.clone()); - let scheduler = pool.take_scheduler(context); + let scheduler = pool.take_scheduler(context).unwrap(); for index in 0..TX_COUNT { // Use 2 non-conflicting txes to exercise the channel disconnected case as well. @@ -2463,11 +3131,11 @@ mod tests { fn handle( result: &mut Result<()>, _timings: &mut ExecuteTimings, - _bank: &Arc, - _transaction: &RuntimeTransaction, - index: usize, + _bank: &SchedulingContext, + task: &Task, _handler_context: &HandlerContext, ) { + let index = task.task_index(); *TASK_COUNT.lock().unwrap() += 1; if index == 1 { *result = Err(TransactionError::AccountNotFound); @@ -2485,14 +3153,14 @@ mod tests { let bank = Bank::new_for_tests(&genesis_config); let (bank, _bank_forks) = setup_dummy_fork_graph(bank); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = SchedulerPool::, _>::new( + let pool = SchedulerPool::, _>::new_for_verification( None, None, None, None, ignored_prioritization_fee_cache, ); - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::for_verification(bank.clone()); let scheduler = pool.do_take_scheduler(context); for i in 0..10 { @@ -2532,24 +3200,17 @@ mod tests { fn handle( result: &mut Result<()>, timings: &mut ExecuteTimings, - bank: &Arc, - transaction: &RuntimeTransaction, - index: usize, + bank: &SchedulingContext, + task: &Task, handler_context: &HandlerContext, ) { + let index = task.task_index(); match index { STALLED_TRANSACTION_INDEX => *LOCK_TO_STALL.lock().unwrap(), BLOCKED_TRANSACTION_INDEX => {} _ => unreachable!(), }; - DefaultTaskHandler::handle( - result, - timings, - bank, - transaction, - index, - handler_context, - ); + DefaultTaskHandler::handle(result, timings, bank, task, handler_context); } } @@ -2576,17 +3237,17 @@ mod tests { let bank = Bank::new_for_tests(&genesis_config); let (bank, _bank_forks) = setup_dummy_fork_graph(bank); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = SchedulerPool::, _>::new_dyn( + let pool = SchedulerPool::, _>::new_dyn_for_verification( None, None, None, None, ignored_prioritization_fee_cache, ); - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::for_verification(bank.clone()); assert_eq!(bank.transaction_count(), 0); - let scheduler = pool.take_scheduler(context); + let scheduler = pool.take_scheduler(context).unwrap(); // Stall handling tx0 and tx1 let lock_to_stall = LOCK_TO_STALL.lock().unwrap(); @@ -2617,13 +3278,12 @@ mod tests { fn handle( _result: &mut Result<()>, _timings: &mut ExecuteTimings, - bank: &Arc, - _transaction: &RuntimeTransaction, - index: usize, + context: &SchedulingContext, + task: &Task, _handler_context: &HandlerContext, ) { // The task index must always be matched to the slot. - assert_eq!(index as Slot, bank.slot()); + assert_eq!(task.task_index() as Slot, context.bank().slot()); } } @@ -2643,7 +3303,7 @@ mod tests { )); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); - let pool = SchedulerPool::, _>::new( + let pool = SchedulerPool::, _>::new_for_verification( Some(4), // spawn 4 threads None, None, @@ -2659,8 +3319,8 @@ mod tests { 2, genesis_config.hash(), )); - let context0 = &SchedulingContext::new(bank0.clone()); - let context1 = &SchedulingContext::new(bank1.clone()); + let context0 = &SchedulingContext::for_verification(bank0.clone()); + let context1 = &SchedulingContext::for_verification(bank1.clone()); // Exercise the scheduler by busy-looping to expose the race condition for (context, index) in [(context0, 0), (context1, 1)] @@ -2668,7 +3328,7 @@ mod tests { .cycle() .take(10000) { - let scheduler = pool.take_scheduler(context.clone()); + let scheduler = pool.take_scheduler(context.clone()).unwrap(); scheduler .schedule_execution(dummy_tx.clone(), index) .unwrap(); @@ -2716,7 +3376,6 @@ mod tests { transaction: RuntimeTransaction, index: usize, ) -> ScheduleResult { - let transaction_and_index = (transaction, index); let context = self.context().clone(); let pool = self.3.clone(); @@ -2728,12 +3387,15 @@ mod tests { let mut result = Ok(()); let mut timings = ExecuteTimings::default(); + let task = SchedulingStateMachine::create_task(transaction, index, &mut |_| { + UsageQueue::default() + }); + ::handle( &mut result, &mut timings, - context.bank(), - &transaction_and_index.0, - transaction_and_index.1, + &context, + &task, &pool.handler_context, ); (result, timings) @@ -2776,6 +3438,24 @@ mod tests { } } + impl SchedulerInner for AsyncScheduler { + fn id(&self) -> SchedulerId { + 42 + } + + fn is_overgrown(&self) -> bool { + todo!() + } + + fn reset(&self) { + todo!() + } + + fn ensure_abort(&mut self) { + todo!() + } + } + impl SpawnableScheduler for AsyncScheduler { @@ -2795,9 +3475,11 @@ mod tests { } fn spawn( + _handler_count: usize, pool: Arc>, context: SchedulingContext, _result_with_timings: ResultWithTimings, + _banking_stage_context: Option, ) -> Self { AsyncScheduler::( Mutex::new(initialized_result_with_timings()), @@ -2837,18 +3519,18 @@ mod tests { ); } let (bank, _bank_forks) = setup_dummy_fork_graph(bank); - let context = SchedulingContext::new(bank.clone()); + let context = SchedulingContext::for_verification(bank.clone()); let ignored_prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); let pool = - SchedulerPool::, DefaultTaskHandler>::new_dyn( + SchedulerPool::, DefaultTaskHandler>::new_dyn_for_verification( None, None, None, None, ignored_prioritization_fee_cache, ); - let scheduler = pool.take_scheduler(context); + let scheduler = pool.take_scheduler(context).unwrap(); let bank = BankWithScheduler::new(bank, Some(scheduler)); assert_eq!(bank.transaction_count(), 0); @@ -2923,14 +3605,17 @@ mod tests { let result = &mut Ok(()); let timings = &mut ExecuteTimings::default(); let prioritization_fee_cache = Arc::new(PrioritizationFeeCache::new(0u64)); + let scheduling_context = &SchedulingContext::for_verification(bank.clone()); let handler_context = &HandlerContext { log_messages_bytes_limit: None, transaction_status_sender: None, replay_vote_sender: None, prioritization_fee_cache, + transaction_recorder: TransactionRecorder::new_dummy(), }; - DefaultTaskHandler::handle(result, timings, bank, &tx, 0, handler_context); + let task = SchedulingStateMachine::create_task(tx, 0, &mut |_| UsageQueue::default()); + DefaultTaskHandler::handle(result, timings, scheduling_context, &task, handler_context); assert_matches!(result, Err(TransactionError::AccountLoadedTwice)); } } diff --git a/validator/src/cli.rs b/validator/src/cli.rs index 19a2b7f77e7b0a..4669dcf52d7a24 100644 --- a/validator/src/cli.rs +++ b/validator/src/cli.rs @@ -1595,6 +1595,15 @@ pub fn app<'a>(version: &'a str, default_args: &'a DefaultArgs) -> App<'a, 'a> { .validator(|s| is_within_range(s, 1..)) .help(DefaultSchedulerPool::cli_message()), ) + .arg( + Arg::with_name("enable_experimental_block_production_method") + .long("enable-experimental-block-production-method") + .takes_value(false) + .help( + "Accept unified-scheduler to be used as an experimental block \ + production method", + ), + ) .arg( Arg::with_name("wen_restart") .long("wen-restart") diff --git a/validator/src/main.rs b/validator/src/main.rs index bee65e487ba92a..3c9067c14b5fee 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -1845,6 +1845,17 @@ pub fn main() { "block_production_method", BlockProductionMethod ) + .inspect(|method| { + if matches!(method, BlockProductionMethod::UnifiedScheduler) + && !matches.is_present("enable_experimental_block_production_method") + { + eprintln!( + "Currently, the unified-scheduler method is experimental for block-production. \ + Explicitly pass --enable-experimental-block-production-method to use it." + ); + exit(1); + } + }) .unwrap_or_default(); validator_config.enable_block_production_forwarding = staked_nodes_overrides_path.is_some(); validator_config.unified_scheduler_handler_threads =