diff --git a/Cargo.lock b/Cargo.lock index ce16dc9..269a491 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7136,6 +7136,7 @@ dependencies = [ "rustc_version", "rustls", "solana-core", + "solana-ledger", "solana-logger", "solana-sdk", "strum 0.26.2", diff --git a/Cargo.toml b/Cargo.toml index c98d591..7137d4c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ rand = "0.8.5" reqwest = { version = "0.11.23", features = ["blocking", "brotli", "deflate", "gzip", "rustls-tls", "json"] } rustls = { version = "0.21.10", default-features = false, features = ["quic"] } solana-core = "1.18.8" +solana-ledger = "1.18.8" solana-logger = "1.18.8" solana-sdk = "1.18.8" strum = "0.26.2" diff --git a/PROGRESS.md b/PROGRESS.md index 45820d4..bad7044 100644 --- a/PROGRESS.md +++ b/PROGRESS.md @@ -15,33 +15,33 @@ - [x] Create Genesis - [x] Generate faucet and bootstrap accounts - [x] Build genesis -- [ ] Docker Build - - [ ] Build Bootstrap Image - - [ ] Push Image to registry +- [x] Docker Build + - [x] Build Bootstrap Image + - [x] Push Image to registry - [ ] Create & Deploy Secrets - - [ ] Bootstrap + - [x] Bootstrap - [ ] Validator (regular) - [ ] RPC nodes - [ ] Client - [ ] Create & Deploy Selector - - [ ] Bootstrap + - [x] Bootstrap - [ ] Validator (regular) - [ ] RPC nodes - [ ] Client - [ ] Create & Deploy Replica Set - - [ ] Bootstrap + - [x] Bootstrap - [ ] Validator (regular) - [ ] RPC nodes - [ ] Client - [ ] Create & Deploy Services - - [ ] Bootstrap + - [x] Bootstrap - [ ] Validator (regular) - [ ] RPC nodes - [ ] Client -- [ ] Check Bootstrap is deployed and running -- [ ] Build and deploy Load Balancer (sits in front of bootstrap and RPC nodes) +- [x] Check Bootstrap is deployed and running +- [x] Build and deploy Load Balancer (sits in front of bootstrap and RPC nodes) - [ ] Add metrics - - [ ] Bootstrap + - [x] Bootstrap - [ ] Validator (regular) - [ ] RPC nodes - [ ] Client @@ -69,7 +69,7 @@ Above, we start with bootstrap, and then we do validators (regular), 
and then we - Use command line flags to set type of client, tx-count, etc - [ ] Add in kubernetes deployment flags - - [ ] CPU/Memory Requests + - [x] CPU/Memory Requests - [ ] Node Affinity -> Regions - [ ] Node Affinity -> Node Type (Equinix/Lumen) diff --git a/README.md b/README.md index 2c0d47d..7b5c818 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ cargo run --bin cluster -- --release-channel # note: MUST include the "v" ``` -#### Build from Local Repo and Configure Genesis +#### Build from Local Repo and Configure Genesis and Bootstrap Validator Image Example: ``` cargo run --bin cluster -- @@ -51,4 +51,57 @@ cargo run --bin cluster -- --max-genesis-archive-unpacked-size --target-lamports-per-signature --slots-per-epoch + # docker config + --registry # e.g. gregcusack + --tag # e.g. v1 + --base-image # e.g. ubuntu:20.04 + --image-name # e.g. cluster-image +``` + +## Metrics +1) Set up metrics database: +``` +cd scripts/ +./init-metrics.sh -c +# enter password when prompted +``` +2) Add the following to your `cluster` command from above +``` +--metrics-host https://internal-metrics.solana.com # need the `https://` here +--metrics-port 8086 +--metrics-db # from (1) +--metrics-username # from (1) +--metrics-password # from (1) +``` + + +## Kubernetes Cheatsheet +Create namespace: +``` +kubectl create ns <namespace> +``` + +Delete namespace: +``` +kubectl delete ns <namespace> +``` + +Get running pods: +``` +kubectl get pods -n <namespace> +``` + +Get pod logs: +``` +kubectl logs <pod-name> -n <namespace> +``` + +Exec into pod: +``` +kubectl exec -it <pod-name> -n <namespace> -- /bin/bash +``` + +Get information about pod: +``` +kubectl describe pod <pod-name> -n <namespace> ``` \ No newline at end of file diff --git a/scripts/init-metrics.sh b/scripts/init-metrics.sh new file mode 100755 index 0000000..89eae4f --- /dev/null +++ b/scripts/init-metrics.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +set -e + +here=$(dirname "$0") +# shellcheck source=net/common.sh +source "$here"/common.sh + +usage() { + exitcode=0 + if [[ -n "$1" ]]; then + exitcode=1 + echo "Error: 
$*" + fi + cat < Self { + DockerImage { + registry, + validator_type, + image_name, + tag, + } + } + + pub fn validator_type(&self) -> ValidatorType { + self.validator_type + } +} + +// Put DockerImage in format for building, pushing, and pulling +impl Display for DockerImage { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!( + f, + "{}/{}-{}:{}", + self.registry, self.validator_type, self.image_name, self.tag + ) + } +} + +pub struct DockerConfig { + pub base_image: String, + deploy_method: DeployMethod, +} + +impl DockerConfig { + pub fn new(base_image: String, deploy_method: DeployMethod) -> Self { + DockerConfig { + base_image, + deploy_method, + } + } + + pub fn build_image( + &self, + solana_root_path: &Path, + docker_image: &DockerImage, + ) -> Result<(), Box> { + let validator_type = docker_image.validator_type(); + match validator_type { + ValidatorType::Bootstrap => (), + ValidatorType::Standard | ValidatorType::RPC | ValidatorType::Client => { + return Err(format!( + "Build docker image for validator type: {validator_type} not supported yet" + ) + .into()); + } + } + + let docker_path = solana_root_path.join(format!("docker-build/{validator_type}")); + self.create_base_image( + solana_root_path, + docker_image, + &docker_path, + &validator_type, + )?; + + Ok(()) + } + + fn create_base_image( + &self, + solana_root_path: &Path, + docker_image: &DockerImage, + docker_path: &PathBuf, + validator_type: &ValidatorType, + ) -> Result<(), Box> { + self.create_dockerfile(validator_type, docker_path, None)?; + + // We use std::process::Command here because Docker-rs is very slow building dockerfiles + // when they are in large repos. Docker-rs doesn't seem to support the `--file` flag natively. 
+ // so we result to using std::process::Command + let dockerfile = docker_path.join("Dockerfile"); + let context_path = solana_root_path.display().to_string(); + + let progress_bar = new_spinner_progress_bar(); + progress_bar.set_message(format!("{BUILD}Building {validator_type} docker image...",)); + + let command = format!("docker build -t {docker_image} -f {dockerfile:?} {context_path}"); + + let output = match Command::new("sh") + .arg("-c") + .arg(&command) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn() + .expect("Failed to execute command") + .wait_with_output() + { + Ok(res) => Ok(res), + Err(err) => Err(Box::new(err) as Box), + }?; + + if !output.status.success() { + return Err(output.status.to_string().into()); + } + progress_bar.finish_and_clear(); + info!("{validator_type} image build complete"); + + Ok(()) + } + + fn copy_file_to_docker( + source_dir: &Path, + docker_dir: &Path, + file_name: &str, + ) -> std::io::Result<()> { + let source_path = source_dir.join("src/scripts").join(file_name); + let destination_path = docker_dir.join(file_name); + fs::copy(source_path, destination_path)?; + Ok(()) + } + + fn create_dockerfile( + &self, + validator_type: &ValidatorType, + docker_path: &PathBuf, + content: Option<&str>, + ) -> Result<(), Box> { + if docker_path.exists() { + fs::remove_dir_all(docker_path)?; + } + fs::create_dir_all(docker_path)?; + + if let DeployMethod::Local(_) = self.deploy_method { + if validator_type == &ValidatorType::Bootstrap { + let manifest_path = + PathBuf::from(env::var("CARGO_MANIFEST_DIR").expect("$CARGO_MANIFEST_DIR")); + let files_to_copy = ["bootstrap-startup-script.sh", "common.sh"]; + for file_name in files_to_copy.iter() { + Self::copy_file_to_docker(&manifest_path, docker_path, file_name)?; + } + } + } + + let (solana_build_directory, startup_script_directory) = + if let DeployMethod::ReleaseChannel(_) = self.deploy_method { + ("solana-release", "./src/scripts".to_string()) + } else { + ("farf", 
format!("./docker-build/{validator_type}")) + }; + + let dockerfile = format!( + r#" +FROM {} +RUN apt-get update +RUN apt-get install -y iputils-ping curl vim bzip2 + +RUN useradd -ms /bin/bash solana +RUN adduser solana sudo +USER solana + +RUN mkdir -p /home/solana/k8s-cluster-scripts +# TODO: this needs to be changed for non bootstrap, this should be ./src/scripts/-startup-scripts.sh +COPY {startup_script_directory} /home/solana/k8s-cluster-scripts + +RUN mkdir -p /home/solana/ledger +COPY --chown=solana:solana ./config-k8s/bootstrap-validator /home/solana/ledger + +RUN mkdir -p /home/solana/.cargo/bin + +COPY ./{solana_build_directory}/bin/ /home/solana/.cargo/bin/ +COPY ./{solana_build_directory}/version.yml /home/solana/ + +RUN mkdir -p /home/solana/config +ENV PATH="/home/solana/.cargo/bin:${{PATH}}" + +WORKDIR /home/solana + +"#, + self.base_image + ); + + debug!("dockerfile: {dockerfile:?}"); + std::fs::write( + docker_path.join("Dockerfile"), + content.unwrap_or(dockerfile.as_str()), + )?; + Ok(()) + } + + pub fn push_image(docker_image: &DockerImage) -> Result<(), Box> { + let progress_bar = new_spinner_progress_bar(); + progress_bar.set_message(format!( + "{ROCKET}Pushing {} image to registry...", + docker_image.validator_type() + )); + let command = format!("docker push '{}'", docker_image); + let output = match Command::new("sh") + .arg("-c") + .arg(&command) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn() + .expect("Failed to execute command") + .wait_with_output() + { + Ok(res) => Ok(res), + Err(err) => Err(Box::new(err) as Box), + }?; + + if !output.status.success() { + return Err(output.status.to_string().into()); + } + progress_bar.finish_and_clear(); + Ok(()) + } +} diff --git a/src/genesis.rs b/src/genesis.rs index 129db00..d6bb7a7 100644 --- a/src/genesis.rs +++ b/src/genesis.rs @@ -117,8 +117,7 @@ pub struct Genesis { } impl Genesis { - pub fn new(solana_root: &Path, flags: GenesisFlags) -> Self { - let config_dir = 
solana_root.join("config-k8s"); + pub fn new(config_dir: PathBuf, flags: GenesisFlags) -> Self { if config_dir.exists() { std::fs::remove_dir_all(&config_dir).unwrap(); } @@ -250,7 +249,7 @@ impl Genesis { .for_each(|account_type| { args.push( self.config_dir - .join(format!("bootstrap-validator/{}.json", account_type)) + .join(format!("bootstrap-validator/{account_type}.json")) .to_string_lossy() .to_string(), ); @@ -269,22 +268,19 @@ impl Genesis { args } - pub fn setup_spl_args( - &self, - solana_root_path: &PathBuf, - ) -> Result, Box> { + pub fn setup_spl_args(&self, solana_root_path: &Path) -> Result, Box> { let fetch_spl_file = solana_root_path.join("fetch-spl.sh"); fetch_spl(&fetch_spl_file)?; - // add in spl stuff + // add in spl let spl_file = solana_root_path.join("spl-genesis-args.sh"); parse_spl_genesis_file(&spl_file) } pub fn generate( &mut self, - solana_root_path: &PathBuf, - build_path: &PathBuf, + solana_root_path: &Path, + build_path: &Path, ) -> Result<(), Box> { let mut args = self.setup_genesis_flags(); let mut spl_args = self.setup_spl_args(solana_root_path)?; diff --git a/src/k8s_helpers.rs b/src/k8s_helpers.rs new file mode 100644 index 0000000..d1289a6 --- /dev/null +++ b/src/k8s_helpers.rs @@ -0,0 +1,159 @@ +use { + crate::{docker::DockerImage, ValidatorType}, + k8s_openapi::{ + api::{ + apps::v1::{ReplicaSet, ReplicaSetSpec}, + core::v1::{ + Container, EnvVar, PodSecurityContext, PodSpec, PodTemplateSpec, Probe, + ResourceRequirements, Secret, Service, ServicePort, ServiceSpec, Volume, + VolumeMount, + }, + }, + apimachinery::pkg::{api::resource::Quantity, apis::meta::v1::LabelSelector}, + ByteString, + }, + kube::api::ObjectMeta, + std::{collections::BTreeMap, error::Error, path::PathBuf}, +}; + +pub fn create_secret(name: &str, data: BTreeMap) -> Secret { + Secret { + metadata: ObjectMeta { + name: Some(name.to_string()), + ..Default::default() + }, + data: Some(data), + ..Default::default() + } +} + +pub fn create_secret_from_files( 
+ secret_name: &str, + key_files: &[(PathBuf, &str)], //[pathbuf, key type] +) -> Result> { + let mut data = BTreeMap::new(); + for (file_path, key_type) in key_files { + let file_content = std::fs::read(file_path) + .map_err(|err| format!("Failed to read file '{:?}': {}", file_path, err))?; + data.insert(format!("{}.json", key_type), ByteString(file_content)); + } + + Ok(create_secret(secret_name, data)) +} + +#[allow(clippy::too_many_arguments)] +pub fn create_replica_set( + name: &ValidatorType, + namespace: &str, + label_selector: &BTreeMap, + image_name: &DockerImage, + environment_variables: Vec, + command: &[String], + volumes: Option>, + volume_mounts: Option>, + readiness_probe: Option, + pod_requests: BTreeMap, +) -> Result> { + let pod_spec = PodTemplateSpec { + metadata: Some(ObjectMeta { + labels: Some(label_selector.clone()), + ..Default::default() + }), + spec: Some(PodSpec { + containers: vec![Container { + name: format!("{}-{}", image_name.validator_type(), "container"), + image: Some(image_name.to_string()), + image_pull_policy: Some("Always".to_string()), + env: Some(environment_variables), + command: Some(command.to_owned()), + volume_mounts, + readiness_probe, + resources: Some(ResourceRequirements { + requests: Some(pod_requests), + ..Default::default() + }), + ..Default::default() + }], + volumes, + security_context: Some(PodSecurityContext { + run_as_user: Some(1000), + run_as_group: Some(1000), + ..Default::default() + }), + ..Default::default() + }), + }; + + let replicas_set_spec = ReplicaSetSpec { + replicas: Some(1), + selector: LabelSelector { + match_labels: Some(label_selector.clone()), + ..Default::default() + }, + template: Some(pod_spec), + ..Default::default() + }; + + Ok(ReplicaSet { + metadata: ObjectMeta { + name: Some(format!("{}-replicaset", name)), + namespace: Some(namespace.to_string()), + ..Default::default() + }, + spec: Some(replicas_set_spec), + ..Default::default() + }) +} + +pub fn create_service( + service_name: 
&str, + namespace: &str, + label_selector: &BTreeMap, + is_load_balancer: bool, +) -> Service { + Service { + metadata: ObjectMeta { + name: Some(service_name.to_string()), + namespace: Some(namespace.to_string()), + ..Default::default() + }, + spec: Some(ServiceSpec { + selector: Some(label_selector.clone()), + type_: if is_load_balancer { + Some("LoadBalancer".to_string()) + } else { + None + }, + cluster_ip: if is_load_balancer { + None + } else { + Some("None".to_string()) + }, + ports: Some(vec![ + ServicePort { + port: 8899, // RPC Port + name: Some("rpc-port".to_string()), + ..Default::default() + }, + ServicePort { + port: 8001, //Gossip Port + name: Some("gossip-port".to_string()), + ..Default::default() + }, + ServicePort { + port: 9900, //Faucet Port + name: Some("faucet-port".to_string()), + ..Default::default() + }, + ]), + ..Default::default() + }), + ..Default::default() + } +} + +pub fn create_selector(key: &str, value: &str) -> BTreeMap { + let mut btree = BTreeMap::new(); + btree.insert(key.to_string(), value.to_string()); + btree +} diff --git a/src/kubernetes.rs b/src/kubernetes.rs index 869e901..5a0c3c2 100644 --- a/src/kubernetes.rs +++ b/src/kubernetes.rs @@ -1,21 +1,66 @@ use { - k8s_openapi::api::core::v1::Namespace, + crate::{ + docker::DockerImage, k8s_helpers, validator_config::ValidatorConfig, Metrics, ValidatorType, + }, + k8s_openapi::{ + api::{ + apps::v1::ReplicaSet, + core::v1::{ + EnvVar, EnvVarSource, Namespace, ObjectFieldSelector, Secret, SecretKeySelector, + SecretVolumeSource, Service, Volume, VolumeMount, + }, + }, + apimachinery::pkg::api::resource::Quantity, + ByteString, + }, kube::{ - api::{Api, ListParams}, + api::{Api, ListParams, PostParams}, Client, }, + log::*, + solana_sdk::{pubkey::Pubkey, signature::keypair::read_keypair_file, signer::Signer}, + std::{collections::BTreeMap, error::Error, path::Path}, }; -pub struct Kubernetes { +#[derive(Debug, Clone)] +pub struct PodRequests { + requests: BTreeMap, +} + +impl 
PodRequests { + pub fn new(cpu_requests: String, memory_requests: String) -> PodRequests { + PodRequests { + requests: vec![ + ("cpu".to_string(), Quantity(cpu_requests)), + ("memory".to_string(), Quantity(memory_requests)), + ] + .into_iter() + .collect(), + } + } +} + +pub struct Kubernetes<'a> { k8s_client: Client, namespace: String, + validator_config: &'a mut ValidatorConfig, + pod_requests: PodRequests, + pub metrics: Option, } -impl Kubernetes { - pub async fn new(namespace: &str) -> Kubernetes { +impl<'a> Kubernetes<'a> { + pub async fn new( + namespace: &str, + validator_config: &'a mut ValidatorConfig, + pod_requests: PodRequests, + metrics: Option, + ) -> Kubernetes<'a> { Self { k8s_client: Client::try_default().await.unwrap(), namespace: namespace.to_owned(), + validator_config, + pod_requests, + metrics, } } @@ -30,4 +75,229 @@ impl Kubernetes { Ok(exists) } + + pub fn create_bootstrap_secret( + &mut self, + secret_name: &str, + config_dir: &Path, + ) -> Result> { + let faucet_key_path = config_dir.join("faucet.json"); + let identity_key_path = config_dir.join("bootstrap-validator/identity.json"); + let vote_key_path = config_dir.join("bootstrap-validator/vote-account.json"); + let stake_key_path = config_dir.join("bootstrap-validator/stake-account.json"); + + let bootstrap_keypair = read_keypair_file(identity_key_path.clone()) + .expect("Failed to read bootstrap validator keypair file"); + + //TODO: need to fix and not read the json path twice + self.add_known_validator(bootstrap_keypair.pubkey()); + + let key_files = vec![ + (faucet_key_path, "faucet"), + (identity_key_path, "identity"), + (vote_key_path, "vote"), + (stake_key_path, "stake"), + ]; + + k8s_helpers::create_secret_from_files(secret_name, &key_files) + } + + fn add_known_validator(&mut self, pubkey: Pubkey) { + if let Some(ref mut known_validators) = self.validator_config.known_validators { + known_validators.push(pubkey); + } else { + let new_known_validators = vec![pubkey]; + 
self.validator_config.known_validators = Some(new_known_validators); + } + + info!("pubkey added to known validators: {:?}", pubkey); + } + + pub async fn deploy_secret(&self, secret: &Secret) -> Result { + let secrets_api: Api = + Api::namespaced(self.k8s_client.clone(), self.namespace.as_str()); + secrets_api.create(&PostParams::default(), secret).await + } + + pub fn create_bootstrap_validator_replica_set( + &mut self, + image_name: &DockerImage, + secret_name: Option, + label_selector: &BTreeMap, + ) -> Result> { + let mut env_vars = vec![EnvVar { + name: "MY_POD_IP".to_string(), + value_from: Some(EnvVarSource { + field_ref: Some(ObjectFieldSelector { + field_path: "status.podIP".to_string(), + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + }]; + + if self.metrics.is_some() { + env_vars.push(self.get_metrics_env_var_secret()) + } + + let accounts_volume = Some(vec![Volume { + name: "bootstrap-accounts-volume".into(), + secret: Some(SecretVolumeSource { + secret_name, + ..Default::default() + }), + ..Default::default() + }]); + + let accounts_volume_mount = Some(vec![VolumeMount { + name: "bootstrap-accounts-volume".to_string(), + mount_path: "/home/solana/bootstrap-accounts".to_string(), + ..Default::default() + }]); + + let mut command = + vec!["/home/solana/k8s-cluster-scripts/bootstrap-startup-script.sh".to_string()]; + command.extend(self.generate_bootstrap_command_flags()); + + k8s_helpers::create_replica_set( + &ValidatorType::Bootstrap, + self.namespace.as_str(), + label_selector, + image_name, + env_vars, + &command, + accounts_volume, + accounts_volume_mount, + None, + self.pod_requests.requests.clone(), + ) + } + + fn generate_command_flags(&self, flags: &mut Vec) { + if self.validator_config.tpu_enable_udp { + flags.push("--tpu-enable-udp".to_string()); + } + if self.validator_config.tpu_disable_quic { + flags.push("--tpu-disable-quic".to_string()); + } + if self.validator_config.skip_poh_verify { + 
flags.push("--skip-poh-verify".to_string()); + } + if self.validator_config.no_snapshot_fetch { + flags.push("--no-snapshot-fetch".to_string()); + } + if self.validator_config.require_tower { + flags.push("--require-tower".to_string()); + } + if self.validator_config.enable_full_rpc { + flags.push("--enable-rpc-transaction-history".to_string()); + flags.push("--enable-extended-tx-metadata-storage".to_string()); + } + + if let Some(limit_ledger_size) = self.validator_config.max_ledger_size { + flags.push("--limit-ledger-size".to_string()); + flags.push(limit_ledger_size.to_string()); + } + } + + fn generate_bootstrap_command_flags(&self) -> Vec { + let mut flags: Vec = Vec::new(); + self.generate_command_flags(&mut flags); + + flags + } + + pub async fn deploy_replicas_set( + &self, + replica_set: &ReplicaSet, + ) -> Result { + let api: Api = + Api::namespaced(self.k8s_client.clone(), self.namespace.as_str()); + let post_params = PostParams::default(); + // Apply the ReplicaSet + api.create(&post_params, replica_set).await + } + + pub fn create_bootstrap_service( + &self, + service_name: &str, + label_selector: &BTreeMap, + ) -> Service { + k8s_helpers::create_service(service_name, self.namespace.as_str(), label_selector, false) + } + + pub async fn deploy_service(&self, service: &Service) -> Result { + let post_params = PostParams::default(); + // Create an API instance for Services in the specified namespace + let service_api: Api = + Api::namespaced(self.k8s_client.clone(), self.namespace.as_str()); + + // Create the Service object in the cluster + service_api.create(&post_params, service).await + } + + pub fn create_validator_load_balancer( + &self, + service_name: &str, + label_selector: &BTreeMap, + ) -> Service { + k8s_helpers::create_service(service_name, self.namespace.as_str(), label_selector, true) + } + + pub fn create_selector(&self, key: &str, value: &str) -> BTreeMap { + k8s_helpers::create_selector(key, value) + } + + pub async fn 
check_replica_set_ready( + &self, + replica_set_name: &str, + ) -> Result { + let replica_sets: Api = + Api::namespaced(self.k8s_client.clone(), self.namespace.as_str()); + let replica_set = replica_sets.get(replica_set_name).await?; + + let desired_validators = replica_set.spec.as_ref().unwrap().replicas.unwrap_or(1); + let available_validators = replica_set + .status + .as_ref() + .unwrap() + .available_replicas + .unwrap_or(0); + + Ok(available_validators >= desired_validators) + } + + pub fn create_metrics_secret(&self) -> Result> { + let mut data = BTreeMap::new(); + if let Some(metrics) = &self.metrics { + data.insert( + "SOLANA_METRICS_CONFIG".to_string(), + ByteString(metrics.to_env_string().into_bytes()), + ); + } else { + return Err( + "Called create_metrics_secret() but metrics were not provided." + .to_string() + .into(), + ); + } + + Ok(k8s_helpers::create_secret("solana-metrics-secret", data)) + } + + pub fn get_metrics_env_var_secret(&self) -> EnvVar { + EnvVar { + name: "SOLANA_METRICS_CONFIG".to_string(), + value_from: Some(EnvVarSource { + secret_key_ref: Some(SecretKeySelector { + name: Some("solana-metrics-secret".to_string()), + key: "SOLANA_METRICS_CONFIG".to_string(), + ..Default::default() + }), + ..Default::default() + }), + ..Default::default() + } + } } diff --git a/src/lib.rs b/src/lib.rs index 67cfe23..79e3c4e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -54,12 +54,62 @@ pub enum ValidatorType { Client, } +// impl std::fmt::Display for ValidatorType { +// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +// match *self { +// ValidatorType::Bootstrap => write!(f, "bootstrap-validator"), +// ValidatorType::Standard => write!(f, "validator"), +// ValidatorType::RPC => write!(f, "rpc-node"), +// ValidatorType::Client => write!(f, "client"), +// } +// } +// } + +#[derive(Clone, Debug, Default)] +pub struct Metrics { + pub host: String, + pub port: String, + pub database: String, + pub username: String, + password: String, 
+} + +impl Metrics { + pub fn new( + host: String, + port: String, + database: String, + username: String, + password: String, + ) -> Self { + Metrics { + host, + port, + database, + username, + password, + } + } + pub fn to_env_string(&self) -> String { + format!( + "host={}:{},db={},u={},p={}", + self.host, self.port, self.database, self.username, self.password + ) + } +} + +pub mod docker; pub mod genesis; +pub mod k8s_helpers; pub mod kubernetes; pub mod release; +pub mod validator; +pub mod validator_config; -static SUN: Emoji = Emoji("🌞 ", ""); +static BUILD: Emoji = Emoji("👷 ", ""); static PACKAGE: Emoji = Emoji("📦 ", ""); +static ROCKET: Emoji = Emoji("🚀 ", ""); +static SUN: Emoji = Emoji("🌞 ", ""); static TRUCK: Emoji = Emoji("🚚 ", ""); /// Creates a new process bar for processing that will take an unknown amount of time @@ -78,7 +128,7 @@ pub fn cat_file(path: &PathBuf) -> std::io::Result<()> { let mut file = File::open(path)?; let mut contents = String::new(); file.read_to_string(&mut contents)?; - info!("{:?}:\n{}", path.file_name(), contents); + info!("{:?}:\n{contents}", path.file_name()); Ok(()) } diff --git a/src/main.rs b/src/main.rs index 3319913..b7bfe20 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,20 @@ use { - clap::{command, Arg, ArgGroup}, + clap::{command, value_t_or_exit, Arg, ArgGroup}, log::*, - std::fs, + solana_ledger::blockstore_cleanup_service::{ + DEFAULT_MAX_LEDGER_SHREDS, DEFAULT_MIN_MAX_LEDGER_SHREDS, + }, + solana_sdk::{signature::keypair::read_keypair_file, signer::Signer}, + std::{fs, thread, time::Duration}, strum::VariantNames, validator_lab::{ + docker::{DockerConfig, DockerImage}, genesis::{Genesis, GenesisFlags}, - kubernetes::Kubernetes, + kubernetes::{Kubernetes, PodRequests}, release::{BuildConfig, BuildType, DeployMethod}, - SolanaRoot, ValidatorType, + validator::{LabelType, Validator}, + validator_config::ValidatorConfig, + Metrics, SolanaRoot, ValidatorType, }, }; @@ -76,7 +83,7 @@ fn parse_matches() -> 
clap::ArgMatches { Arg::with_name("enable_warmup_epochs") .long("enable-warmup-epochs") .takes_value(true) - .possible_values(&["true", "false"]) + .possible_values(["true", "false"]) .default_value("true") .help("Genesis config. enable warmup epoch. defaults to true"), ) @@ -89,7 +96,7 @@ fn parse_matches() -> clap::ArgMatches { .arg( Arg::with_name("cluster_type") .long("cluster-type") - .possible_values(&["development", "devnet", "testnet", "mainnet-beta"]) + .possible_values(["development", "devnet", "testnet", "mainnet-beta"]) .takes_value(true) .default_value("development") .help( @@ -108,6 +115,138 @@ fn parse_matches() -> clap::ArgMatches { .takes_value(true) .help("Genesis config. bootstrap validator stake sol"), ) + //Docker config + .arg( + Arg::with_name("skip_docker_build") + .long("skip-docker-build") + .help("Skips build Docker images"), + ) + .arg( + Arg::with_name("registry_name") + .long("registry") + .takes_value(true) + .required(true) + .help("Registry to push docker image to"), + ) + .arg( + Arg::with_name("image_name") + .long("image-name") + .takes_value(true) + .default_value("k8s-cluster-image") + .help("Docker image name. Will be prepended with validator_type (bootstrap or validator)"), + ) + .arg( + Arg::with_name("base_image") + .long("base-image") + .takes_value(true) + .default_value("ubuntu:20.04") + .help("Docker base image"), + ) + .arg( + Arg::with_name("image_tag") + .long("tag") + .takes_value(true) + .default_value("latest") + .help("Docker image tag."), + ) + // Bootstrap/Validator Config + .arg( + Arg::with_name("tpu_enable_udp") + .long("tpu-enable-udp") + .help("Validator config. Enable UDP for tpu transactions."), + ) + .arg( + Arg::with_name("tpu_disable_quic") + .long("tpu-disable-quic") + .help("Validator config. Disable quic for tpu packet forwarding"), + ) + .arg( + Arg::with_name("limit_ledger_size") + .long("limit-ledger-size") + .takes_value(true) + .help("Validator Config. 
The `--limit-ledger-size` parameter allows you to specify how many ledger + shreds your node retains on disk. If you do not + include this parameter, the validator will keep the entire ledger until it runs + out of disk space. The default value attempts to keep the ledger disk usage + under 500GB. More or less disk usage may be requested by adding an argument to + `--limit-ledger-size` if desired. Check `agave-validator --help` for the + default limit value used by `--limit-ledger-size`. More information about + selecting a custom limit value is at : https://github.com/solana-labs/solana/blob/583cec922b6107e0f85c7e14cb5e642bc7dfb340/core/src/ledger_cleanup_service.rs#L15-L26"), + ) + .arg( + Arg::with_name("skip_poh_verify") + .long("skip-poh-verify") + .help("Validator config. If set, validators will skip verifying + the ledger they already have saved to disk at + boot (results in a much faster boot)"), + ) + .arg( + Arg::with_name("no_snapshot_fetch") + .long("no-snapshot-fetch") + .help("Validator config. If set, disables booting validators from a snapshot"), + ) + .arg( + Arg::with_name("require_tower") + .long("require-tower") + .help("Validator config. Refuse to start if saved tower state is not found. + Off by default since validator won't restart if the pod restarts"), + ) + .arg( + Arg::with_name("enable_full_rpc") + .long("full-rpc") + .help("Validator config. Support full RPC services on all nodes"), + ) + // kubernetes config + .arg( + Arg::with_name("cpu_requests") + .long("cpu-requests") + .takes_value(true) + .default_value("20") // 20 cores + .help("Kubernetes pod config. Specify minimum CPUs required for deploying validator. + can use millicore notation as well. e.g. 500m (500 millicores) == 0.5 and is equivalent to half a core. + [default: 20]"), + ) + .arg( + Arg::with_name("memory_requests") + .long("memory-requests") + .takes_value(true) + .default_value("70Gi") // 70 Gigabytes + .help("Kubernetes pod config. 
Specify minimum memory required for deploying validator. + Can specify unit here (B, Ki, Mi, Gi, Ti) for bytes, kilobytes, etc (2^N notation) + e.g. 1Gi == 1024Mi == 1024Ki == 1,047,576B. [default: 70Gi]"), + ) + //Metrics Config + .arg( + Arg::with_name("metrics_host") + .long("metrics-host") + .takes_value(true) + .requires_all(&["metrics_port", "metrics_db", "metrics_username", "metrics_password"]) + .help("Metrics Config. Optional: specify metrics host. e.g. https://internal-metrics.solana.com"), + ) + .arg( + Arg::with_name("metrics_port") + .long("metrics-port") + .takes_value(true) + .help("Metrics Config. Optional: specify metrics port. e.g. 8086"), + ) + .arg( + Arg::with_name("metrics_db") + .long("metrics-db") + .takes_value(true) + .help("Metrics Config. Optional: specify metrics database. e.g. k8s-cluster-"), + ) + .arg( + Arg::with_name("metrics_username") + .long("metrics-username") + .takes_value(true) + .help("Metrics Config. Optional: specify metrics username"), + ) + .arg( + Arg::with_name("metrics_password") + .long("metrics-password") + .takes_value(true) + .help("Metrics Config. Optional: Specify metrics password"), + ) .get_matches() } @@ -164,23 +303,12 @@ async fn main() { ); } - let kub_controller = Kubernetes::new(environment_config.namespace).await; - match kub_controller.namespace_exists().await { - Ok(true) => (), - Ok(false) => { - error!( - "Namespace: '{}' doesn't exist. 
Exiting...", - environment_config.namespace - ); - return; - } - Err(err) => { - error!("Error: {err}"); - return; - } - } - - let build_config = BuildConfig::new(deploy_method, build_type, solana_root.get_root_path()); + let build_config = BuildConfig::new( + deploy_method.clone(), + build_type, + solana_root.get_root_path(), + !matches.is_present("skip_docker_build"), + ); let genesis_flags = GenesisFlags { hashes_per_tick: matches @@ -232,17 +360,82 @@ async fn main() { ), }; + let mut validator_config = ValidatorConfig { + tpu_enable_udp: matches.is_present("tpu_enable_udp"), + tpu_disable_quic: matches.is_present("tpu_disable_quic"), + max_ledger_size: if matches.is_present("limit_ledger_size") { + let limit_ledger_size = match matches.value_of("limit_ledger_size") { + Some(_) => value_t_or_exit!(matches, "limit_ledger_size", u64), + None => DEFAULT_MAX_LEDGER_SHREDS, + }; + if limit_ledger_size < DEFAULT_MIN_MAX_LEDGER_SHREDS { + error!( + "The provided --limit-ledger-size value was too small, the minimum value is {DEFAULT_MIN_MAX_LEDGER_SHREDS}" + ); + return; + } + Some(limit_ledger_size) + } else { + None + }, + skip_poh_verify: matches.is_present("skip_poh_verify"), + no_snapshot_fetch: matches.is_present("no_snapshot_fetch"), + require_tower: matches.is_present("require_tower"), + enable_full_rpc: matches.is_present("enable_full_rpc"), + known_validators: None, + }; + + let pod_requests = PodRequests::new( + matches.value_of("cpu_requests").unwrap().to_string(), + matches.value_of("memory_requests").unwrap().to_string(), + ); + + let metrics = matches.value_of("metrics_host").map(|host| { + Metrics::new( + host.to_string(), + matches.value_of("metrics_port").unwrap().to_string(), + matches.value_of("metrics_db").unwrap().to_string(), + matches.value_of("metrics_username").unwrap().to_string(), + matches.value_of("metrics_password").unwrap().to_string(), + ) + }); + + let mut kub_controller = Kubernetes::new( + environment_config.namespace, + &mut 
validator_config, + pod_requests, + metrics, + ) + .await; + + match kub_controller.namespace_exists().await { + Ok(true) => (), + Ok(false) => { + error!( + "Namespace: '{}' doesn't exist. Exiting...", + environment_config.namespace + ); + return; + } + Err(err) => { + error!("Error: {err}"); + return; + } + } + match build_config.prepare().await { Ok(_) => info!("Validator setup prepared successfully"), Err(err) => { - error!("Error: {}", err); + error!("Error: {err}"); return; } } - let mut genesis = Genesis::new(solana_root.get_root_path(), genesis_flags); + let config_directory = solana_root.get_root_path().join("config-k8s"); + let mut genesis = Genesis::new(config_directory.clone(), genesis_flags); + match genesis.generate_faucet() { - Ok(_) => (), + Ok(_) => info!("Generated faucet account"), Err(err) => { error!("generate faucet error! {err}"); return; @@ -250,7 +443,7 @@ async fn main() { } match genesis.generate_accounts(ValidatorType::Bootstrap, 1) { - Ok(_) => (), + Ok(_) => info!("Generated bootstrap account"), Err(err) => { error!("generate accounts error! {err}"); return; @@ -259,10 +452,198 @@ async fn main() { // creates genesis and writes to binary file match genesis.generate(solana_root.get_root_path(), &build_path) { - Ok(_) => (), + Ok(_) => info!("Created genesis successfully"), + Err(err) => { + error!("generate genesis error! {err}"); + return; + } + } + + //unwraps are safe here. 
since their requirement is enforced by argmatches + let docker = DockerConfig::new( + matches + .value_of("base_image") + .unwrap_or_default() + .to_string(), + deploy_method, + ); + + let registry_name = matches.value_of("registry_name").unwrap().to_string(); + let image_name = matches.value_of("image_name").unwrap().to_string(); + let image_tag = matches + .value_of("image_tag") + .unwrap_or_default() + .to_string(); + + let mut bootstrap_validator = Validator::new(DockerImage::new( + registry_name.clone(), + ValidatorType::Bootstrap, + image_name.clone(), + image_tag.clone(), + )); + + if build_config.docker_build() { + let validators = vec![&bootstrap_validator]; + for v in &validators { + match docker.build_image(solana_root.get_root_path(), v.image()) { + Ok(_) => info!("{} image built successfully", v.validator_type()), + Err(err) => { + error!("Failed to build docker image {err}"); + return; + } + } + + match DockerConfig::push_image(v.image()) { + Ok(_) => info!("{} image pushed successfully", v.validator_type()), + Err(err) => { + error!("Failed to push docker image {err}"); + return; + } + } + } + } + + // metrics secret create once and use by all pods + if kub_controller.metrics.is_some() { + let metrics_secret = match kub_controller.create_metrics_secret() { + Ok(secret) => secret, + Err(err) => { + error!("Failed to create metrics secret! {err}"); + return; + } + }; + match kub_controller.deploy_secret(&metrics_secret).await { + Ok(_) => (), + Err(err) => { + error!("{err}"); + return; + } + } + }; + + match kub_controller.create_bootstrap_secret("bootstrap-accounts-secret", &config_directory) { + Ok(secret) => bootstrap_validator.set_secret(secret), + Err(err) => { + error!("Failed to create bootstrap secret! 
{err}"); + return; + } + }; + + match kub_controller + .deploy_secret(bootstrap_validator.secret()) + .await + { + Ok(_) => info!("Deployed Bootstrap Secret"), + Err(err) => { + error!("{err}"); + return; + } + } + + // Create bootstrap labels + let identity_path = config_directory.join("bootstrap-validator/identity.json"); + let bootstrap_keypair = + read_keypair_file(identity_path).expect("Failed to read bootstrap keypair file"); + bootstrap_validator.add_label( + "load-balancer/name", + "load-balancer-selector", + LabelType::ValidatorReplicaSet, + ); + bootstrap_validator.add_label( + "service/name", + "bootstrap-validator-selector", + LabelType::ValidatorReplicaSet, + ); + bootstrap_validator.add_label( + "validator/type", + "bootstrap", + LabelType::ValidatorReplicaSet, + ); + bootstrap_validator.add_label( + "validator/identity", + bootstrap_keypair.pubkey().to_string(), + LabelType::ValidatorReplicaSet, + ); + + // create bootstrap replica set + match kub_controller.create_bootstrap_validator_replica_set( + bootstrap_validator.image(), + bootstrap_validator.secret().metadata.name.clone(), + bootstrap_validator.replica_set_labels(), + ) { + Ok(replica_set) => bootstrap_validator.set_replica_set(replica_set), + Err(err) => { + error!("Error creating bootstrap validator replicas_set: {err}"); + return; + } + }; + + match kub_controller + .deploy_replicas_set(bootstrap_validator.replica_set()) + .await + { + Ok(_) => { + info!( + "{} deployed successfully", + bootstrap_validator.replica_set_name() + ); + } Err(err) => { - error!("generate genesis error! {}", err); + error!("Error! Failed to deploy bootstrap validator replicas_set. 
err: {err}"); return; } + }; + + bootstrap_validator.add_label( + "service/name", + "bootstrap-validator-selector", + LabelType::ValidatorService, + ); + + let bootstrap_service = kub_controller.create_bootstrap_service( + "bootstrap-validator-service", + bootstrap_validator.service_labels(), + ); + match kub_controller.deploy_service(&bootstrap_service).await { + Ok(_) => info!("bootstrap validator service deployed successfully"), + Err(err) => error!( + "Error! Failed to deploy bootstrap validator service. err: {:?}", + err + ), + } + + //load balancer service. only create one and use for all deployments + let load_balancer_label = + kub_controller.create_selector("load-balancer/name", "load-balancer-selector"); + //create load balancer + let load_balancer = kub_controller.create_validator_load_balancer( + "bootstrap-and-non-voting-lb-service", + &load_balancer_label, + ); + + //deploy load balancer + match kub_controller.deploy_service(&load_balancer).await { + Ok(_) => info!("load balancer service deployed successfully"), + Err(err) => error!( + "Error! Failed to deploy load balancer service. 
err: {:?}", + err + ), + } + + // wait for bootstrap replicaset to deploy + while { + match kub_controller + .check_replica_set_ready(bootstrap_validator.replica_set_name().as_str()) + .await + { + Ok(ok) => !ok, // Continue the loop if replica set is not ready: Ok(false) + Err(_) => panic!("Error occurred while checking replica set readiness"), + } + } { + info!( + "replica set: {} not ready...", + bootstrap_validator.replica_set_name() + ); + thread::sleep(Duration::from_secs(1)); } } diff --git a/src/release.rs b/src/release.rs index d498bff..49fb08f 100644 --- a/src/release.rs +++ b/src/release.rs @@ -11,7 +11,7 @@ use { strum_macros::{EnumString, IntoStaticStr, VariantNames}, }; -#[derive(Debug, Clone)] +#[derive(Debug, PartialEq, Clone)] pub enum DeployMethod { Local(String), ReleaseChannel(String), @@ -29,6 +29,8 @@ pub struct BuildConfig { deploy_method: DeployMethod, build_type: BuildType, solana_root_path: PathBuf, + docker_build: bool, + build_path: PathBuf, } impl BuildConfig { @@ -36,14 +38,30 @@ impl BuildConfig { deploy_method: DeployMethod, build_type: BuildType, solana_root_path: &Path, + docker_build: bool, ) -> Self { - Self { + let build_path = match deploy_method { + DeployMethod::Local(_) => solana_root_path.join("farf/bin"), + DeployMethod::ReleaseChannel(_) => solana_root_path.join("solana-release/bin"), + }; + + BuildConfig { deploy_method, build_type, solana_root_path: solana_root_path.to_path_buf(), + docker_build, + build_path, } } + pub fn build_path(&self) -> PathBuf { + self.build_path.clone() + } + + pub fn docker_build(&self) -> bool { + self.docker_build + } + pub async fn prepare(&self) -> Result<(), Box> { match &self.deploy_method { DeployMethod::ReleaseChannel(channel) => match self.setup_tar_deploy(channel).await { @@ -64,7 +82,7 @@ impl BuildConfig { async fn setup_tar_deploy(&self, release_channel: &String) -> Result> { let file_name = "solana-release"; let tar_filename = format!("{file_name}.tar.bz2"); - info!("tar file: 
{}", tar_filename); + info!("tar file: {tar_filename}"); self.download_release_from_channel(&tar_filename, release_channel) .await?; @@ -128,14 +146,14 @@ impl BuildConfig { let tag_object = solana_repo.revparse_single(tag)?.id(); // Check if the commit associated with the tag is the same as the current commit if tag_object == commit { - info!("The current commit is associated with tag: {}", tag); + info!("The current commit is associated with tag: {tag}"); note = tag_object.to_string(); break; } } // Write to branch/tag and commit to version.yml - let content = format!("channel: devbuild {}\ncommit: {}", note, commit); + let content = format!("channel: devbuild {note}\ncommit: {commit}"); std::fs::write(self.solana_root_path.join("farf/version.yml"), content) .expect("Failed to write version.yml"); @@ -148,22 +166,19 @@ impl BuildConfig { tar_filename: &str, release_channel: &String, ) -> Result<(), Box> { - info!("Downloading release from channel: {}", release_channel); + info!("Downloading release from channel: {release_channel}"); let file_path = self.solana_root_path.join(tar_filename); // Remove file if let Err(err) = fs::remove_file(&file_path) { if err.kind() != std::io::ErrorKind::NotFound { - return Err(format!("{}: {:?}", "Error while removing file:", err).into()); + return Err(format!("Error while removing file: {err:?}").into()); } } let download_url = format!( - "{}{}{}", - "https://release.solana.com/", - release_channel, - "/solana-release-x86_64-unknown-linux-gnu.tar.bz2" + "https://release.solana.com/{release_channel}/solana-release-x86_64-unknown-linux-gnu.tar.bz2" ); - info!("download_url: {}", download_url); + info!("download_url: {download_url}"); download_to_temp( download_url.as_str(), diff --git a/src/scripts/bootstrap-startup-script.sh b/src/scripts/bootstrap-startup-script.sh new file mode 100755 index 0000000..56e0a7e --- /dev/null +++ b/src/scripts/bootstrap-startup-script.sh @@ -0,0 +1,184 @@ +#!/bin/bash +set -e + +# start 
faucet +nohup solana-faucet --keypair bootstrap-accounts/faucet.json & + +# Start the bootstrap validator node +# shellcheck disable=SC1091 +source /home/solana/k8s-cluster-scripts/common.sh + +program="agave-validator" + +no_restart=0 + +echo "PROGRAM: $program" + +args=() +while [[ -n $1 ]]; do + if [[ ${1:0:1} = - ]]; then + if [[ $1 = --init-complete-file ]]; then + args+=("$1" "$2") + shift 2 + elif [[ $1 = --gossip-host ]]; then # set with env variables + args+=("$1" "$2") + shift 2 + elif [[ $1 = --gossip-port ]]; then # set with env variables + args+=("$1" "$2") + shift 2 + elif [[ $1 = --dev-halt-at-slot ]]; then # not enabled in net.sh + args+=("$1" "$2") + shift 2 + elif [[ $1 = --dynamic-port-range ]]; then # not enabled in net.sh + args+=("$1" "$2") + shift 2 + elif [[ $1 = --limit-ledger-size ]]; then + args+=("$1" "$2") + shift 2 + elif [[ $1 = --no-rocksdb-compaction ]]; then # not enabled in net.sh + args+=("$1") + shift + elif [[ $1 = --enable-rpc-transaction-history ]]; then # enabled through full-rpc + args+=("$1") + shift + elif [[ $1 = --rpc-pubsub-enable-block-subscription ]]; then # not enabled in net.sh + args+=("$1") + shift + elif [[ $1 = --enable-cpi-and-log-storage ]]; then # not enabled in net.sh + args+=("$1") + shift + elif [[ $1 = --enable-extended-tx-metadata-storage ]]; then # enabled through full-rpc + args+=("$1") + shift + elif [[ $1 = --enable-rpc-bigtable-ledger-storage ]]; then + args+=("$1") + shift + elif [[ $1 = --tpu-disable-quic ]]; then + args+=("$1") + shift + elif [[ $1 = --tpu-enable-udp ]]; then + args+=("$1") + shift + elif [[ $1 = --rpc-send-batch-ms ]]; then # not enabled in net.sh + args+=("$1" "$2") + shift 2 + elif [[ $1 = --rpc-send-batch-size ]]; then # not enabled in net.sh + args+=("$1" "$2") + shift 2 + elif [[ $1 = --skip-poh-verify ]]; then + args+=("$1") + shift + elif [[ $1 = --no-restart ]]; then # not enabled in net.sh + no_restart=1 + shift + elif [[ $1 == --wait-for-supermajority ]]; then + 
args+=("$1" "$2") + shift 2 + elif [[ $1 == --expected-bank-hash ]]; then + args+=("$1" "$2") + shift 2 + elif [[ $1 == --accounts ]]; then + args+=("$1" "$2") + shift 2 + elif [[ $1 == --maximum-snapshots-to-retain ]]; then # not enabled in net.sh + args+=("$1" "$2") + shift 2 + elif [[ $1 == --no-snapshot-fetch ]]; then + args+=("$1") + shift + elif [[ $1 == --accounts-db-skip-shrink ]]; then + args+=("$1") + shift + elif [[ $1 == --require-tower ]]; then + args+=("$1") + shift + elif [[ $1 = --log-messages-bytes-limit ]]; then # not enabled in net.sh + args+=("$1" "$2") + shift 2 + else + echo "Unknown argument: $1" + $program --help + exit 1 + fi + else + echo "Unknown argument: $1" + $program --help + exit 1 + fi +done + +# These keypairs are created by ./setup.sh and included in the genesis config +identity=bootstrap-accounts/identity.json +vote_account=bootstrap-accounts/vote.json + +ledger_dir=/home/solana/ledger +[[ -d "$ledger_dir" ]] || { + echo "$ledger_dir does not exist" + exit 1 +} + +args+=( + --no-os-network-limits-test \ + --no-wait-for-vote-to-start-leader \ + --snapshot-interval-slots 200 \ + --identity "$identity" \ + --vote-account "$vote_account" \ + --ledger "$ledger_dir" \ + --log - \ + --gossip-host "$MY_POD_IP" \ + --gossip-port 8001 \ + --rpc-port 8899 \ + --rpc-faucet-address "$MY_POD_IP":9900 \ + --no-poh-speed-test \ + --no-incremental-snapshots \ + --full-rpc-api \ + --allow-private-addr \ + --enable-rpc-transaction-history +) + +echo "Bootstrap Args" +for arg in "${args[@]}"; do + echo "$arg" +done + +pid= +kill_node() { + # Note: do not echo anything from this function to ensure $pid is actually + # killed when stdout/stderr are redirected + set +ex + if [[ -n $pid ]]; then + declare _pid=$pid + pid= + kill "$_pid" || true + wait "$_pid" || true + fi +} + +kill_node_and_exit() { + kill_node + exit +} + +trap 'kill_node_and_exit' INT TERM ERR + +while true; do + echo "$program ${args[*]}" + $program "${args[@]}" & + pid=$! 
+ echo "pid: $pid" + + if ((no_restart)); then + wait "$pid" + exit $? + fi + + while true; do + if [[ -z $pid ]] || ! kill -0 "$pid"; then + echo "############## validator exited, restarting ##############" + break + fi + sleep 1 + done + + kill_node +done diff --git a/src/scripts/common.sh b/src/scripts/common.sh new file mode 100755 index 0000000..dd7a32d --- /dev/null +++ b/src/scripts/common.sh @@ -0,0 +1,123 @@ +# |source| this file +# +# Common utilities shared by other scripts in this directory +# +# The following directive disable complaints about unused variables in this +# file: +# shellcheck disable=2034 + +prebuild= +if [[ $1 = "--prebuild" ]]; then + prebuild=true +fi + +if [[ $(uname) != Linux ]]; then + # Protect against unsupported configurations to prevent non-obvious errors + # later. Arguably these should be fatal errors but for now prefer tolerance. + if [[ -n $SOLANA_CUDA ]]; then + echo "Warning: CUDA is not supported on $(uname)" + SOLANA_CUDA= + fi +fi + +if [[ -n $USE_INSTALL || ! 
-f "$SOLANA_ROOT"/Cargo.toml ]]; then + # echo "define if solana program" + solana_program() { + # echo "call if solana program" + declare program="$1" + if [[ -z $program ]]; then + printf "solana" + else + printf "solana-%s" "$program" + fi + } +else + echo "define else solana program" + solana_program() { + echo "call if solana program" >&2 + declare program="$1" + declare crate="$program" + if [[ -z $program ]]; then + crate="cli" + program="solana" + else + program="solana-$program" + fi + + if [[ -n $NDEBUG ]]; then + maybe_release=--release + fi + + # Prebuild binaries so that CI sanity check timeout doesn't include build time + if [[ $prebuild ]]; then + ( + set -x + # shellcheck disable=SC2086 # Don't want to double quote + cargo $CARGO_TOOLCHAIN build $maybe_release --bin $program + ) + fi + + printf "cargo $CARGO_TOOLCHAIN run $maybe_release --bin %s %s -- " "$program" + } +fi + +solana_bench_tps=$(solana_program bench-tps) +solana_faucet=$(solana_program faucet) +solana_validator=$(solana_program validator) +solana_validator_cuda="$solana_validator --cuda" +solana_genesis=$(solana_program genesis) +solana_gossip=$(solana_program gossip) +solana_keygen=$(solana_program keygen) +solana_ledger_tool=$(solana_program ledger-tool) +solana_cli=$(solana_program) + +export RUST_BACKTRACE=1 + +# https://gist.github.com/cdown/1163649 +urlencode() { + declare s="$1" + declare l=$((${#s} - 1)) + for i in $(seq 0 $l); do + declare c="${s:$i:1}" + case $c in + [a-zA-Z0-9.~_-]) + echo -n "$c" + ;; + *) + printf '%%%02X' "'$c" + ;; + esac + done +} + +default_arg() { + declare name=$1 + declare value=$2 + + for arg in "${args[@]}"; do + if [[ $arg = "$name" ]]; then + return + fi + done + + if [[ -n $value ]]; then + args+=("$name" "$value") + else + args+=("$name") + fi +} + +replace_arg() { + declare name=$1 + declare value=$2 + + default_arg "$name" "$value" + + declare index=0 + for arg in "${args[@]}"; do + index=$((index + 1)) + if [[ $arg = "$name" ]]; then + 
args[$index]="$value" + fi + done +} diff --git a/src/validator.rs b/src/validator.rs new file mode 100644 index 0000000..41d4164 --- /dev/null +++ b/src/validator.rs @@ -0,0 +1,83 @@ +use { + crate::{docker::DockerImage, ValidatorType}, + k8s_openapi::api::{apps::v1::ReplicaSet, core::v1::Secret}, + std::{collections::BTreeMap, string::String}, +}; + +pub enum LabelType { + ValidatorReplicaSet, + ValidatorService, +} + +pub struct Validator { + validator_type: ValidatorType, + image: DockerImage, + secret: Secret, + replica_set_labels: BTreeMap<String, String>, + replica_set: ReplicaSet, + service_labels: BTreeMap<String, String>, +} + +impl Validator { + pub fn new(image: DockerImage) -> Self { + Self { + validator_type: image.validator_type(), + image, + secret: Secret::default(), + replica_set_labels: BTreeMap::new(), + replica_set: ReplicaSet::default(), + service_labels: BTreeMap::new(), + } + } + + pub fn image(&self) -> &DockerImage { + &self.image + } + + pub fn secret(&self) -> &Secret { + &self.secret + } + + pub fn validator_type(&self) -> &ValidatorType { + &self.validator_type + } + + pub fn add_label<K, V>(&mut self, key: K, value: V, label_type: LabelType) + where + K: Into<String>, + V: Into<String>, + { + match label_type { + LabelType::ValidatorReplicaSet => { + self.replica_set_labels.insert(key.into(), value.into()); + } + LabelType::ValidatorService => { + self.service_labels.insert(key.into(), value.into()); + } + } + } + + pub fn replica_set_labels(&self) -> &BTreeMap<String, String> { + &self.replica_set_labels + } + + pub fn service_labels(&self) -> &BTreeMap<String, String> { + &self.service_labels + } + + pub fn set_secret(&mut self, secret: Secret) { + self.secret = secret; + } + + pub fn set_replica_set(&mut self, replica_set: ReplicaSet) { + self.replica_set = replica_set; + } + + pub fn replica_set(&self) -> &ReplicaSet { + &self.replica_set + } + + pub fn replica_set_name(&self) -> &String { + self.replica_set.metadata.name.as_ref().unwrap() + } +} diff --git a/src/validator_config.rs b/src/validator_config.rs new 
file mode 100644 index 0000000..35f4a4b --- /dev/null +++ b/src/validator_config.rs @@ -0,0 +1,45 @@ +use solana_sdk::pubkey::Pubkey; + +pub struct ValidatorConfig { + pub tpu_enable_udp: bool, + pub tpu_disable_quic: bool, + pub max_ledger_size: Option<u64>, + pub skip_poh_verify: bool, + pub no_snapshot_fetch: bool, + pub require_tower: bool, + pub enable_full_rpc: bool, + pub known_validators: Option<Vec<Pubkey>>, +} + +impl std::fmt::Display for ValidatorConfig { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + let known_validators = match &self.known_validators { + Some(validators) => validators + .iter() + .map(|v| v.to_string()) + .collect::<Vec<_>>() + .join(", "), + None => "None".to_string(), + }; + write!( + f, + "Runtime Config\n\ + tpu_enable_udp: {}\n\ + tpu_disable_quic: {}\n\ + max_ledger_size: {:?}\n\ + skip_poh_verify: {}\n\ + no_snapshot_fetch: {}\n\ + require_tower: {}\n\ + enable_full_rpc: {}\n\ + known_validators: {}", + self.tpu_enable_udp, + self.tpu_disable_quic, + self.max_ledger_size, + self.skip_poh_verify, + self.no_snapshot_fetch, + self.require_tower, + self.enable_full_rpc, + known_validators, + ) + } +}