diff --git a/javascript/packages/orchestrator/src/constants.ts b/javascript/packages/orchestrator/src/constants.ts index bb2dbb2dc..3a44c7d8d 100644 --- a/javascript/packages/orchestrator/src/constants.ts +++ b/javascript/packages/orchestrator/src/constants.ts @@ -45,8 +45,10 @@ const GENESIS_STATE_FILENAME = "genesis-state"; const GENESIS_WASM_FILENAME = "genesis-wasm"; const TMP_DONE = "echo done > /tmp/zombie-tmp-done"; -const WAIT_UNTIL_SCRIPT_SUFIX = `until [ -f ${FINISH_MAGIC_FILE} ]; do echo waiting for copy files to finish; sleep 1; done; echo copy files has finished`; -const K8S_WAIT_UNTIL_SCRIPT_SUFIX = `until [ -f ${FINISH_MAGIC_FILE} ]; do /cfg/coreutils echo "waiting for copy files to finish"; /cfg/coreutils sleep 1; done; /cfg/coreutils echo "copy files has finished"`; +const TRANSFER_CONTAINER_WAIT_LOG = "waiting for tar to finish"; +const NODE_CONTAINER_WAIT_LOG = "waiting for copy files to finish"; +const WAIT_UNTIL_SCRIPT_SUFIX = `until [ -f ${FINISH_MAGIC_FILE} ]; do echo ${NODE_CONTAINER_WAIT_LOG}; sleep 1; done; echo copy files has finished`; +const K8S_WAIT_UNTIL_SCRIPT_SUFIX = `until [ -f ${FINISH_MAGIC_FILE} ]; do /cfg/coreutils echo "${NODE_CONTAINER_WAIT_LOG}"; /cfg/coreutils sleep 1; done; /cfg/coreutils echo "copy files has finished"`; const TRANSFER_CONTAINER_NAME = "transfer-files-container"; const ZOMBIE_BUCKET = "zombienet-logs"; const WS_URI_PATTERN = "ws://{{IP}}:{{PORT}}"; @@ -131,6 +133,8 @@ export { GENESIS_STATE_FILENAME, GENESIS_WASM_FILENAME, TMP_DONE, + TRANSFER_CONTAINER_WAIT_LOG, + NODE_CONTAINER_WAIT_LOG, WAIT_UNTIL_SCRIPT_SUFIX, TRANSFER_CONTAINER_NAME, ZOMBIE_BUCKET, diff --git a/javascript/packages/orchestrator/src/providers/k8s/chain-spec.ts b/javascript/packages/orchestrator/src/providers/k8s/chain-spec.ts index 31e38ef7b..703db00e1 100644 --- a/javascript/packages/orchestrator/src/providers/k8s/chain-spec.ts +++ b/javascript/packages/orchestrator/src/providers/k8s/chain-spec.ts @@ -1,12 +1,13 @@ -import { sleep } from "@zombienet/utils"; import { promises as fsPromises, writeFileSync } from "fs"; import { DEFAULT_CHAIN_SPEC, DEFAULT_CHAIN_SPEC_COMMAND, DEFAULT_CHAIN_SPEC_RAW, + NODE_CONTAINER_WAIT_LOG, } from "../../constants"; import { getClient } from "../client"; import { createTempNodeDef, genNodeDef } from "./dynResourceDefinition"; +import { KubeClient } from "./kubeClient"; const debug = require("debug")("zombie::kube::chain-spec"); @@ -19,7 +20,7 @@ export async function setupChainSpec( // We have two options to get the chain-spec file, neither should use the `raw` file/argument // 1: User provide the file (we DON'T expect the raw file) // 2: User provide the chainSpecCommand (without the --raw option) - const client = getClient(); + const client = getClient() as KubeClient; if (chainConfig.chainSpecPath) { await fsPromises.copyFile(chainConfig.chainSpecPath, chainFullPath); } else { @@ -42,7 +43,10 @@ export async function setupChainSpec( const podName = podDef.metadata.name; await client.spawnFromDef(podDef); - debug("copy file from pod"); + debug("waiting for chain-spec"); + await client.waitLog(podName, podName, NODE_CONTAINER_WAIT_LOG); + + debug("Getting the chain spec file from pod to the local environment."); await client.copyFileFromPod( podName, plainChainSpecOutputFilePath, @@ -62,7 +66,7 @@ export async function getChainSpecRaw( chainCommand: string, chainFullPath: string, ): Promise { - const client = getClient(); + const client = getClient() as KubeClient; const plainPath = chainFullPath.replace(".json", "-plain.json"); const remoteChainSpecFullPath = @@ -91,25 +95,8 @@ export async function getChainSpecRaw( }, ]); - // let's just wait 2 secs before download - // Creating the raw version can take a couple of seconds, loop until the `build-spec` command is done - // or timedout (20 seconds) and fallback to the validation. - for (let i = 0; i < 10; i++) { - try { - await client.runCommand([ - "exec", - podName, - "--", - "/cfg/coreutils ls", - "/tmp/zombie-tmp-done", - ]); - // we can go ahead - break; - } catch (_) { - debug("waiting for raw chain-spec"); - await sleep(2000); - } - } + debug("waiting for raw chain-spec"); + await client.waitLog(podName, podName, NODE_CONTAINER_WAIT_LOG); debug("Getting the raw chain spec file from pod to the local environment."); await client.copyFileFromPod( diff --git a/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts b/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts index 9f1954c5d..6f5d9eeef 100644 --- a/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts +++ b/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts @@ -9,6 +9,7 @@ import { RPC_WS_PORT, TMP_DONE, TRANSFER_CONTAINER_NAME, + TRANSFER_CONTAINER_WAIT_LOG, WAIT_UNTIL_SCRIPT_SUFIX, } from "../../constants"; import { Network } from "../../network"; @@ -112,18 +113,18 @@ function make_transfer_containter(): any { "ash", "-c", [ - "wget https://github.com/moparisthebest/static-curl/releases/download/v7.83.1/curl-amd64 -O /cfg/curl", + "wget github.com/moparisthebest/static-curl/releases/download/v7.83.1/curl-amd64 -O /cfg/curl", "echo downloaded", "chmod +x /cfg/curl", "echo chmoded", - "wget https://github.com/uutils/coreutils/releases/download/0.0.17/coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz -O /cfg/coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz", + "wget github.com/uutils/coreutils/releases/download/0.0.17/coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz -O /cfg/coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz", "cd /cfg", "tar -xvzf ./coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz", "cp ./coreutils-0.0.17-x86_64-unknown-linux-musl/coreutils /cfg/coreutils", "chmod +x /cfg/coreutils", "rm -rf ./coreutils-0.0.17-x86_64-unknown-linux-musl", "echo coreutils downloaded", - `until [ -f ${FINISH_MAGIC_FILE} ]; do echo waiting for tar to finish; sleep 1; done; echo copy files has finished`, + `until [ -f ${FINISH_MAGIC_FILE} ]; do echo ${TRANSFER_CONTAINER_WAIT_LOG}; sleep 1; done; echo copy files has finished`, ].join(" && "), ], }; diff --git a/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts b/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts index 82ffd6946..944ebf273 100644 --- a/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts +++ b/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts @@ -3,6 +3,7 @@ import { CreateLogTable, decorators, getSha256, + retry, sleep, writeLocalJsonFile, } from "@zombienet/utils"; @@ -15,6 +16,7 @@ import { FINISH_MAGIC_FILE, P2P_PORT, TRANSFER_CONTAINER_NAME, + TRANSFER_CONTAINER_WAIT_LOG, } from "../../constants"; import { fileMap } from "../../types"; import { @@ -102,8 +104,8 @@ export class KubeClient extends Client { async spawnFromDef( podDef: any, filesToCopy: fileMap[] = [], - keystore: string, - chainSpecId: string, + keystore?: string, + chainSpecId?: string, dbSnapshot?: string, ): Promise { const name = podDef.metadata.name; @@ -128,8 +130,8 @@ export class KubeClient extends Client { logTable.print(); - await this.createResource(podDef, true, false); - await this.wait_transfer_container(name); + await this.createResource(podDef, true); + await this.waitTransferContainerReady(name); if (dbSnapshot) { // we need to get the snapshot from a public access @@ -201,7 +203,7 @@ export class KubeClient extends Client { } await this.putLocalMagicFile(name); - await this.wait_pod_ready(name); + await this.waitPodReady(name); logTable = new CreateLogTable({ colWidths: [20, 100], }); @@ -230,7 +232,6 @@ export class KubeClient extends Client { async createResource( resourseDef: any, scoped: boolean = false, - waitReady: boolean = false, ): Promise { await this.runCommand(["apply", "-f", "-"], { resourceDef: JSON.stringify(resourseDef), @@ -240,66 +241,67 @@ export class KubeClient extends Client { debug(resourseDef); const name = resourseDef.metadata.name; const kind: string = resourseDef.kind.toLowerCase(); + } - if (waitReady) { - // loop until ready - let t = this.timeout; - const args = ["get", kind, name, "-o", "jsonpath={.status}"]; - do { + async waitPodReady(pod: string): Promise { + const args = ["get", "pod", pod, "--no-headers"]; + await retry( + 3000, + this.timeout * 1000, + async () => { const result = await this.runCommand(args); - const status = JSON.parse(result.stdout); - if (["Running", "Succeeded"].includes(status.phase)) return; + if (result.stdout.match(/Running|Completed/)) return true; + if (result.stdout.match(/ErrImagePull|ImagePullBackOff/)) + throw new Error(`Error pulling image for pod : ${pod}`); + }, + `waitPodReady(): pod: ${pod}`, + ); + } - // check if we are waiting init container - for (const s of status.initContainerStatuses) { - if (s.name === TRANSFER_CONTAINER_NAME && s.state.running) return; + async waitContainerInState( + pod: string, + container: string, + state: string, + ): Promise { + const args = ["get", "pod", pod, "-o", "jsonpath={.status}"]; + await retry( + 3000, + this.timeout * 1000, + async () => { + const result = await this.runCommand(args); + const json = JSON.parse(result.stdout); + + let containerStatuses = json?.containerStatuses ?? []; + let initContainerStatuses = json?.initContainerStatuses ?? []; + for (const status of containerStatuses.concat(initContainerStatuses)) { + if (status.name === container && state in status.state) return true; } + }, + `waitContainerInState(): pod: ${pod}, container: ${container}, state: ${state}`, + ); + } - await new Promise((resolve) => setTimeout(resolve, 3000)); - t -= 3; - } while (t > 0); + async waitLog(pod: string, container: string, log: string): Promise { + const args = ["logs", "--tail=1", pod, "-c", `${container}`]; + await retry( + 3000, + this.timeout * 1000, + async () => { + const result = await this.runCommand(args); - throw new Error(`Timeout(${this.timeout}) for ${kind} : ${name}`); - } + if (result.stdout == log) return true; + }, + `waitLog(): pod: ${pod}, container: ${container}, log: ${log}`, + ); } - async wait_pod_ready(podName: string): Promise { - // loop until ready - let t = this.timeout; - const args = ["get", "pod", podName, "--no-headers"]; - do { - const result = await this.runCommand(args); - if (result.stdout.match(/Running|Completed/)) return; - if (result.stdout.match(/ErrImagePull|ImagePullBackOff/)) - throw new Error(`Error pulling image for pod : ${podName}`); - - await new Promise((resolve) => setTimeout(resolve, 3000)); - t -= 3; - } while (t > 0); - - throw new Error(`Timeout(${this.timeout}) for pod : ${podName}`); - } - async wait_transfer_container(podName: string): Promise { - // loop until ready - let t = this.timeout; - const args = ["get", "pod", podName, "-o", "jsonpath={.status}"]; - do { - const result = await this.runCommand(args); - const status = JSON.parse(result.stdout); - - // check if we are waiting init container - if (status.initContainerStatuses) { - for (const s of status.initContainerStatuses) { - if (s.name === TRANSFER_CONTAINER_NAME && s.state.running) return; - } - } - - await new Promise((resolve) => setTimeout(resolve, 3000)); - t -= 3; - } while (t > 0); + async waitTransferContainerReady(pod: string): Promise { + await this.waitContainerInState(pod, TRANSFER_CONTAINER_NAME, "running"); - throw new Error( - `Timeout(${this.timeout}) for transfer container for pod : ${podName}`, + await this.waitLog( + pod, + TRANSFER_CONTAINER_NAME, + TRANSFER_CONTAINER_WAIT_LOG, ); } @@ -540,7 +542,7 @@ export class KubeClient extends Client { } // wait until fileserver is ready, fix race condition #700. - await this.wait_pod_ready("fileserver"); + await this.waitPodReady("fileserver"); sleep(3 * 1000); let fileServerOk = false; let attempts = 0; @@ -831,7 +833,7 @@ export class KubeClient extends Client { this.namespace, ); - await this.wait_pod_ready("introspector"); + await this.waitPodReady("introspector"); } async uploadToFileserver( diff --git a/javascript/packages/utils/src/misc.ts b/javascript/packages/utils/src/misc.ts index f3d4f04c6..b8210061b 100644 --- a/javascript/packages/utils/src/misc.ts +++ b/javascript/packages/utils/src/misc.ts @@ -6,6 +6,24 @@ export async function sleep(ms: number) { return new Promise((resolve) => setTimeout(resolve, ms)); } +export async function retry( + delayMs: number, + timeoutMs: number, + fn: () => Promise, + errMsg: string, +) { + do { + if (await fn()) { + return; + } + + await sleep(delayMs); + timeoutMs -= delayMs; + } while (timeoutMs > 0); + + throw new Error(`Timeout(${timeoutMs}) for: ${errMsg}`); +} + export function generateNamespace(n: number = 16): string { const buf = randomBytes(n); return buf.toString("hex");