From 3cdc2b77e00cc7901185b9382066bd5de1c3d84a Mon Sep 17 00:00:00 2001 From: Serban Iorga Date: Sat, 18 Mar 2023 11:09:21 +0200 Subject: [PATCH 1/5] Remove unused fn argument fn: KubeClient::createResource arg: waitReady --- .../src/providers/k8s/kubeClient.ts | 25 ++----------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts b/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts index 82ffd6946..7455ab6f5 100644 --- a/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts +++ b/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts @@ -128,7 +128,7 @@ export class KubeClient extends Client { logTable.print(); - await this.createResource(podDef, true, false); + await this.createResource(podDef, true); await this.wait_transfer_container(name); if (dbSnapshot) { @@ -230,7 +230,6 @@ export class KubeClient extends Client { async createResource( resourseDef: any, scoped: boolean = false, - waitReady: boolean = false, ): Promise { await this.runCommand(["apply", "-f", "-"], { resourceDef: JSON.stringify(resourseDef), @@ -240,27 +239,6 @@ export class KubeClient extends Client { debug(resourseDef); const name = resourseDef.metadata.name; const kind: string = resourseDef.kind.toLowerCase(); - - if (waitReady) { - // loop until ready - let t = this.timeout; - const args = ["get", kind, name, "-o", "jsonpath={.status}"]; - do { - const result = await this.runCommand(args); - const status = JSON.parse(result.stdout); - if (["Running", "Succeeded"].includes(status.phase)) return; - - // check if we are waiting init container - for (const s of status.initContainerStatuses) { - if (s.name === TRANSFER_CONTAINER_NAME && s.state.running) return; - } - - await new Promise((resolve) => setTimeout(resolve, 3000)); - t -= 3; - } while (t > 0); - - throw new Error(`Timeout(${this.timeout}) for ${kind} : ${name}`); - } } async wait_pod_ready(podName: string): Promise { @@ -279,6 +257,7 @@ export class KubeClient extends Client { throw new Error(`Timeout(${this.timeout}) for pod : ${podName}`); } + async wait_transfer_container(podName: string): Promise { // loop until ready let t = this.timeout; From b3764fb21eaeeed2004c48f53fd926f1e5d34abf Mon Sep 17 00:00:00 2001 From: Serban Iorga Date: Sat, 18 Mar 2023 12:24:52 +0200 Subject: [PATCH 2/5] Fix KubeClient::wait_transfer_container() - We were assessing if the transfer-files-container is ready only by checking if it's state is "running". While this is necessary, it is not enough. We need to make sure that it finished initializing (downloading coreutils). We do this by looking for the "waiting for tar to finish" line in the logs - Define a reusable method for retrying actions. --- .../packages/orchestrator/src/constants.ts | 2 + .../providers/k8s/dynResourceDefinition.ts | 3 +- .../src/providers/k8s/kubeClient.ts | 95 ++++++++++++------- javascript/packages/utils/src/misc.ts | 18 ++++ 4 files changed, 81 insertions(+), 37 deletions(-) diff --git a/javascript/packages/orchestrator/src/constants.ts b/javascript/packages/orchestrator/src/constants.ts index bb2dbb2dc..9c66e5d8a 100644 --- a/javascript/packages/orchestrator/src/constants.ts +++ b/javascript/packages/orchestrator/src/constants.ts @@ -45,6 +45,7 @@ const GENESIS_STATE_FILENAME = "genesis-state"; const GENESIS_WASM_FILENAME = "genesis-wasm"; const TMP_DONE = "echo done > /tmp/zombie-tmp-done"; +const TRANSFER_CONTAINER_WAIT_LOG = "waiting for tar to finish"; const WAIT_UNTIL_SCRIPT_SUFIX = `until [ -f ${FINISH_MAGIC_FILE} ]; do echo waiting for copy files to finish; sleep 1; done; echo copy files has finished`; const K8S_WAIT_UNTIL_SCRIPT_SUFIX = `until [ -f ${FINISH_MAGIC_FILE} ]; do /cfg/coreutils echo "waiting for copy files to finish"; /cfg/coreutils sleep 1; done; /cfg/coreutils echo "copy files has finished"`; const TRANSFER_CONTAINER_NAME = "transfer-files-container"; @@ -131,6 +132,7 @@ export { GENESIS_STATE_FILENAME, GENESIS_WASM_FILENAME, TMP_DONE, + TRANSFER_CONTAINER_WAIT_LOG, WAIT_UNTIL_SCRIPT_SUFIX, TRANSFER_CONTAINER_NAME, ZOMBIE_BUCKET, diff --git a/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts b/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts index 9f1954c5d..0ace83c74 100644 --- a/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts +++ b/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts @@ -10,6 +10,7 @@ import { TMP_DONE, TRANSFER_CONTAINER_NAME, WAIT_UNTIL_SCRIPT_SUFIX, + TRANSFER_CONTAINER_WAIT_LOG, } from "../../constants"; import { Network } from "../../network"; import { Node } from "../../types"; @@ -123,7 +124,7 @@ function make_transfer_containter(): any { "chmod +x /cfg/coreutils", "rm -rf ./coreutils-0.0.17-x86_64-unknown-linux-musl", "echo coreutils downloaded", - `until [ -f ${FINISH_MAGIC_FILE} ]; do echo waiting for tar to finish; sleep 1; done; echo copy files has finished`, + `until [ -f ${FINISH_MAGIC_FILE} ]; do echo ${TRANSFER_CONTAINER_WAIT_LOG}; sleep 1; done; echo copy files has finished`, ].join(" && "), ], }; diff --git a/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts b/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts index 7455ab6f5..431e84f26 100644 --- a/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts +++ b/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts @@ -3,6 +3,7 @@ import { CreateLogTable, decorators, getSha256, + retry, sleep, writeLocalJsonFile, } from "@zombienet/utils"; @@ -15,6 +16,7 @@ import { FINISH_MAGIC_FILE, P2P_PORT, TRANSFER_CONTAINER_NAME, + TRANSFER_CONTAINER_WAIT_LOG, } from "../../constants"; import { fileMap } from "../../types"; import { @@ -129,7 +131,7 @@ export class KubeClient extends Client { logTable.print(); await this.createResource(podDef, true); - await this.wait_transfer_container(name); + await this.waitTransferContainerReady(name); if (dbSnapshot) { // we need to get the snapshot from a public access @@ -201,7 +203,7 @@ export class KubeClient extends Client { } await this.putLocalMagicFile(name); - await this.wait_pod_ready(name); + await this.waitPodReady(name); logTable = new CreateLogTable({ colWidths: [20, 100], }); @@ -241,44 +243,65 @@ export class KubeClient extends Client { const kind: string = resourseDef.kind.toLowerCase(); } - async wait_pod_ready(podName: string): Promise { - // loop until ready - let t = this.timeout; - const args = ["get", "pod", podName, "--no-headers"]; - do { - const result = await this.runCommand(args); - if (result.stdout.match(/Running|Completed/)) return; - if (result.stdout.match(/ErrImagePull|ImagePullBackOff/)) - throw new Error(`Error pulling image for pod : ${podName}`); - - await new Promise((resolve) => setTimeout(resolve, 3000)); - t -= 3; - } while (t > 0); + async waitPodReady(pod: string): Promise { + const args = ["get", "pod", pod, "--no-headers"]; + await retry( + 3000, + this.timeout * 1000, + async () => { + const result = await this.runCommand(args); + if (result.stdout.match(/Running|Completed/)) return true; + if (result.stdout.match(/ErrImagePull|ImagePullBackOff/)) + throw new Error(`Error pulling image for pod : ${pod}`); + }, + `waitPodReady(): pod: ${pod}`, + ); + } - throw new Error(`Timeout(${this.timeout}) for pod : ${podName}`); + async waitContainerInState( + pod: string, + container: string, + state: string, + ): Promise { + const args = ["get", "pod", pod, "-o", "jsonpath={.status}"]; + await retry( + 3000, + this.timeout * 1000, + async () => { + const result = await this.runCommand(args); + const json = JSON.parse(result.stdout); + + let containerStatuses = json?.containerStatuses ?? []; + let initContainerStatuses = json?.initContainerStatuses ?? []; + for (const status of containerStatuses.concat(initContainerStatuses)) { + if (status.name === container && state in status.state) return true; + } + }, + `waitContainerInState(): pod: ${pod}, container: ${container}, state: ${state}`, + ); } - async wait_transfer_container(podName: string): Promise { - // loop until ready - let t = this.timeout; - const args = ["get", "pod", podName, "-o", "jsonpath={.status}"]; - do { - const result = await this.runCommand(args); - const status = JSON.parse(result.stdout); + async waitLog(pod: string, container: string, log: string): Promise { + const args = ["logs", "--tail=1", pod, "-c", `${container}`]; + await retry( + 3000, + this.timeout * 1000, + async () => { + const result = await this.runCommand(args); - // check if we are waiting init container - if (status.initContainerStatuses) { - for (const s of status.initContainerStatuses) { - if (s.name === TRANSFER_CONTAINER_NAME && s.state.running) return; - } - } + if (result.stdout == log) return true; + }, + `waitLog(): pod: ${pod}, container: ${container}, log: ${log}`, + ); + } - await new Promise((resolve) => setTimeout(resolve, 3000)); - t -= 3; - } while (t > 0); + async waitTransferContainerReady(pod: string): Promise { + await this.waitContainerInState(pod, TRANSFER_CONTAINER_NAME, "running"); - throw new Error( - `Timeout(${this.timeout}) for transfer container for pod : ${podName}`, + await this.waitLog( + pod, + TRANSFER_CONTAINER_NAME, + TRANSFER_CONTAINER_WAIT_LOG, ); } @@ -519,7 +542,7 @@ export class KubeClient extends Client { } // wait until fileserver is ready, fix race condition #700. - await this.wait_pod_ready("fileserver"); + await this.waitPodReady("fileserver"); sleep(3 * 1000); let fileServerOk = false; let attempts = 0; @@ -810,7 +833,7 @@ export class KubeClient extends Client { this.namespace, ); - await this.wait_pod_ready("introspector"); + await this.waitPodReady("introspector"); } async uploadToFileserver( diff --git a/javascript/packages/utils/src/misc.ts b/javascript/packages/utils/src/misc.ts index f3d4f04c6..b8210061b 100644 --- a/javascript/packages/utils/src/misc.ts +++ b/javascript/packages/utils/src/misc.ts @@ -6,6 +6,24 @@ export async function sleep(ms: number) { return new Promise((resolve) => setTimeout(resolve, ms)); } +export async function retry( + delayMs: number, + timeoutMs: number, + fn: () => Promise, + errMsg: string, +) { + do { + if (await fn()) { + return; + } + + await sleep(delayMs); + timeoutMs -= delayMs; + } while (timeoutMs > 0); + + throw new Error(`Timeout(${timeoutMs}) for: ${errMsg}`); +} + export function generateNamespace(n: number = 16): string { const buf = randomBytes(n); return buf.toString("hex"); From 64749a98250e93b7aaa2ff88c14781be8e112ade Mon Sep 17 00:00:00 2001 From: Serban Iorga Date: Sat, 18 Mar 2023 13:16:10 +0200 Subject: [PATCH 3/5] Fix k8s provider setupChainSpec() - Wait for the chain to start before trying to copy the chain spec file - Use wait_log() for getChainSpecRaw() --- .../packages/orchestrator/src/constants.ts | 6 ++-- .../src/providers/k8s/chain-spec.ts | 32 ++++++------------- .../src/providers/k8s/kubeClient.ts | 4 +-- 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/javascript/packages/orchestrator/src/constants.ts b/javascript/packages/orchestrator/src/constants.ts index 9c66e5d8a..3a44c7d8d 100644 --- a/javascript/packages/orchestrator/src/constants.ts +++ b/javascript/packages/orchestrator/src/constants.ts @@ -46,8 +46,9 @@ const GENESIS_WASM_FILENAME = "genesis-wasm"; const TMP_DONE = "echo done > /tmp/zombie-tmp-done"; const TRANSFER_CONTAINER_WAIT_LOG = "waiting for tar to finish"; -const WAIT_UNTIL_SCRIPT_SUFIX = `until [ -f ${FINISH_MAGIC_FILE} ]; do echo waiting for copy files to finish; sleep 1; done; echo copy files has finished`; -const K8S_WAIT_UNTIL_SCRIPT_SUFIX = `until [ -f ${FINISH_MAGIC_FILE} ]; do /cfg/coreutils echo "waiting for copy files to finish"; /cfg/coreutils sleep 1; done; /cfg/coreutils echo "copy files has finished"`; +const NODE_CONTAINER_WAIT_LOG = "waiting for copy files to finish"; +const WAIT_UNTIL_SCRIPT_SUFIX = `until [ -f ${FINISH_MAGIC_FILE} ]; do echo ${NODE_CONTAINER_WAIT_LOG}; sleep 1; done; echo copy files has finished`; +const K8S_WAIT_UNTIL_SCRIPT_SUFIX = `until [ -f ${FINISH_MAGIC_FILE} ]; do /cfg/coreutils echo "${NODE_CONTAINER_WAIT_LOG}"; /cfg/coreutils sleep 1; done; /cfg/coreutils echo "copy files has finished"`; const TRANSFER_CONTAINER_NAME = "transfer-files-container"; const ZOMBIE_BUCKET = "zombienet-logs"; const WS_URI_PATTERN = "ws://{{IP}}:{{PORT}}"; @@ -133,6 +134,7 @@ export { GENESIS_WASM_FILENAME, TMP_DONE, TRANSFER_CONTAINER_WAIT_LOG, + NODE_CONTAINER_WAIT_LOG, WAIT_UNTIL_SCRIPT_SUFIX, TRANSFER_CONTAINER_NAME, ZOMBIE_BUCKET, diff --git a/javascript/packages/orchestrator/src/providers/k8s/chain-spec.ts b/javascript/packages/orchestrator/src/providers/k8s/chain-spec.ts index 31e38ef7b..fcacbdd8a 100644 --- a/javascript/packages/orchestrator/src/providers/k8s/chain-spec.ts +++ b/javascript/packages/orchestrator/src/providers/k8s/chain-spec.ts @@ -4,9 +4,11 @@ import { DEFAULT_CHAIN_SPEC, DEFAULT_CHAIN_SPEC_COMMAND, DEFAULT_CHAIN_SPEC_RAW, + NODE_CONTAINER_WAIT_LOG, } from "../../constants"; import { getClient } from "../client"; import { createTempNodeDef, genNodeDef } from "./dynResourceDefinition"; +import { KubeClient } from "./kubeClient"; const debug = require("debug")("zombie::kube::chain-spec"); @@ -19,7 +21,7 @@ export async function setupChainSpec( // We have two options to get the chain-spec file, neither should use the `raw` file/argument // 1: User provide the file (we DON'T expect the raw file) // 2: User provide the chainSpecCommand (without the --raw option) - const client = getClient(); + const client = getClient() as KubeClient; if (chainConfig.chainSpecPath) { await fsPromises.copyFile(chainConfig.chainSpecPath, chainFullPath); } else { @@ -42,7 +44,10 @@ export async function setupChainSpec( const podName = podDef.metadata.name; await client.spawnFromDef(podDef); - debug("copy file from pod"); + debug("waiting for chain-spec"); + await client.waitLog(podName, podName, NODE_CONTAINER_WAIT_LOG); + + debug("Getting the chain spec file from pod to the local environment."); await client.copyFileFromPod( podName, plainChainSpecOutputFilePath, @@ -62,7 +67,7 @@ export async function getChainSpecRaw( chainCommand: string, chainFullPath: string, ): Promise { - const client = getClient(); + const client = getClient() as KubeClient; const plainPath = chainFullPath.replace(".json", "-plain.json"); const remoteChainSpecFullPath = @@ -91,25 +96,8 @@ export async function getChainSpecRaw( }, ]); - // let's just wait 2 secs before download - // Creating the raw version can take a couple of seconds, loop until the `build-spec` command is done - // or timedout (20 seconds) and fallback to the validation. - for (let i = 0; i < 10; i++) { - try { - await client.runCommand([ - "exec", - podName, - "--", - "/cfg/coreutils ls", - "/tmp/zombie-tmp-done", - ]); - // we can go ahead - break; - } catch (_) { - debug("waiting for raw chain-spec"); - await sleep(2000); - } - } + debug("waiting for raw chain-spec"); + await client.waitLog(podName, podName, NODE_CONTAINER_WAIT_LOG); debug("Getting the raw chain spec file from pod to the local environment."); await client.copyFileFromPod( diff --git a/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts b/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts index 431e84f26..944ebf273 100644 --- a/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts +++ b/javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts @@ -104,8 +104,8 @@ export class KubeClient extends Client { async spawnFromDef( podDef: any, filesToCopy: fileMap[] = [], - keystore: string, - chainSpecId: string, + keystore?: string, + chainSpecId?: string, dbSnapshot?: string, ): Promise { const name = podDef.metadata.name; From abca3366f993619eddfeb3eddfa463fb4d11ac07 Mon Sep 17 00:00:00 2001 From: Serban Iorga Date: Sat, 18 Mar 2023 13:28:32 +0200 Subject: [PATCH 4/5] Fix wget failures I stumbled upon a lot of failures when downloading coreutils, after curl had been downloaded successfully from the same server. Not sure why. Might be an issue with the wget used in the container. Anyway, not using https seems to fix the issue. --- .../orchestrator/src/providers/k8s/dynResourceDefinition.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts b/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts index 0ace83c74..74457b725 100644 --- a/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts +++ b/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts @@ -113,11 +113,11 @@ function make_transfer_containter(): any { "ash", "-c", [ - "wget https://github.com/moparisthebest/static-curl/releases/download/v7.83.1/curl-amd64 -O /cfg/curl", + "wget github.com/moparisthebest/static-curl/releases/download/v7.83.1/curl-amd64 -O /cfg/curl", "echo downloaded", "chmod +x /cfg/curl", "echo chmoded", - "wget https://github.com/uutils/coreutils/releases/download/0.0.17/coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz -O /cfg/coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz", + "wget github.com/uutils/coreutils/releases/download/0.0.17/coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz -O /cfg/coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz", "cd /cfg", "tar -xvzf ./coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz", "cp ./coreutils-0.0.17-x86_64-unknown-linux-musl/coreutils /cfg/coreutils", From e351e954a0cc146a8772951f978adc356282757c Mon Sep 17 00:00:00 2001 From: Serban Iorga Date: Sat, 18 Mar 2023 15:23:38 +0200 Subject: [PATCH 5/5] Run prettier --- .../packages/orchestrator/src/providers/k8s/chain-spec.ts | 1 - .../orchestrator/src/providers/k8s/dynResourceDefinition.ts | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/javascript/packages/orchestrator/src/providers/k8s/chain-spec.ts b/javascript/packages/orchestrator/src/providers/k8s/chain-spec.ts index fcacbdd8a..703db00e1 100644 --- a/javascript/packages/orchestrator/src/providers/k8s/chain-spec.ts +++ b/javascript/packages/orchestrator/src/providers/k8s/chain-spec.ts @@ -1,4 +1,3 @@ -import { sleep } from "@zombienet/utils"; import { promises as fsPromises, writeFileSync } from "fs"; import { DEFAULT_CHAIN_SPEC, diff --git a/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts b/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts index 74457b725..6f5d9eeef 100644 --- a/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts +++ b/javascript/packages/orchestrator/src/providers/k8s/dynResourceDefinition.ts @@ -9,8 +9,8 @@ import { RPC_WS_PORT, TMP_DONE, TRANSFER_CONTAINER_NAME, - WAIT_UNTIL_SCRIPT_SUFIX, TRANSFER_CONTAINER_WAIT_LOG, + WAIT_UNTIL_SCRIPT_SUFIX, } from "../../constants"; import { Network } from "../../network"; import { Node } from "../../types";