Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix kubernetes provider synchronization issues #802

Merged
merged 5 commits into from
Mar 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions javascript/packages/orchestrator/src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@ const GENESIS_STATE_FILENAME = "genesis-state";
const GENESIS_WASM_FILENAME = "genesis-wasm";

const TMP_DONE = "echo done > /tmp/zombie-tmp-done";
const WAIT_UNTIL_SCRIPT_SUFIX = `until [ -f ${FINISH_MAGIC_FILE} ]; do echo waiting for copy files to finish; sleep 1; done; echo copy files has finished`;
const K8S_WAIT_UNTIL_SCRIPT_SUFIX = `until [ -f ${FINISH_MAGIC_FILE} ]; do /cfg/coreutils echo "waiting for copy files to finish"; /cfg/coreutils sleep 1; done; /cfg/coreutils echo "copy files has finished"`;
const TRANSFER_CONTAINER_WAIT_LOG = "waiting for tar to finish";
const NODE_CONTAINER_WAIT_LOG = "waiting for copy files to finish";
const WAIT_UNTIL_SCRIPT_SUFIX = `until [ -f ${FINISH_MAGIC_FILE} ]; do echo ${NODE_CONTAINER_WAIT_LOG}; sleep 1; done; echo copy files has finished`;
const K8S_WAIT_UNTIL_SCRIPT_SUFIX = `until [ -f ${FINISH_MAGIC_FILE} ]; do /cfg/coreutils echo "${NODE_CONTAINER_WAIT_LOG}"; /cfg/coreutils sleep 1; done; /cfg/coreutils echo "copy files has finished"`;
const TRANSFER_CONTAINER_NAME = "transfer-files-container";
const ZOMBIE_BUCKET = "zombienet-logs";
const WS_URI_PATTERN = "ws://{{IP}}:{{PORT}}";
Expand Down Expand Up @@ -131,6 +133,8 @@ export {
GENESIS_STATE_FILENAME,
GENESIS_WASM_FILENAME,
TMP_DONE,
TRANSFER_CONTAINER_WAIT_LOG,
NODE_CONTAINER_WAIT_LOG,
WAIT_UNTIL_SCRIPT_SUFIX,
TRANSFER_CONTAINER_NAME,
ZOMBIE_BUCKET,
Expand Down
33 changes: 10 additions & 23 deletions javascript/packages/orchestrator/src/providers/k8s/chain-spec.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import { sleep } from "@zombienet/utils";
import { promises as fsPromises, writeFileSync } from "fs";
import {
DEFAULT_CHAIN_SPEC,
DEFAULT_CHAIN_SPEC_COMMAND,
DEFAULT_CHAIN_SPEC_RAW,
NODE_CONTAINER_WAIT_LOG,
} from "../../constants";
import { getClient } from "../client";
import { createTempNodeDef, genNodeDef } from "./dynResourceDefinition";
import { KubeClient } from "./kubeClient";

const debug = require("debug")("zombie::kube::chain-spec");

Expand All @@ -19,7 +20,7 @@ export async function setupChainSpec(
// We have two options to get the chain-spec file, neither should use the `raw` file/argument
// 1: User provide the file (we DON'T expect the raw file)
// 2: User provide the chainSpecCommand (without the --raw option)
const client = getClient();
const client = getClient() as KubeClient;
if (chainConfig.chainSpecPath) {
await fsPromises.copyFile(chainConfig.chainSpecPath, chainFullPath);
} else {
Expand All @@ -42,7 +43,10 @@ export async function setupChainSpec(
const podName = podDef.metadata.name;
await client.spawnFromDef(podDef);

debug("copy file from pod");
debug("waiting for chain-spec");
await client.waitLog(podName, podName, NODE_CONTAINER_WAIT_LOG);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, thanks!!


debug("Getting the chain spec file from pod to the local environment.");
await client.copyFileFromPod(
podName,
plainChainSpecOutputFilePath,
Expand All @@ -62,7 +66,7 @@ export async function getChainSpecRaw(
chainCommand: string,
chainFullPath: string,
): Promise<any> {
const client = getClient();
const client = getClient() as KubeClient;
const plainPath = chainFullPath.replace(".json", "-plain.json");

const remoteChainSpecFullPath =
Expand Down Expand Up @@ -91,25 +95,8 @@ export async function getChainSpecRaw(
},
]);

// let's just wait 2 secs before download
// Creating the raw version can take a couple of seconds, loop until the `build-spec` command is done
// or timedout (20 seconds) and fallback to the validation.
for (let i = 0; i < 10; i++) {
try {
await client.runCommand([
"exec",
podName,
"--",
"/cfg/coreutils ls",
"/tmp/zombie-tmp-done",
]);
// we can go ahead
break;
} catch (_) {
debug("waiting for raw chain-spec");
await sleep(2000);
}
}
debug("waiting for raw chain-spec");
await client.waitLog(podName, podName, NODE_CONTAINER_WAIT_LOG);

debug("Getting the raw chain spec file from pod to the local environment.");
await client.copyFileFromPod(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import {
RPC_WS_PORT,
TMP_DONE,
TRANSFER_CONTAINER_NAME,
TRANSFER_CONTAINER_WAIT_LOG,
WAIT_UNTIL_SCRIPT_SUFIX,
} from "../../constants";
import { Network } from "../../network";
Expand Down Expand Up @@ -112,18 +113,18 @@ function make_transfer_containter(): any {
"ash",
"-c",
[
"wget https://github.com/moparisthebest/static-curl/releases/download/v7.83.1/curl-amd64 -O /cfg/curl",
"wget github.com/moparisthebest/static-curl/releases/download/v7.83.1/curl-amd64 -O /cfg/curl",
"echo downloaded",
"chmod +x /cfg/curl",
"echo chmoded",
"wget https://github.com/uutils/coreutils/releases/download/0.0.17/coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz -O /cfg/coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz",
"wget github.com/uutils/coreutils/releases/download/0.0.17/coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz -O /cfg/coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz",
"cd /cfg",
"tar -xvzf ./coreutils-0.0.17-x86_64-unknown-linux-musl.tar.gz",
"cp ./coreutils-0.0.17-x86_64-unknown-linux-musl/coreutils /cfg/coreutils",
"chmod +x /cfg/coreutils",
"rm -rf ./coreutils-0.0.17-x86_64-unknown-linux-musl",
"echo coreutils downloaded",
`until [ -f ${FINISH_MAGIC_FILE} ]; do echo waiting for tar to finish; sleep 1; done; echo copy files has finished`,
`until [ -f ${FINISH_MAGIC_FILE} ]; do echo ${TRANSFER_CONTAINER_WAIT_LOG}; sleep 1; done; echo copy files has finished`,
].join(" && "),
],
};
Expand Down
120 changes: 61 additions & 59 deletions javascript/packages/orchestrator/src/providers/k8s/kubeClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
CreateLogTable,
decorators,
getSha256,
retry,
sleep,
writeLocalJsonFile,
} from "@zombienet/utils";
Expand All @@ -15,6 +16,7 @@ import {
FINISH_MAGIC_FILE,
P2P_PORT,
TRANSFER_CONTAINER_NAME,
TRANSFER_CONTAINER_WAIT_LOG,
} from "../../constants";
import { fileMap } from "../../types";
import {
Expand Down Expand Up @@ -102,8 +104,8 @@ export class KubeClient extends Client {
async spawnFromDef(
podDef: any,
filesToCopy: fileMap[] = [],
keystore: string,
chainSpecId: string,
keystore?: string,
chainSpecId?: string,
dbSnapshot?: string,
): Promise<void> {
const name = podDef.metadata.name;
Expand All @@ -128,8 +130,8 @@ export class KubeClient extends Client {

logTable.print();

await this.createResource(podDef, true, false);
await this.wait_transfer_container(name);
await this.createResource(podDef, true);
await this.waitTransferContainerReady(name);

if (dbSnapshot) {
// we need to get the snapshot from a public access
Expand Down Expand Up @@ -201,7 +203,7 @@ export class KubeClient extends Client {
}

await this.putLocalMagicFile(name);
await this.wait_pod_ready(name);
await this.waitPodReady(name);
logTable = new CreateLogTable({
colWidths: [20, 100],
});
Expand Down Expand Up @@ -230,7 +232,6 @@ export class KubeClient extends Client {
async createResource(
resourseDef: any,
scoped: boolean = false,
waitReady: boolean = false,
): Promise<void> {
await this.runCommand(["apply", "-f", "-"], {
resourceDef: JSON.stringify(resourseDef),
Expand All @@ -240,66 +241,67 @@ export class KubeClient extends Client {
debug(resourseDef);
const name = resourseDef.metadata.name;
const kind: string = resourseDef.kind.toLowerCase();
}

if (waitReady) {
// loop until ready
let t = this.timeout;
const args = ["get", kind, name, "-o", "jsonpath={.status}"];
do {
async waitPodReady(pod: string): Promise<void> {
const args = ["get", "pod", pod, "--no-headers"];
await retry(
3000,
this.timeout * 1000,
async () => {
const result = await this.runCommand(args);
const status = JSON.parse(result.stdout);
if (["Running", "Succeeded"].includes(status.phase)) return;
if (result.stdout.match(/Running|Completed/)) return true;
if (result.stdout.match(/ErrImagePull|ImagePullBackOff/))
throw new Error(`Error pulling image for pod : ${pod}`);
},
`waitPodReady(): pod: ${pod}`,
);
}

// check if we are waiting init container
for (const s of status.initContainerStatuses) {
if (s.name === TRANSFER_CONTAINER_NAME && s.state.running) return;
async waitContainerInState(
pod: string,
container: string,
state: string,
): Promise<void> {
const args = ["get", "pod", pod, "-o", "jsonpath={.status}"];
await retry(
3000,
this.timeout * 1000,
async () => {
const result = await this.runCommand(args);
const json = JSON.parse(result.stdout);

let containerStatuses = json?.containerStatuses ?? [];
let initContainerStatuses = json?.initContainerStatuses ?? [];
for (const status of containerStatuses.concat(initContainerStatuses)) {
if (status.name === container && state in status.state) return true;
}
},
`waitContainerInState(): pod: ${pod}, container: ${container}, state: ${state}`,
);
}

await new Promise((resolve) => setTimeout(resolve, 3000));
t -= 3;
} while (t > 0);
async waitLog(pod: string, container: string, log: string): Promise<void> {
const args = ["logs", "--tail=1", pod, "-c", `${container}`];
await retry(
3000,
this.timeout * 1000,
async () => {
const result = await this.runCommand(args);

throw new Error(`Timeout(${this.timeout}) for ${kind} : ${name}`);
}
if (result.stdout == log) return true;
},
`waitLog(): pod: ${pod}, container: ${container}, log: ${log}`,
);
}

async wait_pod_ready(podName: string): Promise<void> {
// loop until ready
let t = this.timeout;
const args = ["get", "pod", podName, "--no-headers"];
do {
const result = await this.runCommand(args);
if (result.stdout.match(/Running|Completed/)) return;
if (result.stdout.match(/ErrImagePull|ImagePullBackOff/))
throw new Error(`Error pulling image for pod : ${podName}`);

await new Promise((resolve) => setTimeout(resolve, 3000));
t -= 3;
} while (t > 0);

throw new Error(`Timeout(${this.timeout}) for pod : ${podName}`);
}
async wait_transfer_container(podName: string): Promise<void> {
// loop until ready
let t = this.timeout;
const args = ["get", "pod", podName, "-o", "jsonpath={.status}"];
do {
const result = await this.runCommand(args);
const status = JSON.parse(result.stdout);

// check if we are waiting init container
if (status.initContainerStatuses) {
for (const s of status.initContainerStatuses) {
if (s.name === TRANSFER_CONTAINER_NAME && s.state.running) return;
}
}

await new Promise((resolve) => setTimeout(resolve, 3000));
t -= 3;
} while (t > 0);
async waitTransferContainerReady(pod: string): Promise<void> {
await this.waitContainerInState(pod, TRANSFER_CONTAINER_NAME, "running");

throw new Error(
`Timeout(${this.timeout}) for transfer container for pod : ${podName}`,
await this.waitLog(
pod,
TRANSFER_CONTAINER_NAME,
TRANSFER_CONTAINER_WAIT_LOG,
);
}

Expand Down Expand Up @@ -540,7 +542,7 @@ export class KubeClient extends Client {
}

// wait until fileserver is ready, fix race condition #700.
await this.wait_pod_ready("fileserver");
await this.waitPodReady("fileserver");
sleep(3 * 1000);
let fileServerOk = false;
let attempts = 0;
Expand Down Expand Up @@ -831,7 +833,7 @@ export class KubeClient extends Client {
this.namespace,
);

await this.wait_pod_ready("introspector");
await this.waitPodReady("introspector");
}

async uploadToFileserver(
Expand Down
18 changes: 18 additions & 0 deletions javascript/packages/utils/src/misc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,24 @@ export async function sleep(ms: number) {
return new Promise((resolve) => setTimeout(resolve, ms));
}

export async function retry(
delayMs: number,
timeoutMs: number,
fn: () => Promise<boolean | undefined>,
errMsg: string,
) {
do {
if (await fn()) {
return;
}

await sleep(delayMs);
timeoutMs -= delayMs;
} while (timeoutMs > 0);

throw new Error(`Timeout(${timeoutMs}) for: ${errMsg}`);
}

export function generateNamespace(n: number = 16): string {
const buf = randomBytes(n);
return buf.toString("hex");
Expand Down