From 4a670527d3ccfa408a44530125dd386fda0151af Mon Sep 17 00:00:00 2001 From: Zygimantas Date: Thu, 7 Nov 2024 15:35:18 +0100 Subject: [PATCH] fix(provider): improve recognition of exit codes currently, both DO and Docker providers do not wait for the container to exit until returning an exit code. this introduces a condition, where the container hasn't exited yet (e.g. with an error), but we already fetch it's stdout and exit code (which at that point is 0). this commit makes the providers wait for the exec container to finish and only then returns the response. --- core/provider/digitalocean/task.go | 49 +++++++++++++++++++++++++----- core/provider/docker/task.go | 25 ++++++++++++--- cosmos/node/genesis.go | 24 ++++++++++++--- cosmos/node/init.go | 11 ++++++- cosmos/node/keys.go | 6 +++- 5 files changed, 97 insertions(+), 18 deletions(-) diff --git a/core/provider/digitalocean/task.go b/core/provider/digitalocean/task.go index 5a22e22..1380355 100644 --- a/core/provider/digitalocean/task.go +++ b/core/provider/digitalocean/task.go @@ -325,19 +325,36 @@ func (p *Provider) RunCommand(ctx context.Context, taskName string, command []st defer resp.Close() - execInspect, err := dockerClient.ContainerExecInspect(ctx, exec.ID) + lastExitCode := 0 + + err = util.WaitForCondition(ctx, 10*time.Second, 100*time.Millisecond, func() (bool, error) { + execInspect, err := dockerClient.ContainerExecInspect(ctx, exec.ID) + if err != nil { + return false, err + } + + if execInspect.Running { + return false, nil + } + + lastExitCode = execInspect.ExitCode + + return true, nil + }) + if err != nil { - return "", "", 0, err + p.logger.Error("failed to wait for exec", zap.Error(err), zap.String("taskName", taskName)) + return "", "", lastExitCode, err } var stdout, stderr bytes.Buffer _, err = stdcopy.StdCopy(&stdout, &stderr, resp.Reader) if err != nil { - return "", "", 0, err + return "", "", lastExitCode, err } - return stdout.String(), stderr.String(), execInspect.ExitCode, nil + return stdout.String(), stderr.String(), lastExitCode, nil } func (p *Provider) RunCommandWhileStopped(ctx context.Context, taskName string, definition provider.TaskDefinition, command []string) (string, string, int, error) { @@ -408,10 +425,26 @@ func (p *Provider) RunCommandWhileStopped(ctx context.Context, taskName string, defer resp.Close() - execInspect, err := dockerClient.ContainerExecInspect(ctx, exec.ID) + lastExitCode := 0 + + err = util.WaitForCondition(ctx, 10*time.Second, 100*time.Millisecond, func() (bool, error) { + execInspect, err := dockerClient.ContainerExecInspect(ctx, exec.ID) + if err != nil { + return false, err + } + + if execInspect.Running { + return false, nil + } + + lastExitCode = execInspect.ExitCode + + return true, nil + }) + if err != nil { - p.logger.Error("failed to inspect exec", zap.Error(err), zap.String("taskName", taskName)) - return "", "", 0, err + p.logger.Error("failed to wait for exec", zap.Error(err), zap.String("taskName", taskName)) + return "", "", lastExitCode, err } var stdout, stderr bytes.Buffer @@ -420,7 +453,7 @@ func (p *Provider) RunCommandWhileStopped(ctx context.Context, taskName string, return "", "", 0, err } - return stdout.String(), stderr.String(), execInspect.ExitCode, err + return stdout.String(), stderr.String(), lastExitCode, err } func startContainerWithBlock(ctx context.Context, dockerClient *dockerclient.Client, containerID string) error { diff --git a/core/provider/docker/task.go b/core/provider/docker/task.go index 7e7a303..8564c99 100644 --- a/core/provider/docker/task.go +++ b/core/provider/docker/task.go @@ -235,18 +235,35 @@ func (p *Provider) RunCommand(ctx context.Context, id string, command []string) defer resp.Close() - execInspect, err := p.dockerClient.ContainerExecInspect(ctx, exec.ID) + lastExitCode := 0 + + err = util.WaitForCondition(ctx, 10*time.Second, 100*time.Millisecond, func() (bool, error) { + execInspect, err := p.dockerClient.ContainerExecInspect(ctx, exec.ID) + if err != nil { + return false, err + } + + if execInspect.Running { + return false, nil + } + + lastExitCode = execInspect.ExitCode + + return true, nil + }) + if err != nil { - return "", "", 0, err + p.logger.Error("failed to wait for exec", zap.Error(err), zap.String("id", id)) + return "", "", lastExitCode, err } var stdout, stderr bytes.Buffer _, err = stdcopy.StdCopy(&stdout, &stderr, resp.Reader) if err != nil { - return "", "", 0, err + return "", "", lastExitCode, err } - return stdout.String(), stderr.String(), execInspect.ExitCode, nil + return stdout.String(), stderr.String(), lastExitCode, nil } func (p *Provider) RunCommandWhileStopped(ctx context.Context, id string, definition provider.TaskDefinition, command []string) (string, string, int, error) { diff --git a/cosmos/node/genesis.go b/cosmos/node/genesis.go index 4980e88..6fb53ab 100644 --- a/cosmos/node/genesis.go +++ b/cosmos/node/genesis.go @@ -75,7 +75,11 @@ func (n *Node) AddGenesisAccount(ctx context.Context, address string, genesisAmo n.logger.Debug("add-genesis-account", zap.String("stdout", stdout), zap.String("stderr", stderr), zap.Int("exitCode", exitCode)) if err != nil { - return err + return fmt.Errorf("failed to add genesis account: %w", err) + } + + if exitCode != 0 { + return fmt.Errorf("failed to add genesis account (exitcode=%d): %s", exitCode, stderr) } return nil @@ -102,11 +106,15 @@ func (n *Node) GenerateGenTx(ctx context.Context, genesisSelfDelegation types.Co stdout, stderr, exitCode, err := n.Task.RunCommand(ctx, command) n.logger.Debug("gentx", zap.String("stdout", stdout), zap.String("stderr", stderr), zap.Int("exitCode", exitCode)) + if err != nil { + return fmt.Errorf("failed to generate genesis transaction: %w", err) + } + if exitCode != 0 { - return fmt.Errorf("failed to generate genesis transaction: %s (exitcode=%d)", stderr, exitCode) + return fmt.Errorf("failed to generate genesis transaction (exitcode=%d): %s", exitCode, stderr) } - return err + return nil } // CollectGenTxs collects the genesis transactions from the node and create a finalized genesis file @@ -124,7 +132,15 @@ func (n *Node) CollectGenTxs(ctx context.Context) error { stdout, stderr, exitCode, err := n.Task.RunCommand(ctx, n.BinCommand(command...)) n.logger.Debug("collect-gentxs", zap.String("stdout", stdout), zap.String("stderr", stderr), zap.Int("exitCode", exitCode)) - return err + if err != nil { + return fmt.Errorf("failed to collect genesis transactions: %w", err) + } + + if exitCode != 0 { + return fmt.Errorf("failed to collect genesis transactions (exitcode=%d): %s", exitCode, stderr) + } + + return nil } // OverwriteGenesisFile overwrites the genesis file on the node with the provided genesis file diff --git a/cosmos/node/init.go b/cosmos/node/init.go index 33695ae..90ff9f6 100644 --- a/cosmos/node/init.go +++ b/cosmos/node/init.go @@ -2,6 +2,7 @@ package node import ( "context" + "fmt" "go.uber.org/zap" ) @@ -14,5 +15,13 @@ func (n *Node) InitHome(ctx context.Context) error { stdout, stderr, exitCode, err := n.Task.RunCommand(ctx, n.BinCommand([]string{"init", n.Definition.Name, "--chain-id", chainConfig.ChainId}...)) n.logger.Debug("init home", zap.String("stdout", stdout), zap.String("stderr", stderr), zap.Int("exitCode", exitCode)) - return err + if err != nil { + return fmt.Errorf("failed to init home: %w", err) + } + + if exitCode != 0 { + return fmt.Errorf("failed to init home (exit code %d): %s", exitCode, stderr) + } + + return nil } diff --git a/cosmos/node/keys.go b/cosmos/node/keys.go index 3a9a61d..58c504a 100644 --- a/cosmos/node/keys.go +++ b/cosmos/node/keys.go @@ -58,12 +58,16 @@ func (n *Node) KeyBech32(ctx context.Context, name, bech string) (string, error) command = append(command, "--bech", bech) } - stdout, stderr, _, err := n.Task.RunCommand(ctx, command) + stdout, stderr, exitCode, err := n.Task.RunCommand(ctx, command) n.logger.Debug("show key", zap.String("name", name), zap.String("stdout", stdout), zap.String("stderr", stderr)) if err != nil { return "", fmt.Errorf("failed to show key %q (stderr=%q): %w", name, stderr, err) } + if exitCode != 0 { + return "", fmt.Errorf("failed to show key %q (exitcode=%d): %s", name, exitCode, stderr) + } + return util.CleanDockerOutput(stdout), nil }