diff --git a/cmd/containerd-shim-runhcs-v1/task_hcs.go b/cmd/containerd-shim-runhcs-v1/task_hcs.go index d544cb0934..9b12d46101 100644 --- a/cmd/containerd-shim-runhcs-v1/task_hcs.go +++ b/cmd/containerd-shim-runhcs-v1/task_hcs.go @@ -252,8 +252,8 @@ func newHcsTask( if parent != nil { // We have a parent UVM. Listen for its exit and forcibly close this // task. This is not expected but in the event of a UVM crash we need to - // handle this case. - go ht.waitForHostExit() + // ensure the resources are cleaned up. The wait also returns when the container exits. + go ht.waitForHostOrContainerExit() } go ht.waitInitExit() @@ -616,19 +616,21 @@ func (ht *hcsTask) waitInitExit() { ht.close(ctx) } -// waitForHostExit waits for the host virtual machine to exit. Once exited -// forcibly exits all additional exec's in this task. +// waitForHostOrContainerExit waits for the host virtual machine or this task's container to exit, +// whichever happens first. Once exited, it forcibly exits all additional execs in this task. +// The container exit is watched as well because the container could exit before +// the UVM, which would otherwise leak the goroutine started for its task. // // This MUST be called via a goroutine to wait on a background thread. // // Note: For Windows process isolated containers there is no host virtual // machine so this should not be called. 
-func (ht *hcsTask) waitForHostExit() { - ctx, span := oc.StartSpan(context.Background(), "hcsTask::waitForHostExit") +func (ht *hcsTask) waitForHostOrContainerExit() { + ctx, span := oc.StartSpan(context.Background(), "hcsTask::waitForHostOrContainerExit") defer span.End() span.AddAttributes(trace.StringAttribute("tid", ht.id)) - err := ht.host.WaitCtx(ctx) + err := ht.host.WaitForUvmOrContainerExit(ctx, ht.c) if err != nil { log.G(ctx).WithError(err).Error("failed to wait for host virtual machine exit") } else { diff --git a/internal/uvm/wait.go b/internal/uvm/wait.go index 5b411c4206..c3ef6e87b2 100644 --- a/internal/uvm/wait.go +++ b/internal/uvm/wait.go @@ -9,9 +9,33 @@ import ( "github.com/sirupsen/logrus" + "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/logfields" ) +// WaitForUvmOrContainerExit blocks until the container `c` exits, the UVM exits, or ctx is done, whichever comes first. +// It returns c.WaitError() on container exit; on UVM exit it waits for output processing (bounded by ctx) and returns the UVM wait error joined with any output-processing error; on ctx done it returns ctx.Err(). +// This is used to clean up hcs task and exec resources by the caller. +func (uvm *UtilityVM) WaitForUvmOrContainerExit(ctx context.Context, c cow.Container) (err error) { + select { + case <-c.WaitChannel(): + return c.WaitError() + case <-uvm.hcsSystem.WaitChannel(): + logrus.WithField(logfields.UVMID, uvm.id).Debug("uvm exited, waiting for output processing to complete") + var outputErr error + if uvm.outputProcessingDone != nil { + select { + case <-uvm.outputProcessingDone: + case <-ctx.Done(): + outputErr = fmt.Errorf("failed to wait on uvm output processing: %w", ctx.Err()) + } + } + return errors.Join(uvm.hcsSystem.WaitError(), outputErr) + case <-ctx.Done(): + return ctx.Err() + } +} + // Wait waits synchronously for a utility VM to terminate. func (uvm *UtilityVM) Wait() error { return uvm.WaitCtx(context.Background()) }