From 4eb4705fe0fc545138acff1baefa4ba73b3f46cd Mon Sep 17 00:00:00 2001 From: Sean McGinnis Date: Thu, 7 Nov 2024 13:33:40 -0600 Subject: [PATCH] Retry obtaining KUBECONFIG file lock When there are multiple concurrent create or delete operations, there can be different processes trying to lock the KUBECONFIG file at the same time. This leads to small windows where getting the lock fails. We don't fail the operation if something like "kind delete cluster" isn't able to remove its settings from KUBECONFIG. This is ideal and what most users would expect, but it also means there may be some leftover crud left behind that they are not aware of. This adds some basic retries to obtaining the file lock. It potentially adds a very small delay (in a much longer process), but makes it more likely that the process is able to get the lock and complete the KUBECONFIG update. Signed-off-by: Sean McGinnis --- .../kubeconfig/internal/kubeconfig/lock.go | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/pkg/cluster/internal/kubeconfig/internal/kubeconfig/lock.go b/pkg/cluster/internal/kubeconfig/internal/kubeconfig/lock.go index 41d1be27fc..bcb4d02c70 100644 --- a/pkg/cluster/internal/kubeconfig/internal/kubeconfig/lock.go +++ b/pkg/cluster/internal/kubeconfig/internal/kubeconfig/lock.go @@ -19,9 +19,12 @@ package kubeconfig import ( "os" "path/filepath" + "time" ) -// these are from +const lockFileRetryAttemps = 5 + +// these are based on // https://github.com/kubernetes/client-go/blob/611184f7c43ae2d520727f01d49620c7ed33412d/tools/clientcmd/loader.go#L439-L440 func lockFile(filename string) error { @@ -32,12 +35,21 @@ func lockFile(filename string) error { return err } } - f, err := os.OpenFile(lockName(filename), os.O_CREATE|os.O_EXCL, 0) - if err != nil { - return err + + // Retry obtaining the file lock a few times to accommodate concurrent operations. 
+ var lastErr error + for i := 0; i < lockFileRetryAttemps; i++ { + f, err := os.OpenFile(lockName(filename), os.O_CREATE|os.O_EXCL, 0) + if err == nil { + f.Close() + return nil + } + + lastErr = err + time.Sleep(100 * time.Millisecond) } - f.Close() - return nil + + return lastErr } func unlockFile(filename string) error {