From 457560bb17db04b9ea99ea7f5c4a52853cf62df8 Mon Sep 17 00:00:00 2001 From: Yossi Boaron Date: Wed, 14 Aug 2024 16:03:51 +0300 Subject: [PATCH] Add retry mechanism to CNI interface discovery on kube-proxy handler We noticed that even though RA pod starts running only after the node is ready, sometimes the kube-proxy handler fails to discover the CNI interface after the node is rebooted. This PR adds a retry to CNI discovery. Fixes: https://github.com/submariner-io/submariner/issues/3120 Signed-off-by: Yossi Boaron --- .../handlers/kubeproxy/kp_packetfilter.go | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/pkg/routeagent_driver/handlers/kubeproxy/kp_packetfilter.go b/pkg/routeagent_driver/handlers/kubeproxy/kp_packetfilter.go index 9a529021c..d79033a57 100644 --- a/pkg/routeagent_driver/handlers/kubeproxy/kp_packetfilter.go +++ b/pkg/routeagent_driver/handlers/kubeproxy/kp_packetfilter.go @@ -21,6 +21,7 @@ package kubeproxy import ( "net" "os" + "time" "github.com/pkg/errors" "github.com/submariner-io/admiral/pkg/log" @@ -31,6 +32,8 @@ import ( "github.com/submariner-io/submariner/pkg/packetfilter" "github.com/submariner-io/submariner/pkg/vxlan" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/retry" "k8s.io/utils/set" logf "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -91,8 +94,16 @@ func (kp *SyncHandler) GetNetworkPlugins() []string { return networkPlugins } +var discoverCNIRetryConfig = wait.Backoff{ + Cap: 1 * time.Minute, + Duration: 4 * time.Second, + Factor: 1.2, + Steps: 12, +} + func (kp *SyncHandler) Init() error { var err error + var cniIface *cni.Interface kp.hostname, err = os.Hostname() if err != nil { @@ -104,7 +115,17 @@ func (kp *SyncHandler) Init() error { return errors.Wrapf(err, "Unable to find the default interface on host: %s", kp.hostname) } - cniIface, err := cni.Discover(kp.localClusterCidr) + err = retry.OnError(discoverCNIRetryConfig, 
func(err error) bool { + logger.Infof("Waiting for CNI interface discovery: %s", err) + return true + }, func() error { + cniIface, err = cni.Discover(kp.localClusterCidr) + if err != nil { + return errors.Wrapf(err, "Error discovering the CNI interface") + } + + return nil + }) if err == nil { // Configure CNI Specific changes kp.cniIface = cniIface