Skip to content

Commit

Permalink
[backport] fix: [NPM] [Linux] race when deleting/readding NetPol (#2980)
Browse files Browse the repository at this point in the history
[backport] fix: [NPM] [Linux] race when deleting/readding NetPol (#2978)

Signed-off-by: Hunter Gregory <[email protected]>
  • Loading branch information
huntergregory authored Aug 31, 2024
1 parent e4436da commit a68d21a
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 5 deletions.
34 changes: 29 additions & 5 deletions npm/pkg/dataplane/dataplane.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@ import (
const (
reconcileDuration = time.Duration(5 * time.Minute)

contextBackground = "BACKGROUND"
contextApplyDP = "APPLY-DP"
contextAddNetPol = "ADD-NETPOL"
contextAddNetPolBootup = "BOOTUP-ADD-NETPOL"
contextDelNetPol = "DEL-NETPOL"
contextBackground = "BACKGROUND"
contextApplyDP = "APPLY-DP"
contextAddNetPol = "ADD-NETPOL"
contextAddNetPolBootup = "BOOTUP-ADD-NETPOL"
contextAddNetPolCIDRPrecaution = "ADD-NETPOL-CIDR-PRECAUTION"
contextDelNetPol = "DEL-NETPOL"
)

var (
Expand Down Expand Up @@ -471,6 +472,29 @@ func (dp *DataPlane) addPolicies(netPols []*policies.NPMNetworkPolicy) error {
}
}

if !util.IsWindowsDP() {
for _, netPol := range netPols {
if !(netPol.HasCIDRRules() && dp.ipsetMgr.PreviousApplyFailed()) {
continue
}

if inBootupPhase {
// this should never happen because bootup phase is for windows, but just in case, we don't want to applyDataplaneNow() or else there will be a deadlock on dp.applyInfo
msg := fmt.Sprintf("[DataPlane] [%s] at risk of improperly applying a CIDR policy which is removed then readded", contextAddNetPolCIDRPrecaution)
klog.Warning(msg)
metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, msg)
break
}

// prevent #2977
if err := dp.applyDataPlaneNow(contextAddNetPolCIDRPrecaution); err != nil {
return err // nolint:wrapcheck // unnecessary to wrap error since the provided context is included in the error
}

break
}
}

// 1. Add IPSets and apply for each NetPol.
// Apply IPSets after each NetworkPolicy unless ApplyInBackground=true and we're in the bootup phase (only happens for Windows currently)
for _, netPol := range netPols {
Expand Down
27 changes: 27 additions & 0 deletions npm/pkg/dataplane/dataplane_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,33 @@ func TestRemovePolicy(t *testing.T) {
require.NoError(t, err)
}

func TestHandle2977(t *testing.T) {
if util.IsWindowsDP() {
return
}

metrics.InitializeAll()

calls := append(getBootupTestCalls(), getAddPolicyTestCallsForDP(&testPolicyobj)...)
calls = append(calls, policies.GetRemovePolicyTestCalls(&testPolicyobj)...)
calls = append(calls, ipsets.GetApplyIPSetsFailureTestCalls()...)
calls = append(calls, ipsets.GetApplyIPSetsTestCalls(nil, getAffectedIPSets(&testPolicyobj))...)
calls = append(calls, getAddPolicyTestCallsForDP(&testPolicyobj)...)
ioshim := common.NewMockIOShim(calls)
defer ioshim.VerifyCalls(t, calls)
dp, err := NewDataPlane("testnode", ioshim, dpCfg, nil)
require.NoError(t, err)

err = dp.AddPolicy(&testPolicyobj)
require.NoError(t, err)

err = dp.RemovePolicy(testPolicyobj.PolicyKey)
require.Error(t, err)

err = dp.AddPolicy(&testPolicyobj)
require.NoError(t, err)
}

func TestUpdatePolicy(t *testing.T) {
metrics.InitializeAll()

Expand Down
7 changes: 7 additions & 0 deletions npm/pkg/dataplane/ipsets/ipsetmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,13 @@ func NewIPSetManager(iMgrCfg *IPSetManagerCfg, ioShim *common.IOShim) *IPSetMana
}
}

// PreviousApplyFailed is only relevant for Linux right now since Windows doesn't track failures
func (iMgr *IPSetManager) PreviousApplyFailed() bool {
iMgr.Lock()
defer iMgr.Unlock()
return iMgr.consecutiveApplyFailures > 0
}

/*
Reconcile removes empty/unreferenced sets from the cache.
For ApplyAllIPSets mode, those sets are added to the toDeleteCache.
Expand Down
16 changes: 16 additions & 0 deletions npm/pkg/dataplane/ipsets/testutils_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ var (
Stdout: "success",
ExitCode: 0,
}

fakeRestoreFailureCommand = testutils.TestCmd{
Cmd: ipsetRestoreStringSlice,
Stdout: "failure",
ExitCode: 1,
}
)

func GetApplyIPSetsTestCalls(toAddOrUpdateIPSets, toDeleteIPSets []*IPSetMetadata) []testutils.TestCmd {
Expand All @@ -20,6 +26,16 @@ func GetApplyIPSetsTestCalls(toAddOrUpdateIPSets, toDeleteIPSets []*IPSetMetadat
return []testutils.TestCmd{fakeRestoreSuccessCommand}
}

func GetApplyIPSetsFailureTestCalls() []testutils.TestCmd {
return []testutils.TestCmd{
fakeRestoreFailureCommand,
fakeRestoreFailureCommand,
fakeRestoreFailureCommand,
fakeRestoreFailureCommand,
fakeRestoreFailureCommand,
}
}

func GetResetTestCalls() []testutils.TestCmd {
return []testutils.TestCmd{
{Cmd: []string{"ipset", "list", "--name"}, PipedToCommand: true},
Expand Down
4 changes: 4 additions & 0 deletions npm/pkg/dataplane/ipsets/testutils_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ func GetApplyIPSetsTestCalls(_, _ []*IPSetMetadata) []testutils.TestCmd {
return []testutils.TestCmd{}
}

func GetApplyIPSetsFailureTestCalls() []testutils.TestCmd {
return []testutils.TestCmd{}
}

func GetResetTestCalls() []testutils.TestCmd {
return []testutils.TestCmd{}
}
9 changes: 9 additions & 0 deletions npm/pkg/dataplane/policies/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ func NewNPMNetworkPolicy(netPolName, netPolNamespace string) *NPMNetworkPolicy {
}
}

func (netPol *NPMNetworkPolicy) HasCIDRRules() bool {
for _, set := range netPol.RuleIPSets {
if set.Metadata.Type == ipsets.CIDRBlocks {
return true
}
}
return false
}

func (netPol *NPMNetworkPolicy) AllPodSelectorIPSets() []*ipsets.TranslatedIPSet {
return append(netPol.PodSelectorIPSets, netPol.ChildPodSelectorIPSets...)
}
Expand Down

0 comments on commit a68d21a

Please sign in to comment.