Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

raft: re-enable config change safety #124804

Merged
merged 2 commits into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion pkg/raft/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -1279,7 +1279,13 @@ func stepLeader(r *raft, m pb.Message) error {
cc = ccc
}
if cc != nil {
// Per the "Apply" invariant in the config change safety argument[^1],
// the leader must not append a config change if it hasn't applied all
// config changes in its log.
//
// [^1]: https://github.com/etcd-io/etcd/issues/7625#issuecomment-489232411
alreadyPending := r.pendingConfIndex > r.raftLog.applied

alreadyJoint := len(r.trk.Config.Voters[1]) > 0
wantsLeaveJoint := len(cc.AsV2().Changes) == 0

Expand All @@ -1292,7 +1298,11 @@ func stepLeader(r *raft, m pb.Message) error {
failedCheck = "not in joint state; refusing empty conf change"
}

if failedCheck != "" && !r.disableConfChangeValidation {
// Allow disabling config change constraints that are guaranteed by the
// upper state machine layer (incorrect ones will apply as no-ops).
//
// NB: the !alreadyPending requirement is always enforced, even when
// validation is disabled, for safety.
if alreadyPending || (failedCheck != "" && !r.disableConfChangeValidation) {
r.logger.Infof("%x ignoring conf change %v at config %s: %s", r.id, cc, r.trk.Config, failedCheck)
m.Entries[i] = pb.Entry{Type: pb.EntryNormal}
} else {
Expand Down
66 changes: 34 additions & 32 deletions pkg/raft/testdata/confchange_disable_validation.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,45 +32,47 @@ l2 l3
----
ok

# Entries both get appended.
process-ready 1
----
Ready MustSync=true:
Entries:
1/4 EntryNormal "foo"
1/5 EntryConfChangeV2 l2 l3

# Dummy entry comes up for application.
process-ready 1
----
Ready MustSync=false:
HardState Term:1 Vote:1 Commit:5
CommittedEntries:
1/4 EntryNormal "foo"

# Propose new config change. Note how it isn't rejected,
# which is due to DisableConfChangeValidation=true.
propose-conf-change 1
----
ok

# Turn on autopilot: the first config change applies, the
# second one gets committed and also applies.
stabilize
# Entries both get appended and applied.
stabilize 1
----
> 1 handling Ready
Ready MustSync=true:
Entries:
1/6 EntryConfChangeV2
1/4 EntryNormal "foo"
1/5 EntryConfChangeV2 l2 l3
> 1 handling Ready
Ready MustSync=false:
HardState Term:1 Vote:1 Commit:5
CommittedEntries:
1/4 EntryNormal "foo"
> 1 handling Ready
Ready MustSync=false:
CommittedEntries:
1/5 EntryConfChangeV2 l2 l3
INFO 1 switched to configuration voters=(1)&&(1) learners=(2 3)
> 1 handling Ready
Ready MustSync=false:
HardState Term:1 Vote:1 Commit:6
CommittedEntries:
1/6 EntryConfChangeV2
Messages:
1->2 MsgApp Term:1 Log:1/5 Commit:5 Entries:[1/6 EntryConfChangeV2]
1->3 MsgApp Term:1 Log:1/5 Commit:5 Entries:[1/6 EntryConfChangeV2]
INFO 1 switched to configuration voters=(1) learners=(2 3)
1->2 MsgApp Term:1 Log:1/4 Commit:5 Entries:[1/5 EntryConfChangeV2 l2 l3]
1->3 MsgApp Term:1 Log:1/4 Commit:5 Entries:[1/5 EntryConfChangeV2 l2 l3]

# Propose new config change. Note how it isn't rejected,
# which is due to DisableConfChangeValidation=true.
propose-conf-change 1
l4
----
ok

# The new config change is appended to the log.
process-ready 1
----
Ready MustSync=true:
Entries:
1/6 EntryConfChangeV2 l4

# If we process-ready on node 1 now, the second config change will come up for
# application, and the node will panic with "config is already joint". The state
# machine must ensure that it does not apply this entry.
#
# TODO(pav-kv): support no-op command application in tests, or support asserting
# that the node panics.
Loading