Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: distributed via raft #9

Closed
wants to merge 14 commits into from
1,640 changes: 1,620 additions & 20 deletions Cargo.lock

Large diffs are not rendered by default.

14 changes: 14 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,17 @@ dashmap = { version = "5.5.3", features = ["serde"] }
crossbeam-utils = "0.8"
rayon = "1.9.0"
tokio = { version = "1.36.0", features = ["full"] }
bytes = "1"
reqwest = { version = "0.12.2", features = ["json"] }
tracing-slog = "0.2.0"
slog = "2.7.0"
protobuf = { version = "2"}

tonic = "0.4"
prost = "0.7"
openraft = { git = "https://github.com/datafuselabs/openraft.git", features = ["type-alias", "serde"]}
serde_json = "1.0.115"
actix-web = "4.0.0"

[build-dependencies]
tonic-build = "0.4.0"
193 changes: 193 additions & 0 deletions proto/eraftpb.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
syntax = "proto3";
package eraftpb;

enum EntryType {
EntryNormal = 0;
EntryConfChange = 1;
EntryConfChangeV2 = 2;
}

// The entry is a type of change that needs to be applied. It contains two data fields.
// While the fields are built into the model; their usage is determined by the entry_type.
//
// For normal entries, the data field should contain the data change that should be applied.
// The context field can be used for any contextual data that might be relevant to the
// application of the data.
//
// For configuration changes, the data will contain the ConfChange message and the
// context will provide anything needed to assist the configuration change. The context
// if for the user to set and use in this case.
message Entry {
EntryType entry_type = 1;
uint64 term = 2;
uint64 index = 3;
bytes data = 4;
bytes context = 6;

// Deprecated! It is kept for backward compatibility.
// TODO: remove it in the next major release.
bool sync_log = 5;
}

message SnapshotMetadata {
// The current `ConfState`.
ConfState conf_state = 1;
// The applied index.
uint64 index = 2;
// The term of the applied index.
uint64 term = 3;
}

message Snapshot {
bytes data = 1;
SnapshotMetadata metadata = 2;
}

enum MessageType {
MsgHup = 0;
MsgBeat = 1;
MsgPropose = 2;
MsgAppend = 3;
MsgAppendResponse = 4;
MsgRequestVote = 5;
MsgRequestVoteResponse = 6;
MsgSnapshot = 7;
MsgHeartbeat = 8;
MsgHeartbeatResponse = 9;
MsgUnreachable = 10;
MsgSnapStatus = 11;
MsgCheckQuorum = 12;
MsgTransferLeader = 13;
MsgTimeoutNow = 14;
MsgReadIndex = 15;
MsgReadIndexResp = 16;
MsgRequestPreVote = 17;
MsgRequestPreVoteResponse = 18;
}

message Message {
MessageType msg_type = 1;
uint64 to = 2;
uint64 from = 3;
uint64 term = 4;
// logTerm is generally used for appending Raft logs to followers. For example,
// (type=MsgAppend,index=100,log_term=5) means leader appends entries starting at
// index=101, and the term of entry at index 100 is 5.
// (type=MsgAppendResponse,reject=true,index=100,log_term=5) means follower rejects some
// entries from its leader as it already has an entry with term 5 at index 100.
uint64 log_term = 5;
uint64 index = 6;
repeated Entry entries = 7;
uint64 commit = 8;
uint64 commit_term = 15;
Snapshot snapshot = 9;
uint64 request_snapshot = 13;
bool reject = 10;
uint64 reject_hint = 11;
bytes context = 12;
uint64 deprecated_priority = 14;
// If this new field is not set, then use the above old field; otherwise
// use the new field. When broadcasting request vote, both fields are
// set if the priority is larger than 0. This change is not a fully
// compatible change, but it makes minimal impact that only new priority
// is not recognized by the old nodes during rolling update.
int64 priority = 16;
}

message HardState {
uint64 term = 1;
uint64 vote = 2;
uint64 commit = 3;
}

enum ConfChangeTransition {
// Automatically use the simple protocol if possible, otherwise fall back
// to ConfChangeType::Implicit. Most applications will want to use this.
Auto = 0;
// Use joint consensus unconditionally, and transition out of them
// automatically (by proposing a zero configuration change).
//
// This option is suitable for applications that want to minimize the time
// spent in the joint configuration and do not store the joint configuration
// in the state machine (outside of InitialState).
Implicit = 1;
// Use joint consensus and remain in the joint configuration until the
// application proposes a no-op configuration change. This is suitable for
// applications that want to explicitly control the transitions, for example
// to use a custom payload (via the Context field).
Explicit = 2;
}

message ConfState {
repeated uint64 voters = 1;
repeated uint64 learners = 2;

// The voters in the outgoing config. If not empty the node is in joint consensus.
repeated uint64 voters_outgoing = 3;
// The nodes that will become learners when the outgoing config is removed.
// These nodes are necessarily currently in nodes_joint (or they would have
// been added to the incoming config right away).
repeated uint64 learners_next = 4;
// If set, the config is joint and Raft will automatically transition into
// the final config (i.e. remove the outgoing config) when this is safe.
bool auto_leave = 5;
}

enum ConfChangeType {
AddNode = 0;
RemoveNode = 1;
AddLearnerNode = 2;
}

message ConfChange {
ConfChangeType change_type = 2;
uint64 node_id = 3;
bytes context = 4;

uint64 id = 1;
}

// ConfChangeSingle is an individual configuration change operation. Multiple
// such operations can be carried out atomically via a ConfChangeV2.
message ConfChangeSingle {
ConfChangeType change_type = 1;
uint64 node_id = 2;
}

// ConfChangeV2 messages initiate configuration changes. They support both the
// simple "one at a time" membership change protocol and full Joint Consensus
// allowing for arbitrary changes in membership.
//
// The supplied context is treated as an opaque payload and can be used to
// attach an action on the state machine to the application of the config change
// proposal. Note that contrary to Joint Consensus as outlined in the Raft
// paper[1], configuration changes become active when they are *applied* to the
// state machine (not when they are appended to the log).
//
// The simple protocol can be used whenever only a single change is made.
//
// Non-simple changes require the use of Joint Consensus, for which two
// configuration changes are run. The first configuration change specifies the
// desired changes and transitions the Raft group into the joint configuration,
// in which quorum requires a majority of both the pre-changes and post-changes
// configuration. Joint Consensus avoids entering fragile intermediate
// configurations that could compromise survivability. For example, without the
// use of Joint Consensus and running across three availability zones with a
// replication factor of three, it is not possible to replace a voter without
// entering an intermediate configuration that does not survive the outage of
// one availability zone.
//
// The provided ConfChangeTransition specifies how (and whether) Joint Consensus
// is used, and assigns the task of leaving the joint configuration either to
// Raft or the application. Leaving the joint configuration is accomplished by
// proposing a ConfChangeV2 with only and optionally the Context field
// populated.
//
// For details on Raft membership changes, see:
//
// [1]: https://github.com/ongardie/dissertation/blob/master/online-trim.pdf
message ConfChangeV2 {
ConfChangeTransition transition = 1;
repeated ConfChangeSingle changes = 2;
bytes context = 3;
}
14 changes: 14 additions & 0 deletions proto/raft.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
syntax = "proto3";
package sqrlraft;

import "eraftpb.proto";

message MessageAck {
bool success = 1;
}

// Use our copied raft protos and create a service wrapper
service Raft {
rpc Send(eraftpb.Message) returns (MessageAck);
}

Loading
Loading