diff --git a/Cargo.toml b/Cargo.toml index e198ff7..e799aee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ rustdoc-args = ["--cfg", "docsrs"] ahash = "0.8.11" bincode = "1.3.3" chashmap = { version = "2.2.2", optional = true } -clap = { version = "=4.5.13", features = ["derive"] } +clap = { version = "=4.5.14", features = ["derive"] } contrie = { version = "0.1.4", optional = true } core_affinity = "0.8.1" ctrlc = "3.4.4" @@ -37,13 +37,13 @@ parking_lot = "0.12.3" quanta = "0.12.3" rand = "0.8.5" rocksdb = { version = "0.22.0", optional = true } -scc = { version = "2.1.6", optional = true } -serde = { version = "1.0.204", features = ["derive"] } +scc = { version = "2.1.9", optional = true } +serde = { version = "1.0.205", features = ["derive"] } toml = "0.8.19" zipf = "7.0.1" [dev-dependencies] -tempfile = "3.10.1" +tempfile = "3.12.0" [features] chashmap = ["dep:chashmap"] diff --git a/README.md b/README.md index fd7c267..7d245c1 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ The [documentation](https://docs.rs/kvbench) provides detailed usage guidelines. ## Development -The missing pieces that are currently under active development: +This project is being actively developed. The following tasks are currently being worked on: - Read-modify-write (RMW) support. - More built-in stores and benchmark parameters. diff --git a/src/bench.rs b/src/bench.rs index 984d9e2..3dae38f 100644 --- a/src/bench.rs +++ b/src/bench.rs @@ -1,133 +1,4 @@ //! The core benchmark functionality. -//! -//! A benchmark in this crate actually refers to a group of benchmark runs, named **phases**. Users -//! can provide one or multiple phases that will be run sequentially, each with different -//! configurations. -//! -//! ## Configuration Format -//! -//! A benchmark configuration file is formatted in TOML. It consists of the definition of multiple -//! phases, each is defined in a dictionary named `benchmark`. Phases are organized in an array, so -//! the configuration of each phase starts with `[[benchmark]]`. It also supports a `[global]` -//! section in the configuration file that will override the missing field in each phase. This can -//! reduce the number of repeated options in each phase (e.g., shared options). -//! -//! A configuration file generally looks like the following: -//! -//! ```toml -//! [global] -//! # global options -//! -//! [[benchmark]] -//! # phase 1 configuration -//! -//! [[benchmark]] -//! # phase 2 configuration -//! -//! ... -//! ``` -//! -//! Available options and their usage can be found in [`BenchmarkOpt`] and [`GlobalOpt`], for phase -//! and global options, respectively. -//! -//! Options in `[global]` section can be overwritten via environment variables without changing the -//! content in the TOML file. -//! For example, if the user needs to override `x` in `[global]`, setting the environment variable -//! `global.x` will get the job done. -//! -//! ## Output Format -//! -//! Currently, all outputs are in plain text format. This makes the output easy to process using -//! shell scripts and tools including gnuplot. If there are new data added to the output, it -//! will be appended at the end of existing entries (but before `cdf` if it exists, see below) -//! to make sure outputs from old versions can still be processed without changes. -//! -//! ### Throughput-only Output (default case) -//! -//! When measuring throughput, an output may look like the following: -//! ```txt -//! phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00 -//! 
phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00 -//! phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00 -//! phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 -//! ``` -//! -//! The general format is: -//! -//! ```txt -//! phase <pid> repeat <rid> duration <duration> elapsed <elapsed> total <total> mops <mops> -//! ``` -//! -//! Where: -//! -//! - `<pid>`: phase id. -//! - `<rid>`: repeat id in a phase, or string `finish .`, if the line is the aggregated report -//! of a whole phase. -//! - `<duration>`: the duration of the repeat/phase, in seconds. -//! - `<elapsed>`: the total elapsed seconds since the starting of the program. -//! - `<total>`: the total key-value operations executed by all worker threads in the repeat/phase. -//! - `<mops>`: followed by the throughput in million operations per second of the repeat/phase. -//! -//! ### Throughput + Latency Output (when `latency` is `true`) -//! -//! When latency measurement is enabled, the latency metrics shall be printed at the end of each -//! benchmark. It is not shown after each repeat, because unlike throughput which is a singleton -//! value at a given time, latency is a set of values and it usually matters only when we aggregate -//! a lot of them. The output format in this case is generally the same as throughput-only -//! measurements, but the `finish` line has extra output like the following: -//! -//! ```txt -//! phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00 -//! phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00 -//! phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00 -//! phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 min_us 0.05 max_us 100.00 avg_us 50.00 p50_us 50.00 p95_us 95.00 p99_us 99.00 p999_us 100.00 -//! ``` -//! -//! The extra output on the last line has a format of: -//! -//! ```txt -//! min_us <min> max_us <max> avg_us <avg> p50_us <p50> p95_us <p95> p99_us <p99> p999_us <p999> -//! ``` -//! -//! Where (all units are microseconds): -//! -//! - `<min>`: minimum latency -//! - `<max>`: maximum latency -//! - `<avg>`: mean latency -//! - `<p50>`: median latency (50% percentile) -//! - `<p95>`: P95 latency -//! - `<p99>
`: P99 latency -//! - ``: P999 latency (99.9%) -//! -//! ### Throughput + Latency + Latency CDF Mode (when both `latency` and `cdf` are `true`) -//! -//! When `cdf` is enabled, the latency CDF data will be printed at the end of the same line as the -//! latency metrics above. In that case, the output will be like the following: -//! -//! ```txt -//! phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00 -//! phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00 -//! phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00 -//! phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 min_us 0.05 max_us 100.00 avg_us 50.00 p50_us 50.00 p95_us 95.00 p99_us 99.00 p999_us 100.00 cdf_us percentile ... -//! ``` -//! Since the latency metrics vary a lot between different benchmarks/runs, the number of data -//! points of the CDF is different. Therefore, it is printed at the end of the output only. It is -//! printed as a tuple of ` ` where `` is the latency in microseconds and -//! `` is the percentile of the accumulated operations with latency higher than between -//! ` - 1` and ``, inclusively, ranging from 0 to 100 (two digit precision). -//! There can be arbitrary number of tuples. The output ends when the maximum recorded latency is -//! reached. -//! -//! An example of the CDF data will look like: -//! -//! ```txt -//! cdf_us percentile 1 0.00 2 0.00 3 0.00 4 10.00 5 20.00 6 20.00 ... -//! ``` -//! -//! It means there are not data points at 1/2/3 microseconds. At 4 microseconds, there are 10% data -//! points. At 5 microseconds, there are another 10% data points which makes the total percentile -//! 20.00. At 6 microseconds, there are no data points so the percentile is still 20.00. Users can -//! post-process the output and make a smooth CDF plot out of it. use crate::stores::{BenchKVMap, BenchKVMapOpt}; use crate::workload::{Workload, WorkloadOpt}; @@ -174,7 +45,12 @@ enum ReportMode { /// The configuration of a single benchmark deserialized from a TOML string. /// /// The fields are optional to ease parsing from TOML, as there can be global parameters that are -/// set for them. +/// set for them. The default value will be applied if an option is not specified by both the file +/// and the global option. +/// +/// **Note**: If an option not explicitly marked optional and it is not specified by both the file +/// and the global option, its default value will be applied. If it has no default value, an error +/// will be raised. The precedence of a value is: file > global (after env overridden) > default. #[derive(Deserialize, Clone, Debug)] pub struct BenchmarkOpt { /// Number of threads that runs this benchmark. @@ -182,10 +58,11 @@ pub struct BenchmarkOpt { /// Default: 1. pub threads: Option, - /// How many times this benchmark will be repeated. This option is useful when user would like - /// to plot the performance trend over time in the same benchmark. For example, setting this - /// option to 100 with one second timeout for each repeat can provide 100 data points over a - /// 100 second period. + /// How many times this benchmark will be repeated. + /// + /// This option is useful when user would like to plot the performance trend over time in the + /// same benchmark. For example, setting this option to 100 with one second timeout for each + /// repeat can provide 100 data points over a 100 second period. /// /// Default: 1. pub repeat: Option, @@ -194,23 +71,29 @@ pub struct BenchmarkOpt { /// option will be ignored. 
/// /// Note: see `ops`. + /// + /// *This value is optional.* pub timeout: Option, /// How many operations each worker will execute. Only used if `timeout` is not given. /// /// Note: if both `timeout` and `ops` are not given, the run is only stopped when all possible /// keys are generated. + /// + /// *This value is optional.* pub ops: Option, - /// Report mode: + /// Report mode. /// /// - "hidden": not reported. /// - "repeat": after each repeat, the metrics for that repeat is printed. /// - "finish": after all repeats are finished, the metrics of the whole phase is printed. /// - "all": equals to "repeat" + "finish". + /// + /// Default: "all". pub report: Option, - /// Max depth of queue for each worker. Only useful with [`AsyncKVMap`]. + /// Max depth of queue for each worker (only used with async stores). /// /// When the pending requests are less than `qd`, the worker will not attempt to get more /// responses. @@ -218,7 +101,7 @@ pub struct BenchmarkOpt { /// Default: 1. pub qd: Option, - /// Batch size for each request. Only useful with [`AsyncKVMap`]. + /// Batch size for each request (only used with async stores). /// /// Default: 1. pub batch: Option, @@ -368,8 +251,8 @@ impl Benchmark { /// The global options that go to the `[global]` section. /// -/// They will override missing fields in each `[[benchmark]]` section, if the corresponding option -/// is missing. For the usage of each option, please refer to [`BenchmarkOpt`]. +/// They will override the unspecified fields in each `[[benchmark]]` section with the same name. +/// For the usage of each option, please refer to [`BenchmarkOpt`]. #[derive(Deserialize, Clone, Debug)] pub struct GlobalOpt { // benchmark diff --git a/src/cmdline.rs b/src/cmdline.rs index 3cf7f36..c59b862 100644 --- a/src/cmdline.rs +++ b/src/cmdline.rs @@ -120,8 +120,7 @@ fn list_cli() { /// ``` /// /// Where `STORE_CONFIG` and `BENCH_CONFIG` are the paths to the key-value store and benchmark -/// configuration files, respectively. For their format, you can refer to the documentations of -/// [`crate::stores`] and [`crate::bench`]. +/// configuration files, respectively. /// /// ### Server mode /// @@ -131,8 +130,7 @@ fn list_cli() { /// kvbench server -s -a -p -n /// ``` /// -/// Where `STORE_CONFIG` is the path of the key-value store configuration file. Its format is -/// documented in [`crate::stores`]. +/// Where `STORE_CONFIG` is the path of the key-value store configuration file. /// /// The default `HOST` and `PORT` are `0.0.0.0` and `9000`. By default, the server will spawn one /// worker thread only for incoming connections. You can adjust the number of worker threads by diff --git a/src/lib.rs b/src/lib.rs index 8f2587a..f17ea00 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,30 +3,188 @@ //! A benchmark framework designed for testing key-value stores with easily customizable //! workloads. //! -//! With `kvbench`, you can define the details of a benchmark using the TOML format, such as the -//! proportions of mixed operations, the key access pattern, and key space size, just to name a -//! few. In addition to regular single-process benchmarks, `kvbench` also integrates a key-value -//! client/server implementation that works with a dedicated server thread/machine. +//! Key features: //! -//! You can also incorporate `kvbench` into your own key-value store implementations and run it -//! against the built-in stores. All you need is implementing the [`KVMap`] or the [`AsyncKVMap`] -//! trait, depending on the type of the store. 
After registering your store, simply reuse the -//! exported [`cmdline()`] in your `main` function and it will work seamlessly with your own store. +//! 1. Flexible and ergonomic control over benchmark specifications using TOML configuration files. +//! 2. Collecting diverse metrics, including throughput, latency (w/ CDF), and rate-limited latency. +//! 3. One-shot execution of multiple benchmark steps with different properties. +//! 4. Various built-in key-value stores in place as well as a client/server implementation. +//! 5. Highly extensible and can be seamlessly integrated into your own store. //! -//! A few key design choices include: +//! # Benchmark Configuration //! -//! - Each key-value store exclusively stores a single type of key/value pair: variable-sized byte -//! arrays represented as [`u8`] slices on the heap. No generics over the key's type. -//! - The key-value store and the benchmark configurations are black boxes. They are created -//! dynamically from a TOML file, and dynamically dispatched. -//! - Benchmark functionalities can be reused in users' own crates: new key-value stores can be -//! dynamically registered without touching the source code of this crate. +//! A benchmark in kvbench consists of one or more benchmark runs, termed as *phases*. +//! Phases will be run sequentially following their order in the configuration file. //! -//! More detailed usage could be found in the module-level rustdocs: +//! A benchmark configuration file is formatted in TOML. It consists of the definition of each +//! phase in an array named `benchmark`, so the configuration of each phase starts with +//! `[[benchmark]]`. The file also optionally contains a `[global]` section which will override the +//! unspecified field in each phase. This can eliminate redundant options in each phase, for +//! example, when those options are the same across the board. //! -//! - [`mod@bench`] for the config format of a benchmark. -//! - [`mod@stores`] for the config format of a built-in key-value store. -//! - [`cmdline()`] for the usage of the default command line interface. +//! A configuration file generally looks like the following: +//! +//! ```toml +//! [global] +//! # global options +//! +//! [[benchmark]] +//! # phase 1 configuration +//! +//! [[benchmark]] +//! # phase 2 configuration +//! +//! ... +//! ``` +//! Options in `[global]` section can also be overwritten via environment variables without +//! modifying the TOML file. For example, if the user needs to override `x` in `[global]`, one can +//! set the environment variable `global.x` (case insensitive). This is helpful when the user would +//! like to run different benchmarks when changing only a few options using a shell script. +//! +//! **Reference** +//! +//! - [`BenchmarkOpt`]: the available options for benchmark phase configuration. +//! - [`GlobalOpt`]: the available options for global configuration. +//! +//! # Key-Value Store Configuration +//! +//! In addition to the specification of the benchmark itself, kvbench also requires the +//! parameters of the key-value store it runs against. Only one key-value store runs at a time. +//! +//! The configuration of a key-value store is stored in a dictionary `map`. +//! A store's configuration file looks like the following: +//! +//! ```toml +//! [map] +//! name = "..." +//! # option1 = ... +//! # option2 = ... +//! +//! ... +//! ``` +//! The field `name` must be given and it should be equal to the name registered by the store. +//! 
Other than `name`, all the fields are parsed as a string map and will be passed to the +//! store's constructor function. The options in `[map]` section can also be overwritten via +//! environment variables (e.g., setting `map.x` overrides property `x`). +//! +//! **Reference** +//! +//! - [`mod@stores`]: the available options for built-in stores and how to register new stores. +//! +//! # Run a Benchmark +//! +//! Once the configuration files of the benchmark along with the key-value store are ready, a +//! benchmark can be started by using the `bench` mode of the built-in command-line interface. +//! +//! **Reference** +//! +//! - [`cmdline()`]: the usage of the default command-line interface. +//! +//! # Metrics Collection +//! +//! Currently, all outputs are in plain text format. This makes the output easy to process using +//! shell scripts and tools including gnuplot. If there are new data added to the output, it +//! will be appended at the end of existing entries (but before `cdf` if it exists, see below) +//! to make sure outputs from old versions can still be processed without changes. +//! +//! ## Throughput-only Output (default case) +//! +//! When measuring throughput, an output may look like the following: +//! ```txt +//! phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00 +//! phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00 +//! phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00 +//! phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 +//! ``` +//! +//! The general format is: +//! +//! ```txt +//! phase

repeat <rid> duration <duration> elapsed <elapsed> total <total> mops <mops> +//! ``` +//! +//! Where: +//! +//! - `<pid>`: phase id. +//! - `<rid>`: repeat id in a phase, or string `finish .`, if the line is the aggregated report +//! of a whole phase. +//! - `<duration>`: the duration of the repeat/phase, in seconds. +//! - `<elapsed>`: the total elapsed seconds since the starting of the program. +//! - `<total>`: the total key-value operations executed by all worker threads in the repeat/phase. +//! - `<mops>`: followed by the throughput in million operations per second of the repeat/phase. +//! +//! ## Throughput + Latency Output (when `latency` is `true`) +//! +//! When latency measurement is enabled, the latency metrics shall be printed at the end of each +//! benchmark. It is not shown after each repeat, because unlike throughput which is a singleton +//! value at a given time, latency is a set of values and it usually matters only when we aggregate +//! a lot of them. The output format in this case is generally the same as throughput-only +//! measurements, but the `finish` line has extra output like the following: +//! +//! ```txt +//! phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00 +//! phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00 +//! phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00 +//! phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 min_us 0.05 max_us 100.00 avg_us 50.00 p50_us 50.00 p95_us 95.00 p99_us 99.00 p999_us 100.00 +//! ``` +//! +//! The extra output on the last line has a format of: +//! +//! ```txt +//! min_us <min> max_us <max> avg_us <avg> p50_us <p50> p95_us <p95> p99_us <p99> p999_us <p999> +//! ``` +//! +//! Where (all units are microseconds): +//! +//! - `<min>`: minimum latency +//! - `<max>`: maximum latency +//! - `<avg>`: mean latency +//! - `<p50>`: median latency (50% percentile) +//! - `<p95>`: P95 latency +//! - `<p99>
`: P99 latency +//! - ``: P999 latency (99.9%) +//! +//! ## Throughput + Latency + Latency CDF Mode (when both `latency` and `cdf` are `true`) +//! +//! When `cdf` is enabled, the latency CDF data will be printed at the end of the same line as the +//! latency metrics above. In that case, the output will be like the following: +//! +//! ```txt +//! phase 0 repeat 0 duration 1.00 elapsed 1.00 total 1000000 mops 1.00 +//! phase 0 repeat 1 duration 1.00 elapsed 2.00 total 1000000 mops 1.00 +//! phase 0 repeat 2 duration 1.00 elapsed 3.00 total 1000000 mops 1.00 +//! phase 0 finish . duration 1.00 elapsed 3.00 total 3000000 mops 1.00 min_us 0.05 max_us 100.00 avg_us 50.00 p50_us 50.00 p95_us 95.00 p99_us 99.00 p999_us 100.00 cdf_us percentile ... +//! ``` +//! Since the latency metrics vary a lot between different benchmarks/runs, the number of data +//! points of the CDF is different. Therefore, it is printed at the end of the output only. It is +//! printed as a tuple of ` ` where `` is the latency in microseconds and +//! `` is the percentile of the accumulated operations with latency higher than between +//! ` - 1` and ``, inclusively, ranging from 0 to 100 (two digit precision). +//! There can be arbitrary number of tuples. The output ends when the maximum recorded latency is +//! reached. +//! +//! An example of the CDF data will look like: +//! +//! ```txt +//! cdf_us percentile 1 0.00 2 0.00 3 0.00 4 10.00 5 20.00 6 20.00 ... +//! ``` +//! +//! It means there are not data points at 1/2/3 microseconds. At 4 microseconds, there are 10% data +//! points. At 5 microseconds, there are another 10% data points which makes the total percentile +//! 20.00. At 6 microseconds, there are no data points so the percentile is still 20.00. Users can +//! post-process the output and make a smooth CDF plot out of it. +//! +//! # Server Mode +//! A key-value client/server implementation is available in kvbench. The server can be backed by +//! an arbitrary key-value store defined by a TOML file as in a benchmark, and the server can be +//! started using the `server` mode of the built-in command-line interface. +//! +//! To benchmark the server's performance, users can use the built-in client implementation. +//! +//! **Reference** +//! +//! - [`cmdline()`]: the usage of the default command-line interface. +//! - [`stores::remote`]: the available options of the key-value store client. use serde::{Deserialize, Serialize}; use std::cell::RefCell; @@ -83,7 +241,7 @@ pub enum Operation { Scan { key: Box<[u8]>, n: usize }, } -/// A request sent by a client to a server. +/// A request submitted by an asynchronous store. #[derive(Serialize, Deserialize, Eq, PartialEq, Clone, Debug)] pub struct Request { /// The (usually unique) identifier of the request, or custom data. @@ -93,7 +251,7 @@ pub struct Request { pub op: Operation, } -/// A response sent by a server to a client. +/// A response received by an asynchronous store. #[derive(Serialize, Deserialize, Eq, PartialEq, Clone, Debug)] pub struct Response { /// The `id` of the corresponding request. 
@@ -153,14 +311,16 @@ impl AsyncResponder for RefCell> { } } -pub mod bench; +mod bench; mod cmdline; -pub mod server; +mod server; pub mod stores; pub mod thread; -pub mod workload; +mod workload; +pub use bench::{BenchmarkOpt, GlobalOpt}; pub use cmdline::cmdline; +pub use workload::WorkloadOpt; pub extern crate inventory; pub extern crate toml; diff --git a/src/stores.rs b/src/stores.rs index f8b7469..e9cf6f6 100644 --- a/src/stores.rs +++ b/src/stores.rs @@ -1,27 +1,10 @@ //! Adapters for built-in and external key-value stores. //! -//! ## Configuration Format +//! ## Built-in Stores //! -//! The configuration of a key-value store is stored in a dictionary named `map`. Therefore, a -//! store's configuration file looks like the following: -//! -//! ```toml -//! [map] -//! name = "..." -//! # option1 = ... -//! # option2 = ... -//! -//! ... -//! ``` -//! The field `name` must be given and it should be equal to the name registered by the store. -//! Other than `name`, all the fields are parsed as a string map and will be hand over to the -//! constructor of the store's constructor function. For available options other than `name`, one -//! can refer to the module-level documentation of a specific store. -//! -//! Similar to the `[global]` secition of a benchmark, the options in a `[map]` section can also -//! be overwritten via environment variables. -//! For example, if the user needs to override `x` in `[map]`, setting the environment variable -//! `map.x` will get the job done. +//! The usage of built-in stores can be found in the module-level documentations. Please note that +//! it may be necessary to enable specific features of the crate to enable a certain built-in +//! store. //! //! ## Registering New Stores //! diff --git a/src/thread.rs b/src/thread.rs index 92fa562..fe480d3 100644 --- a/src/thread.rs +++ b/src/thread.rs @@ -11,22 +11,30 @@ //! it is with the [`JoinHandle`]. Because the purpose is not general spawn-join but solely for //! benchmark code, which does not use any return values. +/// A join handle returned by a spawn function. pub trait JoinHandle { + /// Join the thread, consume the boxed self. fn join(self: Box); } +/// A thread management abstraction. pub trait Thread { + /// Spawn a new thread using a boxed closure. fn spawn(&self, f: Box) -> Box; + /// Yield the current thread. fn yield_now(&self); + /// Pin the current thread to a certain CPU core. fn pin(&self, core: usize); } +/// A zero-sized wrapper for [`std::thread`] functions. #[derive(Clone)] -pub(crate) struct DefaultThread; +pub struct DefaultThread; -pub(crate) struct DefaultJoinHandle(std::thread::JoinHandle<()>); +/// A wrapper for [`std::thread::JoinHandle`]. +pub struct DefaultJoinHandle(std::thread::JoinHandle<()>); impl JoinHandle for DefaultJoinHandle { fn join(self: Box) { diff --git a/src/workload.rs b/src/workload.rs index 333b406..7f31ecf 100644 --- a/src/workload.rs +++ b/src/workload.rs @@ -148,37 +148,66 @@ impl KeyGenerator { /// A set of workload parameters that can be deserialized from a TOML string. /// -/// This struct is used for interacting with workload configuration files and also create new -/// [`Workload`] instances. Some options are wrapped in an `Option` type to ease writing -/// configuration files. If users would like to create a [`Workload`] instance directly using these -/// options, all fields must be present. 
+/// **Note 1**: If an option is not explicitly marked as optional and it is specified by neither the file +/// nor the global option, its default value will be applied. If it has no default value, an error +/// will be raised. The precedence of a value is: file > global (after environment variable overrides) > default. +/// +/// **Note 2**: the sum of all `*_perc` options must be equal to 100. #[derive(Deserialize, Clone, Debug, PartialEq)] pub struct WorkloadOpt { - /// Percentage of `SET` operations (optional, default 0). + /// Percentage of `SET` operations. + /// + /// Must be a non-negative integer if given. + /// + /// Default: 0. pub set_perc: Option, - /// Percentage of `GET` operations (optional, default 0). + /// Percentage of `GET` operations. + /// + /// Must be a non-negative integer if given. + /// + /// Default: 0. pub get_perc: Option, - /// Percentage of `DELETE` operations (optional, default 0). + /// Percentage of `DELETE` operations. + /// + /// Must be a non-negative integer if given. + /// + /// Default: 0. pub del_perc: Option, - /// Percentage of `SCAN` operations (optional, default 0). + /// Percentage of `SCAN` operations. + /// + /// Must be a non-negative integer if given. + /// + /// Default: 0. pub scan_perc: Option, - /// The number of iterations per `SCAN` (only used when `scan_perc` is non-zero, default 10). + /// The number of iterations per `SCAN`. + /// + /// Must be a positive integer if given. + /// + /// Default: 10. pub scan_n: Option, /// Key length in bytes. + /// + /// Must be a positive integer. pub klen: Option, /// Value length in bytes. + /// + /// Must be a positive integer. pub vlen: Option, /// Minimum key. + /// + /// Must be a non-negative integer. pub kmin: Option, /// Maximum key. + /// + /// Must be greater than `kmin`. pub kmax: Option, /// Key distribution. @@ -196,12 +225,16 @@ pub struct WorkloadOpt { /// - "latest": just like Zipfian but the hotspot is the latest key written to the store. pub dist: String, - /// The theta parameter for Zipfian distribution. (Optional, default 1.0) + /// The theta parameter for Zipfian distribution. + /// + /// Default: 1.0. pub zipf_theta: Option, - /// The hotspot location for Zipfian distribution. (Optional, default 0.0) + /// The hotspot location for Zipfian distribution. /// /// 0.0 means the first key. 0.5 means approximately the middle in the key space. + /// + /// Default: 0.0. pub zipf_hotspot: Option, }
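As a concrete illustration of the options documented above, a minimal two-phase benchmark configuration might look like the sketch below. It assumes that the [`WorkloadOpt`] fields (such as `set_perc` and `klen`) are written directly inside each `[[benchmark]]` table next to the [`BenchmarkOpt`] fields, that `timeout` takes a number of seconds, and that `"zipfian"` is an accepted spelling for the Zipfian key distribution; all concrete values are illustrative. Note that the `*_perc` fields in each phase sum to 100 and `kmax` is greater than `kmin`, as required by the notes above.

```toml
[global]
# Shared by both phases below; any of these can be overridden per phase,
# or from the outside via environment variables such as `global.threads`.
threads = 4
repeat = 5
timeout = 1.0   # assumed to be seconds per repeat
report = "finish"
klen = 8
vlen = 64
kmin = 0
kmax = 1000000

# Phase 1: fill the key space with writes only.
[[benchmark]]
set_perc = 100
get_perc = 0
del_perc = 0
dist = "zipfian"   # assumed name of the Zipfian distribution
zipf_theta = 1.0

# Phase 2: read-mostly traffic, with more threads than the global default.
[[benchmark]]
threads = 8
set_perc = 5
get_perc = 95
del_perc = 0
dist = "latest"
```

Saved as, for example, `bench.toml`, such a file would be passed to the `bench` mode of the command-line interface together with a separate store configuration file containing the `[map]` table, and an option like `repeat` could then be changed per run by setting the environment variable `global.repeat` rather than editing the file.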