Skip to content

Commit

Permalink
feat: conditional preallocate
Browse files Browse the repository at this point in the history
Allow the user to choose whether to preallocate the space for the
hashtable file. Preallocation is enabled by default.
  • Loading branch information
pepyakin committed Nov 11, 2024
1 parent 5b7b429 commit e10b324
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 25 deletions.
67 changes: 43 additions & 24 deletions nomt/src/bitbox/ht_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,18 +62,18 @@ pub fn open(

/// Creates the store file. Fails if store file already exists.
///
/// Lays out the meta page, and fills the file with zeroes.
pub fn create(path: PathBuf, num_pages: u32) -> std::io::Result<()> {
/// Lays out the meta page. If `preallocate` is true, preallocates the blocks for the file.
pub fn create(path: PathBuf, num_pages: u32, preallocate: bool) -> std::io::Result<()> {
let start = std::time::Instant::now();
let ht_path = path.join("ht");
let ht_file = OpenOptions::new().write(true).create(true).open(ht_path)?;

// number of pages + pages required for meta bits.
let page_count = num_pages + num_meta_byte_pages(num_pages);
let len = page_count as usize * PAGE_SIZE;
ht_file.set_len(len as u64)?;

zero_file(&ht_file, len)?;
resize_and_prealloc(&ht_file, len as u64, preallocate)?;

ht_file.sync_all()?;
drop(ht_file);

Expand All @@ -90,37 +90,56 @@ pub fn create(path: PathBuf, num_pages: u32) -> std::io::Result<()> {
Ok(())
}

#[cfg(target_os = "linux")]
fn zero_file(file: &File, len: usize) -> std::io::Result<()> {
let res = unsafe {
use std::os::fd::AsRawFd;

libc::fallocate(
file.as_raw_fd(),
libc::FALLOC_FL_ZERO_RANGE,
0 as _,
len as _,
)
};

if res == -1 {
Err(std::io::Error::last_os_error())
} else {
Ok(())
/// Sets the file size and, if `preallocate` is true, attempts to preallocate the file's blocks.
///
/// Preallocation is best effort: on Linux it is skipped for tmpfs, and if `fallocate` fails the
/// file is zeroed with regular writes instead.
///
/// # Errors
///
/// Returns an error if setting the file size fails, or if the write-based zeroing fallback
/// fails. A failing `fallocate` alone is not reported; it only triggers the fallback.
///
/// After a successful call on a newly created (empty) file, the file size is `len` bytes. Note
/// that the `fallocate` path only grows a file; it does not truncate one that is already longer.
fn resize_and_prealloc(ht_file: &File, len: u64, preallocate: bool) -> std::io::Result<()> {
    if !preallocate {
        // Not preallocating: only set the file size, no disk space is reserved up front.
        return ht_file.set_len(len);
    }

    cfg_if::cfg_if! {
        if #[cfg(target_os = "linux")] {
            if crate::sys::linux::tmpfs_check(ht_file) {
                // Skip preallocation for tmpfs. It doesn't support fallocate and it's
                // memory-backed anyway. ftruncate and bail.
                return ht_file.set_len(len);
            }
            // Try fallocate with ZERO_RANGE first as it's more efficient. fallocate extends the
            // file to `len` as well, so ftruncate (aka file.set_len()) is not needed.
            if crate::sys::linux::falloc_zero_file(ht_file, len).is_err() {
                // If fallocate fails, fall back to zeroing the file with write.
                resize_and_zero_file(ht_file, len)?;
            }
        } else {
            // No fallocate available: allocate by writing zeroes.
            resize_and_zero_file(ht_file, len)?;
        }
    }

    // Shared tail for both cfg arms, so the function yields a `Result` on every target.
    Ok(())
}

#[cfg(not(target_os = "linux"))]
fn zero_file(mut file: &File, len: usize) -> std::io::Result<()> {
/// Fallback method for allocating extents for the file: sets the file size to `len` and then
/// incrementally writes `len` bytes of zeroes to the file.
///
/// Writes go through the file's current cursor, so the caller is expected to pass a file whose
/// cursor is at the start (as is the case for a freshly opened file).
///
/// # Errors
///
/// Returns an error if setting the file size or any of the writes fails.
fn resize_and_zero_file(mut file: &File, len: u64) -> std::io::Result<()> {
    use std::io::Write;

    // Set the file size first.
    file.set_len(len)?;

    // Zero the file in fixed-size chunks. The remaining byte count is kept as `u64` so that
    // lengths above `usize::MAX` (possible on 32-bit targets) are not truncated.
    let buf = [0u8; PAGE_SIZE * 4];
    let mut remaining = len;
    while remaining > 0 {
        // `chunk` never exceeds `buf.len()`, so narrowing it back to `usize` is lossless.
        let chunk = remaining.min(buf.len() as u64);
        file.write_all(&buf[..chunk as usize])?;
        remaining -= chunk;
    }

    Ok(())
}
16 changes: 16 additions & 0 deletions nomt/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ pub struct Options {
pub(crate) warm_up: bool,
/// The number of threads to use for fetching prior values.
pub(crate) rollback_tp_size: usize,
/// Whether to preallocate the hashtable file.
pub(crate) preallocate_ht: bool,
}

impl Options {
Expand All @@ -40,6 +42,7 @@ impl Options {
max_rollback_log_len: 100,
warm_up: false,
rollback_tp_size: 4,
preallocate_ht: true,
}
}

Expand Down Expand Up @@ -121,4 +124,17 @@ impl Options {
pub fn rollback_tp_size(&mut self, rollback_tp_size: usize) {
self.rollback_tp_size = rollback_tp_size;
}

/// Sets whether to preallocate the hashtable file.
///
/// Many filesystems don't handle sparse files well. If the `preallocate_ht` option is set to
/// `true`, NOMT will try to make sure that the file is fully allocated. Preallocation is
/// performed on a best-effort basis.
///
/// If set to `false`, the disk space for the hashtable file is not allocated upfront; only the
/// file size is set. This can lead to fragmentation later.
///
/// The value is passed on to the routine that creates the hashtable file.
///
/// Default: `true`.
pub fn preallocate_ht(&mut self, preallocate_ht: bool) {
self.preallocate_ht = preallocate_ht;
}
}
2 changes: 1 addition & 1 deletion nomt/src/store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ fn create(o: &crate::Options) -> anyhow::Result<()> {
meta_fd.sync_all()?;
drop(meta_fd);

bitbox::create(o.path.clone(), o.bitbox_num_pages)?;
bitbox::create(o.path.clone(), o.bitbox_num_pages, o.preallocate_ht)?;
beatree::create(&o.path)?;

// As the last step, sync the directory.
Expand Down
36 changes: 36 additions & 0 deletions nomt/src/sys/linux.rs
Original file line number Diff line number Diff line change
@@ -1 +1,37 @@
//! Linux-specific code.
use super::unix::cvt_r;
use std::fs::File;
use std::os::fd::AsRawFd;

/// Reports whether `file` lives on a tmpfs filesystem.
///
/// Yields `false` both when the filesystem is not tmpfs and when the underlying `fstatfs` query
/// fails.
pub fn tmpfs_check(file: &File) -> bool {
    let fd = file.as_raw_fd();

    // SAFETY: the `statfs` struct is only zero-initialized here; its `f_type` field should be
    // filled in by the ffi call below before it is read.
    let mut fs_info: libc::statfs = unsafe { std::mem::zeroed() };

    // SAFETY: unsafe because ffi call. This should be IO-safe because the file is passed by
    // reference and thus outlives the call. `fs_info` is a valid, exclusive out-pointer.
    let outcome = cvt_r(|| unsafe { libc::fstatfs(fd, &mut fs_info) });

    match outcome {
        Ok(_) => fs_info.f_type == libc::TMPFS_MAGIC,
        Err(_) => false,
    }
}

/// Zeroes and allocates the first `len` bytes of the file using `fallocate` with
/// `FALLOC_FL_ZERO_RANGE`.
///
/// If the file is currently shorter than `len`, fallocate grows it to `len` bytes. If the file
/// is larger than the given length, the file is not truncated.
///
/// Doesn't work on tmpfs.
///
/// # Errors
///
/// Returns `InvalidInput` if `len` does not fit into the platform's `off_t`, or the OS error
/// reported by `fallocate`.
pub fn falloc_zero_file(file: &File, len: u64) -> std::io::Result<()> {
    // `fallocate` takes a signed `off_t`; reject lengths that would wrap instead of casting
    // them blindly.
    if len > libc::off_t::MAX as u64 {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "length does not fit in off_t",
        ));
    }
    let len = len as libc::off_t;
    let fd = file.as_raw_fd();

    cvt_r(|| unsafe {
        // SAFETY: unsafe because ffi call. This should be IO-safe because the file is passed
        //         by reference.
        libc::fallocate(fd, libc::FALLOC_FL_ZERO_RANGE, 0, len)
    })
    .map(drop)
}

0 comments on commit e10b324

Please sign in to comment.