Add the ability to run a short filesystem test #18

Open · wants to merge 6 commits into main
Changes from all commits
3 changes: 3 additions & 0 deletions Makefile
@@ -5,3 +5,6 @@ setup:
	make -C tools
	sudo make -C tools install
	Rscript setup.R

short:
	Rscript main.R short
8 changes: 8 additions & 0 deletions README.md
@@ -28,6 +28,14 @@ Two environment variables can be used to configure fsbench:

* `OUTPUT_FILE` defaults to `./results-<date>-<time>.csv` and indicates the path where the benchmark results should be recorded, as CSV data. (The same results are always printed to the screen, in tabular form.)
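For example, to record results in a specific location, set the variable when launching the benchmark (a minimal sketch; the path is only illustrative, and it assumes a full run is started by invoking the script directly):

```shell
OUTPUT_FILE=/mnt/testfs/results.csv Rscript main.R
```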

### Shortened Run

```shell
make short
```
This command runs the benchmark in a shortened form, allowing a quick assessment of a filesystem. It is less comprehensive and places a lighter load on the storage than the full run.
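The `short` Makefile target is a thin wrapper that passes the `short` argument to the benchmark script, so the shortened run can also be started directly:

```shell
Rscript main.R short
```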


## Comparing results

Once you have one or more results files from running the benchmarks, you can use the comparison tool to compare runs across several filesystems. It averages multiple runs for each distinct filesystem tested and produces bar charts comparing performance across the filesystems.
53 changes: 42 additions & 11 deletions main.R
@@ -5,39 +5,65 @@ library(R.utils) # needed for fread to read .gz files
library(vroom)

source("_functions.R")
short <- FALSE
args <- commandArgs(trailingOnly = TRUE)

if (length(args) >= 1) {
if (args[1] == "short") {
short <- TRUE
}
}

benchmark_begin()

dir.create(target("lib"), recursive = TRUE, showWarnings = FALSE)

# Install common R packages =====================================================================================
benchmark("Install MASS", time_install("MASS", lib = target("lib")))
if (!short) {
benchmark("Install lattice", time_install("lattice", lib = target("lib")))
benchmark("Install BH", time_install("BH", lib = target("lib")))
}

utils::remove.packages(c("MASS"), lib = target("lib"))

utils::remove.packages(c("MASS", "lattice", "BH"), lib = target("lib"))
if (!short) {
utils::remove.packages(c("lattice", "BH"), lib = target("lib"))
}
unlink(target("lib"), recursive = TRUE)
# ===============================================================================================================

# Write, then read, 1GB CSV =====================================================================================
benchmark("Write CSV, 100MB", write_random_csv(target("100mb.csv"), 100*1024*1024))
if (!short) {
benchmark("Write CSV, 10KB", write_random_csv(target("10kb.csv"), 10*1024))
benchmark("Write CSV, 1MB", write_random_csv(target("1mb.csv"), 1024*1024))
benchmark("Write CSV, 100MB", write_random_csv(target("100mb.csv"), 100*1024*1024))
benchmark("Write CSV, 1GB", write_random_csv(target("1gb.csv"), 1024*1024*1024))
}

benchmark("Read CSV, 100MB", system.time({ data.table::fread(target("100mb.csv")) }))
if (!short) {
benchmark("Read CSV, 10KB", system.time({ data.table::fread(target("10kb.csv")) }))
benchmark("Read CSV, 1MB", system.time({ data.table::fread(target("1mb.csv")) }))
benchmark("Read CSV, 100MB", system.time({ data.table::fread(target("100mb.csv")) }))
benchmark("Read CSV, 1GB", system.time({ data.table::fread(target("1gb.csv")) }))
}

unlink(target("100mb.csv"))
if (!short) {
unlink(target("10kb.csv"))
unlink(target("1mb.csv"))
unlink(target("100mb.csv"))
unlink(target("1gb.csv"))
}
# ===============================================================================================================

# Parallel tests with 1GB readers/writers =======================================================================
for (i in 1:4) {

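# Short mode sweeps fewer sizes: the loops below derive parallelism (2^i) and file counts (10^i) from iters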
iters <- 2
if (!short) {
iters <- 4
}

for (i in 1:iters) {
num_writers <- 2^i
benchmark("DD write, 1GB", system.time({
mclapply(1:num_writers, function(id) {
@@ -48,7 +74,7 @@ for (i in 1:4) {
}), parallelism = num_writers)
}

for (i in 1:4) {
for (i in 1:iters) {
num_readers <- 2^i
benchmark("DD read, 1GB", system.time({
mclapply(1:num_readers, function(id) {
@@ -63,7 +89,8 @@ unlink(target("parallel_*.dat"))
# ===============================================================================================================

# Small files tests =============================================================================================
for (i in 1:4) {

for (i in 1:iters) {
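# Total data volume stays at 100MB: the file count grows as 10^i while individual files shrink accordingly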
num_files <- 10 ^ i
file_size <- 100*1024*1024 / num_files

@@ -82,7 +109,7 @@ for (i in 1:4) {
# ===============================================================================================================

# Parallel small file tests =====================================================================================
for (i in 1:4) {
for (i in 1:iters) {
num_writers <- 2^i
benchmark("DD write, 10MB over 1000 files", system.time({
mclapply(1:num_writers, function(id) {
@@ -95,7 +122,7 @@ for (i in 1:4) {
}), parallelism = num_writers)
}

for (i in 1:4) {
for (i in 1:iters) {
num_readers <- 2^i
benchmark("DD read, 10MB over 1000 files", system.time({
mclapply(1:num_readers, function(id) {
@@ -120,6 +147,7 @@ size_per_row <- size_100mb / num_rows
fst_frame <- data.frame(x1 = runif(num_rows), x2 = runif(num_rows))
write.fst(fst_frame, target("dataset.fst"))

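# In short mode only the 100*1MB random-read case runs; the other FST read-size sweeps are skipped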
if (!short) {
num_read <- 0
benchmark("FST random reads, 100MB over 10*10MB reads", system.time({
rows_to_read <- (10*1024*1024) / size_per_row
@@ -130,6 +158,7 @@ benchmark("FST random reads, 100MB over 10*10MB reads", system.time({
num_read <- num_read + object.size(fst_subset)
}
}))
}

num_read <- 0
benchmark("FST random reads, 100MB over 100*1MB reads", system.time({
@@ -142,6 +171,7 @@ benchmark("FST random reads, 100MB over 100*1MB reads", system.time({
}
}))

if (!short) {
num_read <- 0
benchmark("FST random reads, 100MB over 1000*100KB reads", system.time({
rows_to_read <- (100*1024) / size_per_row
@@ -152,7 +182,8 @@ benchmark("FST random reads, 100MB over 1000*100KB reads", system.time({
num_read <- num_read + object.size(fst_subset)
}
}))

}
if (!short) {
num_read <- 0
benchmark("FST random reads, 100MB over 10000*10KB reads", system.time({
rows_to_read <- (10*1024) / size_per_row
@@ -163,7 +194,7 @@ benchmark("FST random reads, 100MB over 10000*10KB reads", system.time({
num_read <- num_read + object.size(fst_subset)
}
}))

}
unlink(target("dataset.fst"))
#================================================================================================================
