Add the ability to run a short filesystem test #18

Open · wants to merge 6 commits into main
Changes from all commits
3 changes: 3 additions & 0 deletions Makefile
@@ -5,3 +5,6 @@ setup:
	make -C tools
	sudo make -C tools install
	Rscript setup.R

short:
	Rscript main.R short
8 changes: 8 additions & 0 deletions README.md
@@ -28,6 +28,14 @@ Two environment variables can be used to configure fsbench:

* `OUTPUT_FILE` defaults to `./results-<date>-<time>.csv` and indicates the path where the benchmark results should be recorded, as CSV data. (The same results are always printed to the screen, in tabular form.)
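For example, to record results in a specific location, set the variable when launching the benchmark (a minimal sketch; the path is only illustrative, and it assumes a full run is started by invoking the script directly):

```shell
OUTPUT_FILE=/mnt/testfs/results.csv Rscript main.R
```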

### Shortened Run

```shell
make short
```
This command runs the benchmark in a shortened form, allowing a quick assessment of a filesystem. It is less comprehensive and places a lighter load on the storage than the full run.
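The `short` Makefile target is a thin wrapper that passes the `short` argument to the benchmark script, so the shortened run can also be started directly:

```shell
Rscript main.R short
```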


## Comparing results

Once you have one or more results files from running the benchmarks, you can use the comparison tool to compare runs across several filesystems. It averages multiple runs for each distinct filesystem tested and produces bar charts comparing performance across the filesystems.
53 changes: 42 additions & 11 deletions main.R
@@ -5,39 +5,65 @@ library(R.utils) # needed for fread to read .gz files
library(vroom)

source("_functions.R")
short <- FALSE
args <- commandArgs(trailingOnly = TRUE)

if (length(args) >= 1) {
if (args[1] == "short") {
short <- TRUE
}
}

benchmark_begin()

dir.create(target("lib"), recursive = TRUE, showWarnings = FALSE)

# Install common R packages =====================================================================================
benchmark("Install MASS", time_install("MASS", lib = target("lib")))
if (!short) {
benchmark("Install lattice", time_install("lattice", lib = target("lib")))
benchmark("Install BH", time_install("BH", lib = target("lib")))
}

utils::remove.packages(c("MASS"), lib = target("lib"))

utils::remove.packages(c("MASS", "lattice", "BH"), lib = target("lib"))
if (!short) {
utils::remove.packages(c("lattice", "BH"), lib = target("lib"))
}
unlink(target("lib"), recursive = TRUE)
# ===============================================================================================================

# Write, then read, 1GB CSV =====================================================================================
benchmark("Write CSV, 100MB", write_random_csv(target("100mb.csv"), 100*1024*1024))
if (!short) {
benchmark("Write CSV, 10KB", write_random_csv(target("10kb.csv"), 10*1024))
benchmark("Write CSV, 1MB", write_random_csv(target("1mb.csv"), 1024*1024))
benchmark("Write CSV, 100MB", write_random_csv(target("100mb.csv"), 100*1024*1024))
benchmark("Write CSV, 1GB", write_random_csv(target("1gb.csv"), 1024*1024*1024))
}

benchmark("Read CSV, 100MB", system.time({ data.table::fread(target("100mb.csv")) }))
if (!short) {
benchmark("Read CSV, 10KB", system.time({ data.table::fread(target("10kb.csv")) }))
benchmark("Read CSV, 1MB", system.time({ data.table::fread(target("1mb.csv")) }))
benchmark("Read CSV, 100MB", system.time({ data.table::fread(target("100mb.csv")) }))
benchmark("Read CSV, 1GB", system.time({ data.table::fread(target("1gb.csv")) }))
}

unlink(target("100mb.csv"))
if (!short) {
unlink(target("10kb.csv"))
unlink(target("1mb.csv"))
unlink(target("100mb.csv"))
unlink(target("1gb.csv"))
}
# ===============================================================================================================

# Parallel tests with 1GB readers/writers =======================================================================
for (i in 1:4) {

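# Short mode sweeps fewer sizes: the loops below derive parallelism (2^i) and file counts (10^i) from iters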
iters <- 2
if (!short) {
iters <- 4
}

for (i in 1:iters) {
num_writers <- 2^i
benchmark("DD write, 1GB", system.time({
mclapply(1:num_writers, function(id) {
@@ -48,7 +74,7 @@ for (i in 1:4) {
}), parallelism = num_writers)
}

for (i in 1:4) {
for (i in 1:iters) {
num_readers <- 2^i
benchmark("DD read, 1GB", system.time({
mclapply(1:num_readers, function(id) {
@@ -63,7 +89,8 @@ unlink(target("parallel_*.dat"))
# ===============================================================================================================

# Small files tests =============================================================================================
for (i in 1:4) {

for (i in 1:iters) {
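# Total data volume stays at 100MB: the file count grows as 10^i while individual files shrink accordingly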
num_files <- 10 ^ i
file_size <- 100*1024*1024 / num_files

@@ -82,7 +109,7 @@ for (i in 1:4) {
# ===============================================================================================================

# Parallel small file tests =====================================================================================
for (i in 1:4) {
for (i in 1:iters) {
num_writers <- 2^i
benchmark("DD write, 10MB over 1000 files", system.time({
mclapply(1:num_writers, function(id) {
@@ -95,7 +122,7 @@ for (i in 1:4) {
}), parallelism = num_writers)
}

for (i in 1:4) {
for (i in 1:iters) {
num_readers <- 2^i
benchmark("DD read, 10MB over 1000 files", system.time({
mclapply(1:num_readers, function(id) {
@@ -120,6 +147,7 @@ size_per_row <- size_100mb / num_rows
fst_frame <- data.frame(x1 = runif(num_rows), x2 = runif(num_rows))
write.fst(fst_frame, target("dataset.fst"))

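# In short mode only the 100*1MB random-read case runs; the other FST read-size sweeps are skipped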
if (!short) {
num_read <- 0
benchmark("FST random reads, 100MB over 10*10MB reads", system.time({
rows_to_read <- (10*1024*1024) / size_per_row
@@ -130,6 +158,7 @@ benchmark("FST random reads, 100MB over 10*10MB reads", system.time({
num_read <- num_read + object.size(fst_subset)
}
}))
}

num_read <- 0
benchmark("FST random reads, 100MB over 100*1MB reads", system.time({
@@ -142,6 +171,7 @@ benchmark("FST random reads, 100MB over 100*1MB reads", system.time({
}
}))

if (!short) {
num_read <- 0
benchmark("FST random reads, 100MB over 1000*100KB reads", system.time({
rows_to_read <- (100*1024) / size_per_row
@@ -152,7 +182,8 @@ benchmark("FST random reads, 100MB over 1000*100KB reads", system.time({
num_read <- num_read + object.size(fst_subset)
}
}))

}
if (!short) {
num_read <- 0
benchmark("FST random reads, 100MB over 10000*10KB reads", system.time({
rows_to_read <- (10*1024) / size_per_row
@@ -163,7 +194,7 @@ benchmark("FST random reads, 100MB over 10000*10KB reads", system.time({
num_read <- num_read + object.size(fst_subset)
}
}))

}
unlink(target("dataset.fst"))
#================================================================================================================
