From 8022c8b7601ab4774d0de3d4c4d7b96aeb4826d6 Mon Sep 17 00:00:00 2001
From: Derek Bruening
Date: Fri, 23 Feb 2024 19:39:38 -0500
Subject: [PATCH] i#6675: Collapse consecutive idle replay entries (#6673)

Saves substantial file space in the drmemtrace scheduler's record-replay
file when cores are idling by combining consecutive idle entries.

Adds checks to all unit tests that create replay files; this requires
access to the non-public record format, done via a helper class.

Also tested on several real applications: for one, the record file drops
from 34MB to 4K, matching the size of the as-traced schedule files. Even
tiny applications like threadsig, whose files were ballooned by
consecutive idle entries, show clear reductions, while large applications
whose record files formerly reached multiple GB now produce files orders
of magnitude smaller.

Issue: #6471, #6675
Fixes: #6675
---
 clients/drcachesim/scheduler/scheduler.cpp    | 23 ++++++++
 clients/drcachesim/scheduler/scheduler.h      | 10 ++++
 .../drcachesim/tests/scheduler_unit_tests.cpp | 56 +++++++++++++++++++
 3 files changed, 89 insertions(+)

diff --git a/clients/drcachesim/scheduler/scheduler.cpp b/clients/drcachesim/scheduler/scheduler.cpp
index 176801e8e71..bd24a88c5b8 100644
--- a/clients/drcachesim/scheduler/scheduler.cpp
+++ b/clients/drcachesim/scheduler/scheduler.cpp
@@ -106,6 +106,23 @@ typedef dynamorio::drmemtrace::record_file_reader_t<std::ifstream>
     default_record_file_reader_t;
 #endif
 
+std::string
+replay_file_checker_t::check(archive_istream_t *infile)
+{
+    // Ensure we don't have repeated idle records, which balloon the file size.
+    scheduler_t::schedule_record_t record;
+    bool prev_was_idle = false;
+    while (infile->read(reinterpret_cast<char *>(&record), sizeof(record))) {
+        if (record.type == scheduler_t::schedule_record_t::IDLE) {
+            if (prev_was_idle)
+                return "Error: consecutive idle records";
+            prev_was_idle = true;
+        } else
+            prev_was_idle = false;
+    }
+    return "";
+}
+
 /****************************************************************
  * Specializations for scheduler_tmpl_t<memref_t, reader_t>, aka scheduler_t.
  */
@@ -1628,6 +1645,12 @@ scheduler_tmpl_t<RecordType, ReaderType>::record_schedule_segment(
     // We always use the current wall-clock time, as the time stored in the prior
     // next_record() call can be out of order across outputs and lead to deadlocks.
     uint64_t timestamp = get_time_micros();
+    if (type == schedule_record_t::IDLE &&
+        outputs_[output].record.back().type == schedule_record_t::IDLE) {
+        // Merge. We don't need intermediate timestamps when idle, and consecutive
+        // idle records quickly balloon the file.
+        return sched_type_t::STATUS_OK;
+    }
     outputs_[output].record.emplace_back(type, input, start_instruction,
                                          stop_instruction, timestamp);
     // The stop is typically updated later in close_schedule_segment().
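The early return above is the entire mechanism: schedule_record_t is a
fixed-size binary record, so a core that stays idle across millions of
scheduling decisions used to append millions of entries where it now appends
one. Below is a minimal standalone sketch of the merge-on-append idea using
toy types, not the real scheduler structures; the toy adds an explicit
empty() check, whereas the scheduler itself relies on a prior record always
being present.

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Toy stand-in for schedule_record_t (hypothetical, for illustration).
    enum class rec_type_t { DEFAULT, IDLE };
    struct toy_record_t {
        rec_type_t type;
        uint64_t timestamp;
    };

    // Append a record, collapsing a run of idle entries into a single one,
    // mirroring the early return in record_schedule_segment().
    void
    append_record(std::vector<toy_record_t> &records, rec_type_t type,
                  uint64_t timestamp)
    {
        if (type == rec_type_t::IDLE && !records.empty() &&
            records.back().type == rec_type_t::IDLE) {
            // Merge: intermediate timestamps are not needed while idle.
            return;
        }
        records.push_back({ type, timestamp });
    }

    int
    main()
    {
        std::vector<toy_record_t> records;
        append_record(records, rec_type_t::DEFAULT, 1);
        // A million consecutive idle samples collapse into one record.
        for (uint64_t t = 2; t < 1000002; ++t)
            append_record(records, rec_type_t::IDLE, t);
        append_record(records, rec_type_t::DEFAULT, 1000002);
        std::cout << records.size() << " records\n"; // Prints "3 records".
        return 0;
    }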
diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h
index c2f4dedb1c2..78bc71fbf64 100644
--- a/clients/drcachesim/scheduler/scheduler.h
+++ b/clients/drcachesim/scheduler/scheduler.h
@@ -72,6 +72,13 @@ namespace dynamorio { /**< General DynamoRIO namespace. */
 namespace drmemtrace { /**< DrMemtrace tracing + simulation infrastructure namespace. */
 
+/* For testing, where schedule_record_t is not accessible. */
+class replay_file_checker_t {
+public:
+    std::string
+    check(archive_istream_t *infile);
+};
+
 /**
  * Schedules traced software threads onto simulated cpus.
  * Takes in a set of recorded traces and maps them onto a new set of output
@@ -1611,6 +1618,9 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
     // For online where we currently have to map dynamically observed thread ids
     // to the 0-based shard index.
     std::unordered_map<memref_tid_t, int> tid2shard_;
+
+    // Our testing class needs access to schedule_record_t.
+    friend class replay_file_checker_t;
 };
 
 /** See #dynamorio::drmemtrace::scheduler_tmpl_t. */
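Since schedule_record_t is a private nested type of scheduler_tmpl_t, the
checker is granted access via the friend declaration above rather than by
making the record format public. A minimal sketch of this friend-class
testing pattern, with hypothetical names:

    #include <string>

    class widget_t {
    public:
        // ... public API elided ...
    private:
        // Internal record format, deliberately not part of the public interface.
        struct internal_record_t {
            int type;
        };
        // Grant only the test helper access to internal_record_t.
        friend class widget_checker_t;
    };

    class widget_checker_t {
    public:
        std::string
        check()
        {
            // Naming widget_t::internal_record_t compiles only because
            // widget_checker_t is a friend of widget_t.
            widget_t::internal_record_t record = { 0 };
            return record.type == 0 ? "" : "error: unexpected type";
        }
    };

    int
    main()
    {
        return widget_checker_t().check().empty() ? 0 : 1;
    }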
diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp
index 21235463e71..56757dc8874 100644
--- a/clients/drcachesim/tests/scheduler_unit_tests.cpp
+++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp
@@ -1148,6 +1148,14 @@ test_synthetic_time_quanta()
         if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS)
             assert(false);
     }
+    {
+        replay_file_checker_t checker;
+        zipfile_istream_t infile(record_fname);
+        std::string res = checker.check(&infile);
+        if (!res.empty())
+            std::cerr << "replay file checker failed: " << res;
+        assert(res.empty());
+    }
     {
         // Replay.
         std::vector<scheduler_t::input_reader_t> readers;
@@ -2285,6 +2293,14 @@ test_replay()
         if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS)
             assert(false);
     }
+    {
+        replay_file_checker_t checker;
+        zipfile_istream_t infile(record_fname);
+        std::string res = checker.check(&infile);
+        if (!res.empty())
+            std::cerr << "replay file checker failed: " << res;
+        assert(res.empty());
+    }
     // Now replay the schedule several times to ensure repeatability.
     for (int outer = 0; outer < 5; ++outer) {
         std::vector<scheduler_t::input_t> sched_inputs;
@@ -2402,6 +2418,14 @@ test_replay_multi_threaded(const char *testdir)
         if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS)
             assert(false);
     }
+    {
+        replay_file_checker_t checker;
+        zipfile_istream_t infile(record_fname);
+        std::string res = checker.check(&infile);
+        if (!res.empty())
+            std::cerr << "replay file checker failed: " << res;
+        assert(res.empty());
+    }
     {
         // Replay.
         scheduler_t scheduler;
@@ -2740,6 +2764,14 @@ test_replay_skip()
         if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS)
             assert(false);
     }
+    {
+        replay_file_checker_t checker;
+        zipfile_istream_t infile(record_fname);
+        std::string res = checker.check(&infile);
+        if (!res.empty())
+            std::cerr << "replay file checker failed: " << res;
+        assert(res.empty());
+    }
     {
         // Replay.
         std::vector<scheduler_t::input_reader_t> readers;
@@ -2918,6 +2950,14 @@ test_replay_limit()
             thread.join();
         if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS)
             assert(false);
+    }
+    {
+        replay_file_checker_t checker;
+        zipfile_istream_t infile(record_fname);
+        std::string res = checker.check(&infile);
+        if (!res.empty())
+            std::cerr << "replay file checker failed: " << res;
+        assert(res.empty());
         for (int i = 0; i < NUM_OUTPUTS; ++i) {
             std::cerr << "Output #" << i << " schedule: " << record_schedule[i] << "\n";
         }
@@ -3005,6 +3045,14 @@ test_replay_limit()
             thread.join();
         if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS)
             assert(false);
+    }
+    {
+        replay_file_checker_t checker;
+        zipfile_istream_t infile(record_fname);
+        std::string res = checker.check(&infile);
+        if (!res.empty())
+            std::cerr << "replay file checker failed: " << res;
+        assert(res.empty());
         int switches = 0;
         for (int i = 0; i < NUM_OUTPUTS; ++i) {
             std::cerr << "Output #" << i << " schedule: " << record_schedule[i] << "\n";
@@ -3400,6 +3448,14 @@ test_inactive()
         if (scheduler.write_recorded_schedule() != scheduler_t::STATUS_SUCCESS)
             assert(false);
     }
+    {
+        replay_file_checker_t checker;
+        zipfile_istream_t infile(record_fname);
+        std::string res = checker.check(&infile);
+        if (!res.empty())
+            std::cerr << "replay file checker failed: " << res;
+        assert(res.empty());
+    }
     {
         // Replay.
         std::vector<scheduler_t::input_reader_t> readers;
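Nothing ties the checker to the unit tests; it can be pointed at any recorded
schedule file. A sketch of standalone usage mirroring the block each test
adds, assuming a zip-enabled build (zipfile_istream_t requires zip support)
and the drcachesim include paths for scheduler.h and zipfile_istream.h:

    #include <iostream>
    #include <string>

    #include "scheduler.h"
    #include "zipfile_istream.h"

    using namespace dynamorio::drmemtrace;

    int
    main(int argc, const char *argv[])
    {
        if (argc != 2) {
            std::cerr << "Usage: " << argv[0] << " <replay_record_file>\n";
            return 1;
        }
        // Scan the archived schedule_record_t entries for back-to-back
        // idle records; an empty result string means the file is clean.
        zipfile_istream_t infile(argv[1]);
        replay_file_checker_t checker;
        std::string res = checker.check(&infile);
        if (!res.empty()) {
            std::cerr << "replay file checker failed: " << res << "\n";
            return 1;
        }
        std::cout << "no consecutive idle records\n";
        return 0;
    }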