Deflake hashtable random fairness test (#1618)

Fixes intermittent failures of the hashtable random fairness unit test when running with the `--accurate` flag. https://github.com/valkey-io/valkey/actions/runs/12969591890/job/36173815884#step:10:105 The test case picks a random element out of 400, repeated 1M times, and then checks that 60% of the elements are picked within 3 standard deviations of the number of times they're expected to be picked. In this test run (with `--accurate`), the expected number is 2500 and the standard deviation is 50, which is only 2% of the expected value. This makes the check too strict and makes the test flaky. As an alternative, we allow 80% of the elements to be picked within 10% of the expected number. With this alternative condition, we can also raise the check for the non-edge case from 60% to 80% of the elements to be within 3 standard deviations. (With fewer repetitions, 3 standard deviations is greater than 10% of the expected value, so this new condition only affects the `--accurate` test run.) Additional change: Set a random seed for the hash function in the test suite. Until now, we only seeded the random number generator. Signed-off-by: Viktor Söderqvist <[email protected]>
valkey-io · Jan 27, 2025 · a18fcdb · a18fcdb
1 parent 6657757
commit a18fcdb
Showing 1 changed file with 20 additions and 2 deletions.
diff --git a/src/unit/test_hashtable.c b/src/unit/test_hashtable.c
@@ -21,6 +21,9 @@ static void randomSeed(void) {
     getRandomBytes((void *)&seed, sizeof(seed));
     init_genrand64(seed);
     srandom((unsigned)seed);
+    uint8_t hashseed[16];
+    getRandomBytes(hashseed, sizeof(hashseed));
+    hashtableSetHashFunctionSeed(hashseed);
 }
 
 /* An entry holding a string key and a string value in one allocation. */
@@ -749,15 +752,17 @@ int test_random_entry(int argc, char **argv, int flags) {
     /* With large n, the distribution approaches a normal distribution and we
      * can use p68 = within 1 std dev, p95 = within 2 std dev, p99.7 = within 3
      * std dev. */
-    long p68 = 0, p95 = 0, p99 = 0, p4dev = 0, p5dev = 0;
+    long p68 = 0, p95 = 0, p99 = 0, p4dev = 0, p5dev = 0, p10percent = 0;
     for (size_t j = 0; j < count; j++) {
         double dev = expected - times_picked[j];
         p68 += (dev >= -std_dev && dev <= std_dev);
         p95 += (dev >= -std_dev * 2 && dev <= std_dev * 2);
         p99 += (dev >= -std_dev * 3 && dev <= std_dev * 3);
         p4dev += (dev >= -std_dev * 4 && dev <= std_dev * 4);
         p5dev += (dev >= -std_dev * 5 && dev <= std_dev * 5);
+        p10percent += (dev >= -0.1 * expected && dev <= 0.1 * expected);
     }
+
     printf("Random entry fairness test\n");
     printf("  Pick one of %zu entries, %ld times.\n", count, num_rounds);
     printf("  Expecting each entry to be picked %.2lf times, std dev %.3lf.\n", expected, std_dev);
@@ -766,12 +771,25 @@ int test_random_entry(int argc, char **argv, int flags) {
     printf("  Within 3 std dev (p99) = %.2lf%%\n", 100 * p99 / m);
     printf("  Within 4 std dev       = %.2lf%%\n", 100 * p4dev / m);
     printf("  Within 5 std dev       = %.2lf%%\n", 100 * p5dev / m);
+    printf("  Within 10%% dev         = %.2lf%%\n", 100 * p10percent / m);
 
     /* Conclusion? The number of trials (n) relative to the probabilities (p and
      * 1 − p) must be sufficiently large (n * p ≥ 5 and n * (1 − p) ≥ 5) to
      * approximate a binomial distribution with a normal distribution. */
     if (n / m >= 5 && n * (1 - 1 / m) >= 5) {
-        TEST_ASSERT_MESSAGE("Too unfair randomness", 100 * p99 / m >= 60.0);
+        /* Check that 80% of the elements are picked within 3 std deviations of
+         * the expected number. This is a low bar, since typically the 99% of
+         * the elements are within this range.
+         *
+         * There is an edge case. When n is very large and m is very small, the
+         * std dev of a binomial distribution is very small, which becomes too
+         * strict for our bucket layout and makes the test flaky. For example
+         * with m = 400 and n = 1M, we get an expected value of 2500 and a std
+         * dev of 50, which is just 2% of the expected value. We lower the bar
+         * for this case and accept that 80% of elements are just within 10% of
+         * the expected value. */
+        TEST_ASSERT_MESSAGE("Too unfair randomness",
+                            100 * p99 / m >= 80.0 || 100 * p10percent / m >= 80.0);
     } else {
         printf("To uncertain numbers to draw any conclusions about fairness.\n");
     }