-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
pkg/fuzzer: use a MAB to decide on exec fuzz vs exec gen
Let's try to use a plain delta-epsilon MAB for this purpose. To better track its effect, also calculate moving averages of the "new max signal" / "execution time" ratios for exec fuzz and exec gen.
- Loading branch information
Showing
10 changed files
with
313 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
// Copyright 2024 syzkaller project authors. All rights reserved. | ||
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. | ||
|
||
package learning | ||
|
||
import ( | ||
"math/rand" | ||
"sync" | ||
) | ||
|
||
// Action is a single decision made by a bandit: the arm that was selected
// together with the position of that arm inside the bandit.
type Action[T comparable] struct {
	// Arm is the value of the selected arm.
	Arm T
	// index is the position of the arm within the bandit that produced
	// the action; it is used to locate the weight to update when a
	// reward is saved.
	index int
}
|
||
func (a Action[T]) Empty() bool { | ||
return a == Action[T]{} | ||
} | ||
|
||
// countedValue is a running estimate paired with the number of samples
// that have been folded into it so far.
type countedValue struct {
	value float64 // current estimate
	count int64   // number of update calls applied to value
}
|
||
func (cv *countedValue) update(value, minStep float64) { | ||
// Using larger steps at the beginning allows us to | ||
// converge faster to the actual value. | ||
// The minStep limit ensures that we can still track | ||
// non-stationary problems. | ||
cv.count++ | ||
step := 1.0 / float64(cv.count) | ||
if step < minStep { | ||
step = minStep | ||
} | ||
cv.value += (value - cv.value) * step | ||
} | ||
|
||
// PlainMAB is a very simple epsylon-greedy MAB implementation. | ||
type PlainMAB[T comparable] struct { | ||
MinLearningRate float64 | ||
ExplorationRate float64 | ||
|
||
mu sync.RWMutex | ||
arms []T | ||
weights []countedValue | ||
} | ||
|
||
func (p *PlainMAB[T]) AddArms(arms ...T) { | ||
p.mu.Lock() | ||
defer p.mu.Unlock() | ||
for _, arm := range arms { | ||
p.arms = append(p.arms, arm) | ||
p.weights = append(p.weights, countedValue{0, 0}) | ||
} | ||
} | ||
|
||
func (p *PlainMAB[T]) Action(r *rand.Rand) Action[T] { | ||
p.mu.RLock() | ||
defer p.mu.RUnlock() | ||
var pos int | ||
if r.Float64() < p.ExplorationRate { | ||
pos = r.Intn(len(p.arms)) | ||
} else { | ||
for i := 1; i < len(p.arms); i++ { | ||
if p.weights[i].value > p.weights[pos].value { | ||
pos = i | ||
} | ||
} | ||
} | ||
return Action[T]{Arm: p.arms[pos], index: pos} | ||
} | ||
|
||
func (p *PlainMAB[T]) SaveReward(action Action[T], reward float64) { | ||
p.mu.Lock() | ||
defer p.mu.Unlock() | ||
p.weights[action.index].update(reward, p.MinLearningRate) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
// Copyright 2024 syzkaller project authors. All rights reserved. | ||
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. | ||
|
||
package learning | ||
|
||
import ( | ||
"math/rand" | ||
"testing" | ||
|
||
"github.com/google/syzkaller/pkg/testutil" | ||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestMABSmallDiff(t *testing.T) { | ||
r := rand.New(testutil.RandSource(t)) | ||
bandit := &PlainMAB[int]{ | ||
MinLearningRate: 0.0001, | ||
ExplorationRate: 0.1, | ||
} | ||
arms := []float64{0.65, 0.7} | ||
for i := range arms { | ||
bandit.AddArms(i) | ||
} | ||
const steps = 40000 | ||
counts := runMAB(r, bandit, arms, steps) | ||
t.Logf("counts: %v", counts) | ||
assert.Greater(t, counts[1], steps/4*3) | ||
} | ||
|
||
func TestNonStationaryMAB(t *testing.T) { | ||
r := rand.New(testutil.RandSource(t)) | ||
bandit := &PlainMAB[int]{ | ||
MinLearningRate: 0.02, | ||
ExplorationRate: 0.04, | ||
} | ||
|
||
arms := []float64{0.2, 0.7, 0.5, 0.1} | ||
for i := range arms { | ||
bandit.AddArms(i) | ||
} | ||
|
||
const steps = 25000 | ||
counts := runMAB(r, bandit, arms, steps) | ||
t.Logf("initially: %v", counts) | ||
|
||
// Ensure that we've found the best arm. | ||
assert.Greater(t, counts[1], steps/2) | ||
|
||
// Now change the best arm's avg reward. | ||
arms[3] = 0.9 | ||
counts = runMAB(r, bandit, arms, steps) | ||
t.Logf("after reward change: %v", counts) | ||
assert.Greater(t, counts[3], steps/2) | ||
} | ||
|
||
func runMAB(r *rand.Rand, bandit *PlainMAB[int], arms []float64, steps int) []int { | ||
counts := make([]int, len(arms)) | ||
for i := 0; i < steps; i++ { | ||
action := bandit.Action(r) | ||
// TODO: use normal distribution? | ||
reward := r.Float64() * arms[action.Arm] | ||
counts[action.Arm]++ | ||
bandit.SaveReward(action, reward) | ||
} | ||
return counts | ||
} |
Oops, something went wrong.