Skip to content

Commit

Permalink
pkg/fuzzer: use a MAB to decide on exec fuzz vs exec gen
Browse files Browse the repository at this point in the history
Let's try to use a plain epsilon-greedy MAB for this purpose.

To better track its effect, also calculate moving averages of the
"new max signal" / "execution time" ratios for exec fuzz and exec gen.
  • Loading branch information
a-nogikh committed Apr 3, 2024
1 parent b90978b commit dbd3f2d
Show file tree
Hide file tree
Showing 10 changed files with 313 additions and 23 deletions.
71 changes: 52 additions & 19 deletions pkg/fuzzer/fuzzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (

"github.com/google/syzkaller/pkg/corpus"
"github.com/google/syzkaller/pkg/ipc"
"github.com/google/syzkaller/pkg/learning"
"github.com/google/syzkaller/pkg/rpctype"
"github.com/google/syzkaller/pkg/signal"
"github.com/google/syzkaller/prog"
Expand All @@ -34,6 +35,12 @@ type Fuzzer struct {
ctMu sync.Mutex // TODO: use RWLock.
ctRegenerate chan struct{}

// Use a MAB to determine the right distribution of
// exec fuzz and exec gen.
genFuzzMAB *learning.PlainMAB[string]
genSignalSpeed *learning.RunningRatioAverage[float64]
fuzzSignalSpeed *learning.RunningRatioAverage[float64]

nextExec *priorityQueue[*Request]
nextJobID atomic.Int64

Expand All @@ -43,6 +50,12 @@ type Fuzzer struct {

func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand,
target *prog.Target) *Fuzzer {
genFuzzMAB := &learning.PlainMAB[string]{
ExplorationRate: 0.02,
MinLearningRate: 0.0005,
}
genFuzzMAB.AddArms(statFuzz, statGenerate)

f := &Fuzzer{
Config: cfg,
Cover: &Cover{},
Expand All @@ -54,7 +67,10 @@ func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand,

// We're okay to lose some of the messages -- if we are already
// regenerating the table, we don't want to repeat it right away.
ctRegenerate: make(chan struct{}),
ctRegenerate: make(chan struct{}),
genFuzzMAB: genFuzzMAB,
genSignalSpeed: learning.NewRunningRatioAverage[float64](10000),
fuzzSignalSpeed: learning.NewRunningRatioAverage[float64](20000),

nextExec: makePriorityQueue[*Request](),
}
Expand Down Expand Up @@ -91,6 +107,8 @@ type Request struct {
flags ProgTypes
stat string
resultC chan *Result

genFuzzAction learning.Action[string]
}

type Result struct {
Expand All @@ -102,11 +120,12 @@ func (fuzzer *Fuzzer) Done(req *Request, res *Result) {
// Triage individual calls.
// We do it before unblocking the waiting threads because
// it may result in concurrent modification of req.Prog.
var newSignal int
if req.NeedSignal != rpctype.NoSignal && res.Info != nil {
for call, info := range res.Info.Calls {
fuzzer.triageProgCall(req.Prog, &info, call, req.flags)
newSignal += fuzzer.triageProgCall(req.Prog, &info, call, req.flags)
}
fuzzer.triageProgCall(req.Prog, &res.Info.Extra, -1, req.flags)
newSignal += fuzzer.triageProgCall(req.Prog, &res.Info.Extra, -1, req.flags)
}
// Unblock threads that wait for the result.
if req.resultC != nil {
Expand All @@ -116,20 +135,36 @@ func (fuzzer *Fuzzer) Done(req *Request, res *Result) {
fuzzer.mu.Lock()
fuzzer.stats[req.stat]++
fuzzer.mu.Unlock()
// Update the MAB(s).
reward := 0.0
if res.Info != nil && res.Info.ElapsedSec > 0 {
// Similarly to the "SyzVegas: Beating Kernel Fuzzing Odds with Reinforcement Learning"
// paper, let's use the ratio of "new max signal" to "execution time".
// Unlike the paper, let's take the raw value of it instead of its ratio to the average one.
reward = float64(newSignal) / res.Info.ElapsedSec
if req.stat == statGenerate {
fuzzer.genSignalSpeed.Save(float64(newSignal), res.Info.ElapsedSec)
} else if req.stat == statFuzz {
fuzzer.fuzzSignalSpeed.Save(float64(newSignal), res.Info.ElapsedSec)
}
}
if !req.genFuzzAction.Empty() {
fuzzer.genFuzzMAB.SaveReward(req.genFuzzAction, reward)
}
}

func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int,
flags ProgTypes) {
flags ProgTypes) int {
prio := signalPrio(p, info, call)
newMaxSignal := fuzzer.Cover.addRawMaxSignal(info.Signal, prio)
if newMaxSignal.Empty() {
return
return 0
}
if flags&progInTriage > 0 {
// We are already triaging this exact prog.
// All newly found coverage is flaky.
fuzzer.Logf(2, "found new flaky signal in call %d in %s", call, p)
return
return newMaxSignal.Len()
}
fuzzer.Logf(2, "found new signal in call %d in %s", call, p)
fuzzer.startJob(&triageJob{
Expand All @@ -140,6 +175,7 @@ func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int,
flags: flags,
jobPriority: triageJobPrio(flags),
})
return newMaxSignal.Len()
}

func signalPrio(p *prog.Prog, info *ipc.CallInfo, call int) (prio uint8) {
Expand Down Expand Up @@ -184,21 +220,18 @@ func (fuzzer *Fuzzer) nextInput() *Request {
}
}

// Either generate a new input or mutate an existing one.
mutateRate := 0.95
if !fuzzer.Config.Coverage {
// If we don't have real coverage signal, generate programs
// more frequently because fallback signal is weak.
mutateRate = 0.5
}
rnd := fuzzer.rand()
if rnd.Float64() < mutateRate {
req := mutateProgRequest(fuzzer, rnd)
if req != nil {
return req
}
action := fuzzer.genFuzzMAB.Action(rnd)

var req *Request
if action.Arm == statFuzz {
req = mutateProgRequest(fuzzer, rnd)
}
if req == nil {
req = genProgRequest(fuzzer, rnd)
}
return genProgRequest(fuzzer, rnd)
req.genFuzzAction = action
return req
}

func (fuzzer *Fuzzer) startJob(newJob job) {
Expand Down
2 changes: 2 additions & 0 deletions pkg/fuzzer/fuzzer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ func TestFuzz(t *testing.T) {
t.Logf("%s", p.Serialize())
}

t.Logf("stats: %+v", fuzzer.Stats().Named)

assert.Equal(t, len(tf.expectedCrashes), len(tf.crashes),
"not all expected crashes were found")
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/fuzzer/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,7 @@ func (fuzzer *Fuzzer) Stats() Stats {
for k, v := range fuzzer.stats {
ret.Named[k] = v
}
ret.Named["exec gen, sig/sec*1000"] = uint64(fuzzer.genSignalSpeed.Load() * 1000)
ret.Named["exec fuzz, sig/sec*1000"] = uint64(fuzzer.fuzzSignalSpeed.Load() * 1000)
return ret
}
10 changes: 8 additions & 2 deletions pkg/ipc/ipc.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,9 @@ type CallInfo struct {
}

type ProgInfo struct {
Calls []CallInfo
Extra CallInfo // stores Signal and Cover collected from background threads
Calls []CallInfo
Extra CallInfo // stores Signal and Cover collected from background threads
ElapsedSec float64 // total execution time in seconds
}

type Env struct {
Expand Down Expand Up @@ -275,14 +276,19 @@ func (env *Env) Exec(opts *ExecOpts, p *prog.Prog) (output []byte, info *ProgInf
return
}

start := osutil.MonotonicNano()
output, hanged, err0 = env.cmd.exec(opts, progData)
elapsedNs := osutil.MonotonicNano() - start
if err0 != nil {
env.cmd.close()
env.cmd = nil
return
}

info, err0 = env.parseOutput(p, opts)
if info != nil {
info.ElapsedSec = float64(elapsedNs) / float64(1e9)
}
if info != nil && env.config.Flags&FlagSignal == 0 {
addFallbackSignal(p, info)
}
Expand Down
77 changes: 77 additions & 0 deletions pkg/learning/mab.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Copyright 2024 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package learning

import (
"math/rand"
"sync"
)

// Action describes a single choice made by a MAB: the selected arm and
// the position of that arm inside the MAB that produced it.
type Action[T comparable] struct {
	// Arm is the value of the chosen arm.
	Arm T
	// index is the position of the arm in the MAB's internal arm list.
	index int
}

// Empty reports whether the action equals the zero value of Action[T].
//
// Note that the comparison covers both fields, so an action for the arm at
// index 0 whose Arm is the zero value of T is reported as empty as well.
func (a Action[T]) Empty() bool {
	var zero Action[T]
	return a == zero
}

// countedValue is a running estimate together with the number of
// samples that have been folded into it.
type countedValue struct {
	value float64 // current estimate
	count int64   // number of update() calls so far
}

// update moves the estimate towards value.
//
// The step is 1/count, which makes the estimate a plain average at the
// beginning and lets it converge quickly. Once 1/count drops below minStep,
// minStep is used instead, so that the estimate keeps following
// non-stationary problems.
func (cv *countedValue) update(value, minStep float64) {
	cv.count++
	delta := value - cv.value
	rate := 1.0 / float64(cv.count)
	if rate < minStep {
		rate = minStep
	}
	cv.value += delta * rate
}

// PlainMAB is a very simple epsilon-greedy MAB implementation.
// Arms are registered with AddArms, chosen with Action and
// their estimates are refined with SaveReward.
type PlainMAB[T comparable] struct {
// MinLearningRate is the lower bound of the step that SaveReward
// uses when it moves an arm's estimate towards a new reward.
MinLearningRate float64
// ExplorationRate is the probability with which Action picks
// a random arm instead of the one with the highest estimate.
ExplorationRate float64

// mu protects arms and weights.
mu sync.RWMutex
// arms and weights are parallel: weights[i] is the estimate of arms[i].
arms []T
weights []countedValue
}

// AddArms registers new arms. Every new arm starts with a zero
// estimate and a zero sample count.
func (p *PlainMAB[T]) AddArms(arms ...T) {
	p.mu.Lock()
	defer p.mu.Unlock()
	// Keep the two slices parallel: one zero-valued weight per added arm.
	p.arms = append(p.arms, arms...)
	p.weights = append(p.weights, make([]countedValue, len(arms))...)
}

// Action picks the arm to play next.
//
// With probability ExplorationRate a uniformly random arm is returned,
// otherwise the arm with the highest current estimate is returned (the
// earliest added arm wins ties).
//
// If no arms have been added yet, the zero Action is returned (for which
// Empty() is true) and r is not consulted.
func (p *PlainMAB[T]) Action(r *rand.Rand) Action[T] {
	p.mu.RLock()
	defer p.mu.RUnlock()
	if len(p.arms) == 0 {
		// Nothing to choose from: r.Intn(0) and p.arms[0] would both panic.
		return Action[T]{}
	}
	var pos int
	if r.Float64() < p.ExplorationRate {
		// Explore.
		pos = r.Intn(len(p.arms))
	} else {
		// Exploit: strict comparison keeps the first of equally good arms.
		for i := 1; i < len(p.arms); i++ {
			if p.weights[i].value > p.weights[pos].value {
				pos = i
			}
		}
	}
	return Action[T]{Arm: p.arms[pos], index: pos}
}

// SaveReward folds the reward observed for action into the estimate of
// the arm that the action refers to, using MinLearningRate as the minimal
// update step.
func (p *PlainMAB[T]) SaveReward(action Action[T], reward float64) {
	p.mu.Lock()
	defer p.mu.Unlock()
	weight := &p.weights[action.index]
	weight.update(reward, p.MinLearningRate)
}
66 changes: 66 additions & 0 deletions pkg/learning/mab_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright 2024 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package learning

import (
"math/rand"
"testing"

"github.com/google/syzkaller/pkg/testutil"
"github.com/stretchr/testify/assert"
)

// TestMABSmallDiff checks that the bandit prefers the better arm
// even when the two reward scales are close to each other.
func TestMABSmallDiff(t *testing.T) {
	const (
		steps     = 40000
		wantAbove = steps / 4 * 3
	)
	rnd := rand.New(testutil.RandSource(t))
	rewardScales := []float64{0.65, 0.7}
	mab := &PlainMAB[int]{
		MinLearningRate: 0.0001,
		ExplorationRate: 0.1,
	}
	for arm := 0; arm < len(rewardScales); arm++ {
		mab.AddArms(arm)
	}
	pulls := runMAB(rnd, mab, rewardScales, steps)
	t.Logf("counts: %v", pulls)
	// The slightly better arm must get more than 3/4 of all the pulls.
	assert.Greater(t, pulls[1], wantAbove)
}

// TestNonStationaryMAB checks that the bandit first settles on the best arm
// and then switches over once another arm becomes the best one.
func TestNonStationaryMAB(t *testing.T) {
	const (
		steps    = 25000
		majority = steps / 2
	)
	rnd := rand.New(testutil.RandSource(t))
	rewardScales := []float64{0.2, 0.7, 0.5, 0.1}
	mab := &PlainMAB[int]{
		MinLearningRate: 0.02,
		ExplorationRate: 0.04,
	}
	for arm := 0; arm < len(rewardScales); arm++ {
		mab.AddArms(arm)
	}

	// Phase 1: arm 1 has the largest reward scale and must dominate.
	pulls := runMAB(rnd, mab, rewardScales, steps)
	t.Logf("initially: %v", pulls)
	assert.Greater(t, pulls[1], majority)

	// Phase 2: make arm 3 the most rewarding one and expect the bandit to follow.
	rewardScales[3] = 0.9
	pulls = runMAB(rnd, mab, rewardScales, steps)
	t.Logf("after reward change: %v", pulls)
	assert.Greater(t, pulls[3], majority)
}

// runMAB plays the bandit for the given number of steps and returns how many
// times each arm was chosen. The reward of every pull is uniformly distributed
// in [0, arms[arm]).
func runMAB(r *rand.Rand, bandit *PlainMAB[int], arms []float64, steps int) []int {
	pulls := make([]int, len(arms))
	for left := steps; left > 0; left-- {
		choice := bandit.Action(r)
		arm := choice.Arm
		// TODO: use normal distribution?
		payout := r.Float64() * arms[arm]
		pulls[arm]++
		bandit.SaveReward(choice, payout)
	}
	return pulls
}
Loading

0 comments on commit dbd3f2d

Please sign in to comment.