diff --git a/pkg/fuzzer/fuzzer.go b/pkg/fuzzer/fuzzer.go
index df6fbd598f47..13897a96d0df 100644
--- a/pkg/fuzzer/fuzzer.go
+++ b/pkg/fuzzer/fuzzer.go
@@ -14,6 +14,7 @@ import (
 
 	"github.com/google/syzkaller/pkg/corpus"
 	"github.com/google/syzkaller/pkg/ipc"
+	"github.com/google/syzkaller/pkg/learning"
 	"github.com/google/syzkaller/pkg/rpctype"
 	"github.com/google/syzkaller/pkg/signal"
 	"github.com/google/syzkaller/prog"
@@ -34,6 +35,12 @@ type Fuzzer struct {
 	ctMu         sync.Mutex // TODO: use RWLock.
 	ctRegenerate chan struct{}
 
+	// Use a MAB to determine the right distribution of
+	// exec fuzz and exec gen.
+	genFuzzMAB      *learning.PlainMAB[string]
+	genSignalSpeed  *learning.RunningRatioAverage[float64]
+	fuzzSignalSpeed *learning.RunningRatioAverage[float64]
+
 	nextExec  *priorityQueue[*Request]
 	nextJobID atomic.Int64
 
@@ -43,6 +50,12 @@ type Fuzzer struct {
 
 func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand,
 	target *prog.Target) *Fuzzer {
+	genFuzzMAB := &learning.PlainMAB[string]{
+		ExplorationRate: 0.02,
+		MinLearningRate: 0.0005,
+	}
+	genFuzzMAB.AddArms(statFuzz, statGenerate)
+
 	f := &Fuzzer{
 		Config: cfg,
 		Cover:  &Cover{},
@@ -54,7 +67,10 @@ func NewFuzzer(ctx context.Context, cfg *Config, rnd *rand.Rand,
 		// We're okay to lose some of the messages -- if we are already
 		// regenerating the table, we don't want to repeat it right away.
-		ctRegenerate: make(chan struct{}),
+		ctRegenerate:    make(chan struct{}),
+		genFuzzMAB:      genFuzzMAB,
+		genSignalSpeed:  learning.NewRunningRatioAverage[float64](10000),
+		fuzzSignalSpeed: learning.NewRunningRatioAverage[float64](20000),
 
 		nextExec: makePriorityQueue[*Request](),
 	}
@@ -91,6 +107,8 @@ type Request struct {
 	flags     ProgTypes
 	stat      string
 	resultC   chan *Result
+
+	genFuzzAction learning.Action[string]
 }
 
 type Result struct {
@@ -102,11 +120,12 @@ func (fuzzer *Fuzzer) Done(req *Request, res *Result) {
 	// Triage individual calls.
 	// We do it before unblocking the waiting threads because
 	// it may result it concurrent modification of req.Prog.
+	var newSignal int
 	if req.NeedSignal != rpctype.NoSignal && res.Info != nil {
 		for call, info := range res.Info.Calls {
-			fuzzer.triageProgCall(req.Prog, &info, call, req.flags)
+			newSignal += fuzzer.triageProgCall(req.Prog, &info, call, req.flags)
 		}
-		fuzzer.triageProgCall(req.Prog, &res.Info.Extra, -1, req.flags)
+		newSignal += fuzzer.triageProgCall(req.Prog, &res.Info.Extra, -1, req.flags)
 	}
 	// Unblock threads that wait for the result.
 	if req.resultC != nil {
@@ -116,20 +135,36 @@
 	fuzzer.mu.Lock()
 	fuzzer.stats[req.stat]++
 	fuzzer.mu.Unlock()
+	// Update the MAB(s).
+	reward := 0.0
+	if res.Info != nil && res.Info.ElapsedSec > 0 {
+		// Similarly to the "SyzVegas: Beating Kernel Fuzzing Odds with Reinforcement Learning"
+		// paper, let's use the ratio of "new max signal" to "execution time".
+		// Unlike the paper, let's take the raw value of it instead of its ratio to the average one.
+		reward = float64(newSignal) / res.Info.ElapsedSec
+		if req.stat == statGenerate {
+			fuzzer.genSignalSpeed.Save(float64(newSignal), res.Info.ElapsedSec)
+		} else if req.stat == statFuzz {
+			fuzzer.fuzzSignalSpeed.Save(float64(newSignal), res.Info.ElapsedSec)
+		}
+	}
+	if !req.genFuzzAction.Empty() {
+		fuzzer.genFuzzMAB.SaveReward(req.genFuzzAction, reward)
+	}
 }
 
 func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int,
-	flags ProgTypes) {
+	flags ProgTypes) int {
 	prio := signalPrio(p, info, call)
 	newMaxSignal := fuzzer.Cover.addRawMaxSignal(info.Signal, prio)
 	if newMaxSignal.Empty() {
-		return
+		return 0
 	}
 	if flags&progInTriage > 0 {
 		// We are already triaging this exact prog.
 		// All newly found coverage is flaky.
 		fuzzer.Logf(2, "found new flaky signal in call %d in %s", call, p)
-		return
+		return newMaxSignal.Len()
 	}
 	fuzzer.Logf(2, "found new signal in call %d in %s", call, p)
 	fuzzer.startJob(&triageJob{
@@ -140,6 +175,7 @@ func (fuzzer *Fuzzer) triageProgCall(p *prog.Prog, info *ipc.CallInfo, call int,
 		flags:       flags,
 		jobPriority: triageJobPrio(flags),
 	})
+	return newMaxSignal.Len()
 }
 
 func signalPrio(p *prog.Prog, info *ipc.CallInfo, call int) (prio uint8) {
@@ -184,21 +220,18 @@ func (fuzzer *Fuzzer) nextInput() *Request {
 		}
 	}
 
-	// Either generate a new input or mutate an existing one.
-	mutateRate := 0.95
-	if !fuzzer.Config.Coverage {
-		// If we don't have real coverage signal, generate programs
-		// more frequently because fallback signal is weak.
-		mutateRate = 0.5
-	}
 	rnd := fuzzer.rand()
-	if rnd.Float64() < mutateRate {
-		req := mutateProgRequest(fuzzer, rnd)
-		if req != nil {
-			return req
-		}
+	action := fuzzer.genFuzzMAB.Action(rnd)
+
+	var req *Request
+	if action.Arm == statFuzz {
+		req = mutateProgRequest(fuzzer, rnd)
+	}
+	if req == nil {
+		req = genProgRequest(fuzzer, rnd)
 	}
-	return genProgRequest(fuzzer, rnd)
+	req.genFuzzAction = action
+	return req
 }
 
 func (fuzzer *Fuzzer) startJob(newJob job) {
diff --git a/pkg/fuzzer/fuzzer_test.go b/pkg/fuzzer/fuzzer_test.go
index 5c09201097be..4d9e0d541992 100644
--- a/pkg/fuzzer/fuzzer_test.go
+++ b/pkg/fuzzer/fuzzer_test.go
@@ -85,6 +85,8 @@ func TestFuzz(t *testing.T) {
 		t.Logf("%s", p.Serialize())
 	}
 
+	t.Logf("stats: %+v", fuzzer.Stats().Named)
+
 	assert.Equal(t, len(tf.expectedCrashes), len(tf.crashes),
 		"not all expected crashes were found")
 }
diff --git a/pkg/fuzzer/stats.go b/pkg/fuzzer/stats.go
index 044febc64712..223336422231 100644
--- a/pkg/fuzzer/stats.go
+++ b/pkg/fuzzer/stats.go
@@ -42,5 +42,7 @@ func (fuzzer *Fuzzer) Stats() Stats {
 	for k, v := range fuzzer.stats {
 		ret.Named[k] = v
 	}
+	ret.Named["exec gen, sig/sec*1000"] = uint64(fuzzer.genSignalSpeed.Load() * 1000)
+	ret.Named["exec fuzz, sig/sec*1000"] = uint64(fuzzer.fuzzSignalSpeed.Load() * 1000)
 	return ret
 }
diff --git a/pkg/ipc/ipc.go b/pkg/ipc/ipc.go
index c90e56caf936..4af24e79e0d7 100644
--- a/pkg/ipc/ipc.go
+++ b/pkg/ipc/ipc.go
@@ -95,8 +95,9 @@ type CallInfo struct {
 }
 
 type ProgInfo struct {
-	Calls []CallInfo
-	Extra CallInfo // stores Signal and Cover collected from background threads
+	Calls      []CallInfo
+	Extra      CallInfo // stores Signal and Cover collected from background threads
+	ElapsedSec float64  // total execution time in seconds
 }
 
 type Env struct {
@@ -275,7 +276,9 @@ func (env *Env) Exec(opts *ExecOpts, p *prog.Prog) (output []byte, info *ProgInf
 		return
 	}
 
+	start := osutil.MonotonicNano()
 	output, hanged, err0 = env.cmd.exec(opts, progData)
+	elapsedNs := osutil.MonotonicNano() - start
 	if err0 != nil {
 		env.cmd.close()
 		env.cmd = nil
@@ -283,6 +286,9 @@ func (env *Env) Exec(opts *ExecOpts, p *prog.Prog) (output []byte, info *ProgInf
 	}
 
 	info, err0 = env.parseOutput(p, opts)
+	if info != nil {
+		info.ElapsedSec = float64(elapsedNs) / float64(1e9)
+	}
 	if info != nil && env.config.Flags&FlagSignal == 0 {
 		addFallbackSignal(p, info)
 	}
diff --git a/pkg/learning/mab.go b/pkg/learning/mab.go
new file mode 100644
index 000000000000..34fe68dd03ce
--- /dev/null
+++ b/pkg/learning/mab.go
@@ -0,0 +1,77 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package learning
+
+import (
+	"math/rand"
+	"sync"
+)
+
+type Action[T comparable] struct {
+	Arm   T
+	index int
+}
+
+func (a Action[T]) Empty() bool {
+	return a == Action[T]{}
+}
+
+type countedValue struct {
+	value float64
+	count int64
+}
+
+func (cv *countedValue) update(value, minStep float64) {
+	// Using larger steps at the beginning allows us to
+	// converge faster to the actual value.
+	// The minStep limit ensures that we can still track
+	// non-stationary problems.
+	cv.count++
+	step := 1.0 / float64(cv.count)
+	if step < minStep {
+		step = minStep
+	}
+	cv.value += (value - cv.value) * step
+}
+
+// PlainMAB is a very simple epsilon-greedy MAB implementation.
+type PlainMAB[T comparable] struct {
+	MinLearningRate float64
+	ExplorationRate float64
+
+	mu      sync.RWMutex
+	arms    []T
+	weights []countedValue
+}
+
+func (p *PlainMAB[T]) AddArms(arms ...T) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	for _, arm := range arms {
+		p.arms = append(p.arms, arm)
+		p.weights = append(p.weights, countedValue{0, 0})
+	}
+}
+
+func (p *PlainMAB[T]) Action(r *rand.Rand) Action[T] {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+	var pos int
+	if r.Float64() < p.ExplorationRate {
+		pos = r.Intn(len(p.arms))
+	} else {
+		for i := 1; i < len(p.arms); i++ {
+			if p.weights[i].value > p.weights[pos].value {
+				pos = i
+			}
+		}
+	}
+	return Action[T]{Arm: p.arms[pos], index: pos}
+}
+
+func (p *PlainMAB[T]) SaveReward(action Action[T], reward float64) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	p.weights[action.index].update(reward, p.MinLearningRate)
+}
diff --git a/pkg/learning/mab_test.go b/pkg/learning/mab_test.go
new file mode 100644
index 000000000000..2f93e6c2fa5e
--- /dev/null
+++ b/pkg/learning/mab_test.go
@@ -0,0 +1,66 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package learning
+
+import (
+	"math/rand"
+	"testing"
+
+	"github.com/google/syzkaller/pkg/testutil"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestMABSmallDiff(t *testing.T) {
+	r := rand.New(testutil.RandSource(t))
+	bandit := &PlainMAB[int]{
+		MinLearningRate: 0.0001,
+		ExplorationRate: 0.1,
+	}
+	arms := []float64{0.65, 0.7}
+	for i := range arms {
+		bandit.AddArms(i)
+	}
+	const steps = 40000
+	counts := runMAB(r, bandit, arms, steps)
+	t.Logf("counts: %v", counts)
+	assert.Greater(t, counts[1], steps/4*3)
+}
+
+func TestNonStationaryMAB(t *testing.T) {
+	r := rand.New(testutil.RandSource(t))
+	bandit := &PlainMAB[int]{
+		MinLearningRate: 0.02,
+		ExplorationRate: 0.04,
+	}
+
+	arms := []float64{0.2, 0.7, 0.5, 0.1}
+	for i := range arms {
+		bandit.AddArms(i)
+	}
+
+	const steps = 25000
+	counts := runMAB(r, bandit, arms, steps)
+	t.Logf("initially: %v", counts)
+
+	// Ensure that we've found the best arm.
+	assert.Greater(t, counts[1], steps/2)
+
+	// Now make a different arm the best one.
+	arms[3] = 0.9
+	counts = runMAB(r, bandit, arms, steps)
+	t.Logf("after reward change: %v", counts)
+	assert.Greater(t, counts[3], steps/2)
+}
+
+func runMAB(r *rand.Rand, bandit *PlainMAB[int], arms []float64, steps int) []int {
+	counts := make([]int, len(arms))
+	for i := 0; i < steps; i++ {
+		action := bandit.Action(r)
+		// TODO: use normal distribution?
+		reward := r.Float64() * arms[action.Arm]
+		counts[action.Arm]++
+		bandit.SaveReward(action, reward)
+	}
+	return counts
+}
diff --git a/pkg/learning/window.go b/pkg/learning/window.go
new file mode 100644
index 000000000000..2cbfe5d3bfec
--- /dev/null
+++ b/pkg/learning/window.go
@@ -0,0 +1,67 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package learning
+
+import "sync"
+
+type Number interface {
+	int | int64 | float64
+}
+
+type RunningAverage[T Number] struct {
+	window []T
+	mu     sync.RWMutex
+	pos    int
+	total  T
+}
+
+func NewRunningAverage[T Number](size int) *RunningAverage[T] {
+	return &RunningAverage[T]{
+		window: make([]T, size),
+	}
+}
+
+func (ra *RunningAverage[T]) SaveInt(val int) {
+	ra.Save(T(val))
+}
+
+func (ra *RunningAverage[T]) Save(val T) {
+	ra.mu.Lock()
+	defer ra.mu.Unlock()
+	prev := ra.window[ra.pos]
+	ra.window[ra.pos] = val
+	ra.total += val - prev
+	ra.pos = (ra.pos + 1) % len(ra.window)
+}
+
+func (ra *RunningAverage[T]) Load() T {
+	ra.mu.RLock()
+	defer ra.mu.RUnlock()
+	return ra.total
+}
+
+type RunningRatioAverage[T Number] struct {
+	values   *RunningAverage[T]
+	divideBy *RunningAverage[T]
+}
+
+func NewRunningRatioAverage[T Number](size int) *RunningRatioAverage[T] {
+	return &RunningRatioAverage[T]{
+		values:   NewRunningAverage[T](size),
+		divideBy: NewRunningAverage[T](size),
+	}
+}
+
+func (rra *RunningRatioAverage[T]) Save(nomDelta, denomDelta T) {
+	rra.values.Save(nomDelta)
+	rra.divideBy.Save(denomDelta)
+}
+
+func (rra *RunningRatioAverage[T]) Load() float64 {
+	denom := rra.divideBy.Load()
+	if denom == 0 {
+		return 0
+	}
+	return float64(rra.values.Load()) / float64(denom)
+}
diff --git a/pkg/learning/window_test.go b/pkg/learning/window_test.go
new file mode 100644
index 000000000000..b01dd300c607
--- /dev/null
+++ b/pkg/learning/window_test.go
@@ -0,0 +1,35 @@
+// Copyright 2024 syzkaller project authors. All rights reserved.
+// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
+
+package learning
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestRunningRatioAverage(t *testing.T) {
+	ra := NewRunningRatioAverage[float64](3)
+	for i := 0; i < 4; i++ {
+		ra.Save(2.0, 1.0)
+	}
+	assert.InDelta(t, 2.0, ra.Load(), 0.1)
+	for i := 0; i < 4; i++ {
+		ra.Save(3.0, 2.0)
+	}
+	assert.InDelta(t, 1.5, ra.Load(), 0.1)
+}
+
+func TestRunningAverage(t *testing.T) {
+	ra := NewRunningAverage[int](3)
+	assert.Equal(t, 0, ra.Load())
+	ra.Save(1)
+	assert.Equal(t, 1, ra.Load())
+	ra.Save(2)
+	assert.Equal(t, 3, ra.Load())
+	for i := 4; i <= 10; i++ {
+		ra.SaveInt(i)
+	}
+	assert.Equal(t, 8+9+10, ra.Load())
+}
diff --git a/syz-manager/http.go b/syz-manager/http.go
index 22af491fde57..d20c8fefba30 100644
--- a/syz-manager/http.go
+++ b/syz-manager/http.go
@@ -176,7 +176,7 @@ func (mgr *Manager) collectStats() []UIStat {
 	for k, v := range rawStats {
 		val := ""
 		switch {
-		case k == "fuzzer jobs" || strings.HasPrefix(k, "rpc exchange"):
+		case k == "fuzzer jobs" || strings.HasPrefix(k, "rpc exchange") || strings.Contains(k, "/sec"):
 			val = fmt.Sprint(v)
 		default:
 			val = rateStat(v, secs)
diff --git a/syz-manager/rpc.go b/syz-manager/rpc.go
index 0c94a5793de2..939aa04710a1 100644
--- a/syz-manager/rpc.go
+++ b/syz-manager/rpc.go
@@ -325,7 +325,9 @@ func (runner *Runner) doneRequest(resp rpctype.ExecutionResult, fuzzerObj *fuzze
 	}
 	info.Extra.Cover = runner.instModules.Canonicalize(info.Extra.Cover)
 	info.Extra.Signal = runner.instModules.Canonicalize(info.Extra.Signal)
-	fuzzerObj.Done(req, &fuzzer.Result{Info: info})
+	fuzzerObj.Done(req, &fuzzer.Result{
+		Info: info,
+	})
 }
 
 func (runner *Runner) newRequest(req *fuzzer.Request) rpctype.ExecutionRequest {
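
For reviewers, a minimal standalone sketch (not part of the patch) of how the new pkg/learning primitives compose, mirroring the fuzzer.go changes above. The "generate"/"mutate" arm names, the simulated signal/time numbers, and the main program are made up purely for illustration; in the real fuzzer the reward is the raw new-max-signal/time ratio and the windowed ratio average is only exported via Stats().

// Illustrative sketch only; assumes this patch's pkg/learning API.
package main

import (
	"fmt"
	"math/rand"

	"github.com/google/syzkaller/pkg/learning"
)

func main() {
	rnd := rand.New(rand.NewSource(0))
	// Epsilon-greedy bandit choosing between two strategies.
	mab := &learning.PlainMAB[string]{
		ExplorationRate: 0.02,
		MinLearningRate: 0.0005,
	}
	mab.AddArms("generate", "mutate") // hypothetical arm names

	// Windowed "new signal per second" ratio, as exposed in Stats().
	genSpeed := learning.NewRunningRatioAverage[float64](10000)

	for i := 0; i < 1000; i++ {
		action := mab.Action(rnd)
		// Pretend "mutate" finds signal a bit faster than "generate".
		newSignal, elapsedSec := 5.0, 1.0
		if action.Arm == "mutate" {
			newSignal = 7.0
		}
		if action.Arm == "generate" {
			genSpeed.Save(newSignal, elapsedSec)
		}
		// Reward is the raw signal/time ratio, as in Fuzzer.Done().
		mab.SaveReward(action, newSignal/elapsedSec)
	}
	fmt.Printf("exec gen, sig/sec: %.2f\n", genSpeed.Load())
}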