-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathbenchmark.py
executable file
·375 lines (335 loc) · 11.7 KB
/
benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
#!/usr/bin/env python3
# 2016, Georg Sauthoff <[email protected]>, GPLv3+
import argparse
import collections
import csv
import datetime
import itertools
import logging
# importing it conditionally iff svg generation is selected
# otherwise, it may fail on a system with minimal matplotlib
# install, i.e. where one of the backends loaded by default
# throws
#import matplotlib.pyplot as plt
# importing it conditionally iff csv or not quiet
#import numpy as np
import os
import subprocess
import sys
import tempfile
import time
try:
import colorlog
have_colorlog = True
except ImportError:
have_colorlog = False
def _parse_bool(text):
    """Convert a command-line word into a bool.

    ``argparse`` with ``type=bool`` treats every non-empty string as true
    (``bool('False') is True``), so the conversion is spelled out here.

    Raises:
        argparse.ArgumentTypeError: if *text* is not a recognised boolean word.
    """
    word = text.strip().lower()
    if word in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if word in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value, got {!r}'.format(text))


def mk_arg_parser():
    """Build the command-line parser of the benchmark tool.

    Returns:
        argparse.ArgumentParser: parser with the positional ``argv`` (the
        child's ARG0..) and all measurement, output and graph options.
    """
    p = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='run command multiple times and gather stats',
        epilog='''Examples:
Run 3 programs 20 times each and write stats to stdout and the raw
data to a file:
$ benchmark --cmd ./find_memchr ./find_find --raw raw.dat -n 20 \\
./find_unroll2 3000 in
Create boxplot SVG (and nicely format the stdout and also write
the stats to a CSV file):
$ benchmark --input raw.dat --svg rss.svg --csv rss.csv \\
| column -t -s, -o ' | '
In case the benchmarked program needs some options the `--` delimiter
has its usual meaning (also explicitly specifiying a tag):
$ benchmark --tags mode2 -n 1000 -- ./find_unroll2 --mode 2
# 2016, Georg Sauthoff <[email protected]>, GPLv3+
'''
    )
    p.add_argument('argv', nargs='*', help='ARG0.. of the child')
    p.add_argument('--cmd', '--cmds', nargs='+', default=[],
                   help='extra commands to run')
    p.add_argument('--cols', nargs='+', default=[1, 2, 3, 4],
                   help='columns to generate stats for')
    p.add_argument('--csv', nargs='?', const='benchmark.csv',
                   help='also write results as csv')
    p.add_argument('--debug', nargs='?', metavar='FILE',
                   const='benchmark.log',
                   help='log debug messages into file')
    p.add_argument('--graph-item', help='item to plot in a graph')
    p.add_argument('--height', type=float,
                   help='height of the graph (inch)')
    p.add_argument('--input', '-i', metavar='FILE',
                   help='include raw data from a previous run')
    p.add_argument('--items', nargs='+',
                   default=['wall', 'user', 'sys', 'rss'],
                   help='names for the selected columns')
    # type=bool would turn '--null-out False' into True, hence the
    # explicit converter.
    p.add_argument('--null-out', type=_parse_bool, default=True,
                   help='redirect stdout to /dev/null')
    p.add_argument('--pstat', action=InitPstat,
                   help='set options for `perf stat` instead of GNU time')
    p.add_argument('--precision', type=int, default=3,
                   help='precision for printing values')
    p.add_argument('--quiet', '-q', action='store_true', default=False,
                   help='avoid printing table to stdout')
    p.add_argument('--raw', nargs='?', metavar='FILE', const='data.csv',
                   help='write measurement results to file')
    p.add_argument('--repeat', '-n', type=int, default=2,
                   help='number of times to repeat the measurement')
    p.add_argument('--sleep', type=float, default=0.0, metavar='SECONDS',
                   help='sleep between runs')
    p.add_argument('--svg', nargs='?', const='benchmark.svg',
                   help='write boxplot')
    p.add_argument('--tags', nargs='+', default=[],
                   help='alternative names for the different commands')
    p.add_argument('--time', default='/usr/bin/time',
                   help='measurement program (default: GNU time)')
    p.add_argument('--time-args', nargs='+',
                   default=['--append', '--format', '%e,%U,%S,%M',
                            '--output', '$<'],
                   help='default arguments to measurement program')
    # The value is handed to Popen.wait(timeout=...), which needs a number
    # and not the raw command-line string.
    p.add_argument('--timeout', type=float,
                   help='timeout for waiting on a child')
    p.add_argument('--title', help='title of the graph')
    p.add_argument('--width', type=float, help='width of the graph (inch)')
    p.add_argument('--xlabel', default='experiment', help='x-axis label')
    p.add_argument('--xrotate', type=int,
                   help='rotate x-labels (default: 75 degrees if more than 4 present')
    p.add_argument('--ylabel', default='time (s)', help='y-axis label')
    p.add_argument('--ymax', type=float,
                   help='set upper y-axis limit')
    p.add_argument('--ymin', type=float, default=0.0,
                   help='set lower y-axis limit')
    return p
class InitPstat(argparse.Action):
    """Flag-style argparse action that switches the run to `perfstat.sh`.

    When the option is seen, the measurement program, its arguments, the
    selected columns/items and the graph defaults are overwritten on the
    namespace.
    """

    def __init__(self, option_strings, dest, **kwargs):
        # The option never consumes a value.
        super().__init__(option_strings, dest, nargs=0, **kwargs)

    def __call__(self, parser, args, values, option_string=None):
        counters = [
            'nsec', 'cswitch', 'cpu_migr', 'page_fault', 'cycles', 'ghz',
            'ins', 'ins_cyc', 'br', 'br_mis', 'br_mis_rate',
        ]
        args.time = 'perfstat.sh'
        args.time_args = ['-o', '$<']
        # One column per counter; column 0 is skipped.
        args.cols = list(range(1, len(counters) + 1))
        args.items = counters
        # Keep a graph item that is already set on the namespace.
        args.graph_item = args.graph_item or 'ins_cyc'
        args.title = 'Counter ({})'.format(args.graph_item)
        args.ylabel = 'rate'
def parse_args(xs=None):
    """Parse and post-process the command line.

    Args:
        xs: optional list of argument strings; when it is None (or otherwise
            falsy but not an empty list) ``sys.argv`` is parsed instead.

    Returns:
        argparse.Namespace: the parsed options, where ``cmd`` holds all
        commands (the first positional one included), ``argv`` the remaining
        child arguments, ``cols`` integers, and ``tags``, ``graph_item`` and
        ``title`` are filled with defaults if not given.

    Raises:
        ValueError: if neither a command nor ``--input`` is given, or if the
            number of ``--tags`` differs from the number of commands.

    Side effects:
        Enables file logging for ``--debug`` and binds the module globals
        ``matplotlib``/``plt`` and ``np`` when the selected outputs need them.
    """
    # The plotting/statistics modules are imported on demand only, because a
    # minimal matplotlib install may throw while loading a default backend.
    global matplotlib, plt, np

    arg_parser = mk_arg_parser()
    if xs or xs == []:
        args = arg_parser.parse_args(xs)
    else:
        args = arg_parser.parse_args()
    if not args.argv and not args.input:
        raise ValueError('Neither cmd+args nor --input option present')
    if args.debug:
        setup_file_logging(args.debug)
    if args.argv:
        # The first positional is a command itself, the rest are the
        # arguments passed to every command.
        args.cmd = [args.argv[0]] + args.cmd
        args.argv = args.argv[1:]
    args.cols = [int(x) for x in args.cols]
    # Fixed: this used to read the non-existent attribute `args.tag`, which
    # raised AttributeError whenever --tags was specified.
    if args.tags and len(args.tags) != len(args.cmd):
        raise ValueError('not enough tags specified')
    if not args.tags:
        args.tags = [os.path.basename(x) for x in args.cmd]
    if not args.graph_item:
        args.graph_item = args.items[0]
    if not args.title:
        args.title = 'Runtime ({})'.format(args.graph_item)
    if args.svg:
        # Binds the global `matplotlib` to the top-level package.
        import matplotlib.pyplot
        plt = matplotlib.pyplot
    if args.csv or not args.quiet or args.svg:
        import numpy as np
    return args
# Layout shared by all log handlers of this script.
log_format = '%(asctime)s - %(levelname)-8s - %(message)s'
log_date_format = '%Y-%m-%d %H:%M:%S'


def mk_formatter():
    """Return a plain (uncolored) formatter using the shared log layout."""
    return logging.Formatter(fmt=log_format, datefmt=log_date_format)
def mk_logger():
    """Configure the root logger and return this module's logger.

    The root logger is set to DEBUG and gets one stream handler that only
    emits WARNING and above. If file descriptor 2 is a terminal the handler
    uses the colored formatter (when `colorlog` is available), otherwise the
    plain one.

    Returns:
        logging.Logger: the logger named after this module.
    """
    root = logging.getLogger()  # root logger
    root.setLevel(logging.DEBUG)

    if have_colorlog:
        tty_formatter = colorlog.ColoredFormatter(
            '%(log_color)s' + log_format, log_date_format,
            log_colors={
                'DEBUG': 'reset',
                'INFO': 'reset',
                'WARNING': 'bold_yellow',
                'ERROR': 'bold_red',
                'CRITICAL': 'bold_red',
            })
    else:
        tty_formatter = logging.Formatter(log_format, log_date_format)

    ch = logging.StreamHandler()
    ch.setLevel(logging.WARNING)
    if os.isatty(2):
        ch.setFormatter(tty_formatter)
    else:
        # Fixed: this branch referenced the undefined name `f` and raised
        # NameError whenever stderr was redirected to a file or a pipe.
        # No color escape sequences are wanted there, so use the plain
        # formatter.
        ch.setFormatter(mk_formatter())
    root.addHandler(ch)

    return logging.getLogger(__name__)


log = mk_logger()
def setup_file_logging(filename):
    """Attach a DEBUG-level file handler writing to *filename* to the root logger.

    Records written to the file carry the logger name in addition to the
    shared log layout.
    """
    handler = logging.FileHandler(filename)
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(
        logging.Formatter(log_format + ' - [%(name)s]', log_date_format))
    logging.getLogger().addHandler(handler)
# Reasons for using an external `time` command instead of
# calling e.g. `getrusage()`:
# - the forked child would start with the RSS of the Python parent;
#   thus, the RSS would be reported too high if the child actually
#   uses less memory
# - same code path as for other measurement tools
# - otherwise, the elapsed time would have to be measured separately
def measure(tag, cmd, args):
    """Run *cmd* once under the measurement program and return its raw row.

    The measurement program (``args.time``) is told, via the ``'$<'``
    placeholder in ``args.time_args``, to write its report into a temporary
    file; the first CSV line of that file becomes the measurement.

    Args:
        tag: name under which the result is recorded.
        cmd: program to execute; ``args.argv`` is appended as its arguments.
        args: parsed options (``null_out``, ``time``, ``time_args``, ``argv``
            and ``timeout`` are read).

    Returns:
        tuple: ``(row, errors)`` where ``row`` is
        ``[tag, <measured fields...>, date, rc, cmd, str(args.argv)]`` and
        ``errors`` is 1 if the child exited with a non-zero status, else 0.

    Raises:
        StopIteration: if the report file contains no line.
        ValueError: if ``args.time_args`` has no ``'$<'`` placeholder.
    """
    sink = subprocess.DEVNULL if args.null_out else None
    failures = 0
    with tempfile.NamedTemporaryFile(mode='w+', newline='') as report:
        # Point the measurement program at the temporary report file.
        tool_args = args.time_args.copy()
        tool_args[tool_args.index('$<')] = report.name
        command_line = [args.time] + tool_args + [cmd] + args.argv
        with subprocess.Popen(command_line, stdout=sink) as child:
            status = child.wait(timeout=args.timeout)
        if status != 0:
            log.error('Command {} failed with rc: {}'.format(cmd, status))
            failures += 1
        fields = next(csv.reader(report))
        stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        record = [tag] + fields + [stamp, status, cmd, str(args.argv)]
    return (record, failures)
def execute(args):
    """Measure every command ``args.repeat`` times.

    Args:
        args: parsed options (``tags``, ``cmd``, ``repeat`` and ``sleep``
            are read here; the rest is passed on to `measure`).

    Returns:
        tuple: ``(results, error_count)`` where ``results`` is a list of
        ``(tag, rows)`` pairs, one per command, and ``error_count`` sums the
        failed runs and the runs whose measurement could not be read.
    """
    results = []
    error_count = 0
    for tag, cmd in zip(args.tags, args.cmd):
        rows = []
        for i in range(args.repeat):
            try:
                row, failed = measure(tag, cmd, args)
            except StopIteration:
                # The measurement program left no line in the report file.
                error_count += 1
                log.error("Couldn't read measurements from teporary file"
                          + '- {} - {}'.format(tag, i))
                continue
            if args.sleep > 0:
                time.sleep(args.sleep)
            rows.append(row)
            error_count += failed
        results.append((tag, rows))
    return (results, error_count)
def read_raw(filename):
    """Read raw measurement rows written by a previous run.

    The first line is treated as the header and skipped. The remaining rows
    are grouped by their first column (the tag); only *consecutive* rows with
    the same tag end up in the same group, i.e. a tag that re-appears later
    starts a new group.

    Args:
        filename: path of the CSV file to read.

    Returns:
        list: ``(tag, rows)`` pairs in file order; empty if the file contains
        no data rows (or nothing at all).
    """
    with open(filename, 'r', newline='') as f:
        reader = csv.reader(f)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        return [(tag, list(rows))
                for tag, rows in itertools.groupby(reader,
                                                   lambda row: row[0])]
def write_raw(rrs, args, filename):
    """Append the raw measurement rows to a CSV file.

    Args:
        rrs: iterable of ``(tag, rows)`` pairs; every row is written as-is.
        args: parsed options; ``args.items`` names the measured columns in
            the header.
        filename: path of the CSV file, opened in append mode.

    The header row is only written when the file is new or empty. Writing it
    on every call put a header line in the middle of the data when
    appending, which a later read (that skips only the first line) would
    treat as a measurement.
    """
    needs_header = (not os.path.exists(filename)
                    or os.path.getsize(filename) == 0)
    with open(filename, 'a', newline='') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(
                ['tag'] + args.items + ['date', 'rc', 'cmd', 'args'])
        for _tag, rows in rrs:
            writer.writerows(rows)
def write_svg(ys, args, filename):
    """Draw one box per tag for ``args.graph_item`` and save the figure.

    Args:
        ys: non-empty sequence of ``(tag, items)`` pairs, where ``items`` can
            be indexed by item name.
        args: parsed options (graph item, size, limits, labels, title and
            rotation are read).
        filename: output path passed to ``plt.savefig``.

    Relies on the module globals ``plt`` and ``np`` having been imported.
    """
    tags, items_l = zip(*ys)

    # Rotate the labels by default once there are more than four of them.
    rotation = args.xrotate
    if not rotation and len(tags) > 4:
        rotation = 75

    if args.width and args.height:
        plt.figure(figsize=(args.width, args.height))

    series = [items[args.graph_item] for items in items_l]
    plt.boxplot(series, labels=tags)

    upper = args.ymax
    if not args.ymax:
        # Leave some headroom (10 % of the plotted range) above the maximum.
        peak = np.amax([np.amax(column) for column in series])
        upper = np.ceil(peak + (peak - args.ymin) / 10)
    plt.ylim(ymin=args.ymin, ymax=upper)

    plt.title(args.title)
    if rotation:
        plt.xticks(rotation=rotation)
    plt.xlabel(args.xlabel)
    plt.ylabel(args.ylabel)
    plt.tight_layout()
    plt.savefig(filename)
# normally, we would just use a csv.writer() but
# we want to control the number of significant figures
def write_csv(zs, args, f):
    """Print the statistics as comma-separated lines to the file object *f*.

    Args:
        zs: sequence of ``(tag, stat)`` pairs where ``stat`` is a namedtuple;
            its field names form the header. Nothing is written if empty.
        args: parsed options; ``args.precision`` is the number of digits
            after the decimal point for floating point values.
        f: writable text file object.

    Floats (including ``numpy.float64``, which subclasses ``float``) are
    printed with the fixed precision; every other value is printed via
    ``str()``. Values are not quoted.
    """
    if not zs:
        return
    float_format = '{:1.' + str(args.precision) + 'f}'
    header = ['tag'] + list(zs[0][1]._fields)
    print(','.join(header), file=f)
    for tag, stat in zs:
        cells = [float_format.format(value) if isinstance(value, float)
                 else str(value)
                 for value in [tag] + list(stat)]
        print(','.join(cells), file=f)
def get_items(rs, args):
    """Convert raw rows into a structured array with one field per item.

    Args:
        rs: sequence of raw rows (indexable by column number).
        args: parsed options; ``args.items`` names the fields and
            ``args.cols`` lists the row columns copied into them, in order.

    Returns:
        A structured array with ``len(rs)`` records of ``float64`` fields;
        empty cells are stored as 0.
    """
    table = np.zeros(len(rs), dtype=[(name, 'float64') for name in args.items])
    for record_no, row in enumerate(rs):
        for field_no, col in enumerate(args.cols):
            cell = row[col]
            table[record_no][field_no] = 0 if cell == '' else cell
    return table
# Summary statistics of one measured item of one tag.
Stat = collections.namedtuple(
    'Stat',
    ['n', 'min', 'Q1', 'median', 'Q3', 'max', 'mean', 'dev', 'item'])


def gen_stats(items, args):
    """Compute the summary statistics of the item selected for graphing.

    Args:
        items: structure indexable by item name, yielding the measured values
            of that item.
        args: parsed options; only ``args.graph_item`` is read.

    Returns:
        Stat: count, minimum, quartiles, maximum, mean and standard deviation
        of the selected item, plus the item's name.
    """
    name = args.graph_item
    values = items[name]
    q1, median, q3 = np.percentile(values, [25, 50, 75])
    return Stat(
        n=len(values),
        min=np.amin(values),
        Q1=q1,
        median=median,
        Q3=q3,
        max=np.amax(values),
        mean=np.mean(values),
        dev=np.std(values),
        item=name,
    )
def run(args):
    """Collect the measurements and produce all requested outputs.

    Raw rows come from ``--input`` and/or from executing the commands; the
    statistics go to the CSV file and/or stdout, the raw rows to the raw
    file, and the boxplot to the SVG file, as selected in *args*.

    Returns:
        int: 1 if any measurement reported an error, otherwise 0.
    """
    want_stats = args.csv or not args.quiet
    raw = []
    errors = 0

    if args.input:
        raw = raw + read_raw(args.input)
    if args.cmd:
        measured, errors = execute(args)
        raw = raw + measured

    if want_stats or args.svg:
        tables = [(tag, get_items(rows, args)) for (tag, rows) in raw]
    if want_stats:
        stats = [(tag, gen_stats(table, args)) for (tag, table) in tables]

    if args.csv:
        with open(args.csv, 'w') as out:
            write_csv(stats, args, out)
    if not args.quiet:
        write_csv(stats, args, sys.stdout)
    if args.raw:
        write_raw(raw, args, args.raw)
    if args.svg:
        write_svg(tables, args, args.svg)

    return int(errors != 0)
def main():
    """Parse the command line, run the benchmark and return the exit status."""
    return run(parse_args())


if __name__ == '__main__':
    sys.exit(main())