-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathrickshaw-post-process-tools
executable file
·239 lines (220 loc) · 9.37 KB
/
rickshaw-post-process-tools
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
#!/usr/bin/perl
# -*- mode: perl; indent-tabs-mode: nil; perl-indent-level: 4 -*-
# vim: autoindent tabstop=4 shiftwidth=4 expandtab softtabstop=4 filetype=perl
#
# Author: Andrew Theurer
#
# Rickshaw-post-process-tools will run each tool-specific post-process script
# for every tool it finds from each "collector". A collector is just an instance
# of an engine-script execution which also executed tools. This script should
# only be run after those are complete and the data has been moved to
# $base_run_dir/run/tool-data (a completed execution of rickshaw-run).
#
# Depending on the endpoint types used, a collector could be in a container on a remote
# host, could be a pod/container, a VM, etc. If an engine-script execution
# was also running a benchmark client or server (common for localhost and remotehost
# endpoints), then the name of the collector is "client" or "server". If it was
# not running a benchmark client or server, the collector name will be dependent
# on what the endpoint uses, for example the k8s endpoint has collectors named
# "master" and "worker".
#
# The tool data is organized under $base_run_dir/run/tool-data, with the collector
# name first, then the collector ID (a number), then a directory for each tool that
# was run. Once tool-specific post-processors are complete, there should be a
# "metric-data.json" file containing all of the tool's metrics. Another script,
# "rickshaw-post-process-consolidate", is responsible for combining all the data from
# these files into the rickshaw-result.json file. Once that file has been created,
# rickshaw-export can be used to export to other formats, like CommonDataModel/OpenSearch.
use strict;
use warnings;
use Cwd;
use Data::UUID;
use File::pushd;
use File::Basename;
use File::Temp qw(tempdir);
use File::Copy;
use File::Path qw(make_path);
use JSON::XS;
use JSON::Validator;
use Data::Dumper;
# Compile-time environment check.  The 'use lib' statements that follow this
# block interpolate TOOLBOX_HOME and RICKSHAW_HOME, so both must be set (and
# point at existing directories) before the rest of the file is compiled.
# On failure a hint is printed and the script exits with status 1.
BEGIN {
    # TOOLBOX_HOME must contain a 'perl' subdirectory (added to @INC below)
    if (!(exists $ENV{'TOOLBOX_HOME'} && -d "$ENV{'TOOLBOX_HOME'}/perl")) {
        print "This script requires libraries that are provided by the toolbox project.\n";
        print "Toolbox can be acquired from https://github.com/perftool-incubator/toolbox and\n";
        print "then use 'export TOOLBOX_HOME=/path/to/toolbox' so that it can be located.\n";
        exit 1;
    }
    # RICKSHAW_HOME itself is added to @INC below
    if (!(exists $ENV{'RICKSHAW_HOME'} && -d "$ENV{'RICKSHAW_HOME'}")) {
        print "This script requires libraries that are provided by the rickshaw project.\n";
        print "Since this script is part of rickshaw you should already have it. You can then\n";
        # The example command was missing its closing quote
        print "use 'export RICKSHAW_HOME=/path/to/rickshaw' so that it can be located.\n";
        exit 1;
    }
}
use lib "$ENV{'TOOLBOX_HOME'}/perl";
use toolbox::json;
use toolbox::logging;
use lib "$ENV{'RICKSHAW_HOME'}";
use rickshaw::fixup;
# File-scope state shared by the rest of the script.

# Set the toolbox logging debug flag to 0 (presumably silences debug_log()
# output -- confirm against toolbox::logging).
$toolbox::logging::debug = 0;
# UUID generator object.
# NOTE(review): not referenced anywhere else in the visible script.
my $ug = Data::UUID->new;
my %run; # A multi-dimensional, nested hash, schema TBD
# This hash documents what was run; it is filled from the parsed
# rickshaw-run.json further down.
# Run directory given with --base-run-dir; later replaced by its absolute path.
my $base_run_dir;
my $run_file; # 'rickshaw-run.json' containing all configuration data
# (generated by 'rickshaw-run' once a run is complete)
my $result_file; # 'rickshaw-result.json' containing all configuration and result data
# NOTE(review): never assigned or read in the visible script; per the file
# header, rickshaw-post-process-consolidate is what produces that file.
# First value returned by get_json_file(); the callers below treat a value
# greater than 0 as failure.
my $file_rc;
# Print the command-line help text to the currently selected output handle.
# Takes no arguments.
sub usage {
    print "\nusage:\n\n",
          "--base-run-dir Directory where result data is located for a previous 'rickshaw-run'\n";
}
# Render a list of parameters as a single "--arg=val --arg=val ..." string.
#
# Arguments:
#   $params_ref - array ref of hash refs; each is read for its 'arg' and
#                 'val' keys
#   $cs_id      - optional; when defined, the first "%client-id%" found in
#                 each value is replaced with it (the stored value is not
#                 modified, only the rendered copy)
#
# Returns the rendered parameters separated by single spaces, or an empty
# string when the list is empty.
sub dump_params {
    my ($params_ref, $cs_id) = @_;
    my @rendered;
    for my $entry (@{ $params_ref }) {
        my $value = $entry->{'val'};
        $value =~ s/\%client-id\%/$cs_id/ if defined $cs_id;
        push @rendered, "--" . $entry->{'arg'} . "=" . $value;
    }
    return join(" ", @rendered);
}
# Absolute path of the directory that holds this script.
my $rickshaw_project_dir;
{
    # Enter the script's directory just long enough to read its absolute
    # path; the File::pushd guard object changes back to the previous
    # directory when it goes out of scope at the end of this bare block.
    my $script_dir_guard = pushd(dirname($0));
    $rickshaw_project_dir = getcwd();
}
# JSON schema files kept in the project's schema/ directory
my $tool_schema_file = "$rickshaw_project_dir/schema/tool.json";
my $run_schema_file = "$rickshaw_project_dir/schema/run.json";
my $result_schema_file = "$rickshaw_project_dir/schema/result.json";
my %bench_config;
# Process the cmdline params.  Both '--arg=val' and '--arg val' forms are
# accepted; anything else is reported as malformed and the script exits 1.
# Recognised arguments: --help, --base-run-dir.
while (scalar @ARGV > 0) {
    my $p = shift @ARGV;
    debug_log(sprintf "processing \@ARGV, param: [%s]\n", $p);
    my $arg;
    my $val;
    # Split on the first '=' only and keep everything after it, so that a
    # value containing '=' or whitespace (e.g. a directory name) is not
    # truncated or split in the wrong place.
    if ( my ($name, $inline_val) = $p =~ /^\-\-([^=\s]+)(?:=(.*))?\z/s ) {
        $arg = $name;
        if (defined $inline_val) { # '--arg=val'
            $val = $inline_val;
        } else { # '--arg val'
            $val = shift @ARGV;
        }
    } else {
        # printf (not print) so the offending parameter is actually shown
        printf "[ERROR]malformed cmdline parameter: %s\n", $p;
        usage();
        exit 1;
    }
    # $val is undef when '--arg' was the last word on the command line
    debug_log(sprintf "processing \@ARGV, arg is: [%s], val is: [%s]\n", $arg, defined $val ? $val : "");
    if ($arg =~ /^help$/) {
        usage();
        exit 0;
    } elsif ($arg =~ /^base-run-dir$/) {
        debug_log(sprintf "argument: [%s]\n", $arg);
        if (! defined $val or $val eq "") {
            printf "[ERROR]argument requires a value: [%s]\n", $arg;
            usage();
            exit 1;
        }
        $base_run_dir = $val;
    } else {
        printf "[ERROR]argument not valid: [%s]\n", $arg;
        usage();
        exit 1;
    }
}
# Ensure the run-dir has an absolute path.
# Refuse to continue when --base-run-dir was never given or does not name a
# directory, instead of handing an undefined/invalid path to pushd().
if (! defined $base_run_dir) {
    print "[ERROR]--base-run-dir was not provided\n";
    usage();
    exit 1;
}
if (! -d $base_run_dir) {
    printf "[ERROR]base-run-dir is not a directory: [%s]\n", $base_run_dir;
    exit 1;
}
{
    # The pushd guard object changes back to the previous working directory
    # when it goes out of scope at the end of this bare block.
    my $dir = pushd($base_run_dir);
    debug_log(sprintf "pushd to [%s]\n", $base_run_dir);
    my $cwd = getcwd();
    debug_log(sprintf "cwd [%s]\n", $cwd);
    $base_run_dir = $cwd;
}
# Directories derived from the (now absolute) base run directory
my $config_dir = $base_run_dir . "/config";
my $run_dir = $base_run_dir . "/run";
my $iter_dir = $run_dir . "/iterations";
# Load the existing rickshaw-run.json
$run_file = $run_dir . "/rickshaw-run.json";
# Let rickshaw::fixup process the run file against the run schema first; a
# non-zero status is passed straight through as this script's exit code.
my $fixup_status = rickshaw_run_schema_fixup($run_file, $run_schema_file);
if ($fixup_status != 0) {
    exit $fixup_status;
}
debug_log(sprintf "Opening %s for normal processing\n", $run_file);
($file_rc, my $run_ref) = get_json_file($run_file, $run_schema_file);
if ($file_rc > 0 or ! defined $run_ref) {
    # This is the run file, not a tool config; say so and name the file
    printf "Could not open the run file %s\n", $run_file;
    exit 1;
} else {
    %run = %{ $run_ref };
    # TODO checks for minimum fields for valid run
}
if (! exists $run{'tools-dir'}) {
    # Try to locate the tools subproject based on the rickshaw_project_dir
    my $default_tools_dir = $rickshaw_project_dir . "/../../../subprojects/tools/";
    if (! -d $default_tools_dir) {
        printf "[ERROR]tools-dir is not in the run file and the default location does not exist: [%s]\n", $default_tools_dir;
        exit 1;
    }
    {
        # Enter the directory only to obtain its absolute path
        my $dir = pushd($default_tools_dir);
        $run{'tools-dir'} = getcwd();
        printf "Assuming tools-dir is %s\n", $run{'tools-dir'};
    }
}
# Walk $run_dir/tool-data/<collector>/<engine>/<tool> and fork one child per
# tool directory.  Each child changes into the tool's data directory and runs
# that tool's 'post-script' (taken from the tool's rickshaw.json), with output
# captured in post-process-output.txt.  The parent then waits for all children.
printf "Launching a post-process job for each tool * each collector\n";
my %tools_config; # tool name => parsed rickshaw.json for that tool
my @pids;         # PIDs of the forked post-process children
my $tool_dir = "tool-data";
if (opendir(my $tooldir_dh, $run_dir . "/" . $tool_dir)) {
    # /\w+/ also filters out the "." and ".." entries
    my @collectors = grep(/\w+/, readdir($tooldir_dh));
    closedir($tooldir_dh);
    for my $collector (@collectors) {
        my $collector_dir = $tool_dir . "/" . $collector; # $run_dir/tool-data/[client|server|worker|master|profiler]
        # Entries that cannot be opened as a directory are skipped silently
        opendir(my $collector_dh, $run_dir . "/" . $collector_dir) or next;
        # sample directory entry: remotehost-1-kernel-1
        my @engines = grep (/\w+-\d+-\w+-\d+/, readdir($collector_dh));
        closedir($collector_dh);
        for my $engine (@engines) {
            my $engine_dir = $collector_dir . "/" . $engine; # $run_dir/tool-data/<collector>/<engine>
            opendir(my $engine_dh, $run_dir . "/" . $engine_dir) or next;
            my @tools = grep(/\w+/, readdir($engine_dh));
            closedir($engine_dh);
            printf "Working on tool dir %s\n", $engine_dir;
            for my $tool (@tools) {
                if (! exists($tools_config{$tool})) {
                    # Load (once) the tool configuration for every tool found
                    my $tool_config = $run{'tools-dir'} . "/" . $tool . "/rickshaw.json";
                    ($file_rc, my $json_ref) = get_json_file($tool_config, $tool_schema_file);
                    if ($file_rc > 0 or ! defined $json_ref) {
                        print "Could not open the tool config file\n";
                        exit 1;
                    }
                    if (! exists $$json_ref{'tool'} or $$json_ref{'tool'} ne $tool) {
                        # Each printf gets exactly the arguments its format uses
                        printf "In the following tool config, found in %s, the value for 'tool' ", $tool_config;
                        printf "does not match the tool name, '%s'\n", $tool;
                        printf "Either correct the tool config, or remove this tool from your test\n";
                        my $coder = JSON::XS->new->canonical->pretty;
                        printf "%s", $coder->encode($json_ref);
                        exit 1;
                    }
                    $tools_config{$$json_ref{'tool'}} = $json_ref;
                }
                my $pid = fork;
                if (! defined $pid) {
                    # fork failed: we are still the parent.  Do not fall into
                    # the child branch (it ends with exit); report, reap the
                    # children already started, and stop.
                    printf "[ERROR]could not fork a post-process job for tool %s in %s: %s\n", $tool, $engine_dir, $!;
                    while (wait() > -1) {}
                    exit 1;
                }
                if ($pid) {
                    push(@pids, $pid);
                } else {
                    # Child: work from inside the tool's data directory
                    my $pushd_dir = pushd($run_dir . "/" . $engine_dir . "/" . $tool);
                    my $pp_cmd = $tools_config{$tool}{'controller'}{'post-script'};
                    # A tool config without a post-script has nothing to run
                    if (defined $pp_cmd) {
                        $pp_cmd =~ s/\%tool-dir\%/$run{'tools-dir'}\/$tool\//g;
                        $pp_cmd =~ s/\%run-dir\%/$run_dir\//g;
                        $pp_cmd =~ s/\%config-dir\%/$config_dir\//g;
                        # The command is only run when it exists as a file
                        if (-e $pp_cmd) {
                            system($pp_cmd . " >post-process-output.txt 2>&1");
                        }
                    }
                    exit; # Child exits after post-processing
                }
            }
        }
    }
}
printf "Waiting for %d post-processing jobs to complete\n", scalar @pids;
# wait() returns -1 once there are no child processes left
while (wait() > -1) {}
print "Post-processing complete\n";