rickshaw-post-process-tools

#!/usr/bin/perl
# -*- mode: perl; indent-tabs-mode: nil; perl-indent-level: 4 -*-
# vim: autoindent tabstop=4 shiftwidth=4 expandtab softtabstop=4 filetype=perl
#
# Author: Andrew Theurer
#
# Rickshaw-post-process-tools will run each tool-specific post-process script
# for every tool it finds from each "collector".  A collector is just an instance
# of an engine-script execution which also executed tools.  This script should
# only be run after those are complete and the data has been moved to
# $base_run_dir/run/tool-data (a completed execution of rickshaw-run).
#
# Depending on the endpoint types used, a collector could be in a container on a remote
# host, could be a pod/container, a VM, etc.  If an engine-script execution
# was also running a benchmark client or server (common for localhost and remotehost
# endpoints), then the name of the collector is "client" or "server".  If it was
# not running a benchmark client or server, the collector name will be dependent
# on what the endpoint uses, for example the k8s endpoint has collectors named
# "master" and "worker".
#
# The tool data is organized under $base_run_dir/run/tool-data, with the collector
# name first, then the collector ID (a number), then a directory for each tool that
# was run.  Once tool-specific post-processors are complete, there should be a
# "metric-data.json" file containing all of the tool's metrics.  Another script,
# "rickshaw-post-process-consolidate" is responsible for combining all the data from
# these files into the rickshaw-result.json file.  Once that file has been created,
# rickshaw-export can be used to export to other formats, like CommonDataModel/OpenSearch.

use strict;
use warnings;
use Cwd;
use Data::UUID;
use File::pushd;
use File::Basename;
use File::Temp qw(tempdir);
use File::Copy;
use File::Path qw(make_path);
use JSON::XS;
use JSON::Validator;
use Data::Dumper;

# Compile-time environment check.  The 'use lib' / 'use' statements that follow
# this block depend on $TOOLBOX_HOME/perl and $RICKSHAW_HOME being directories,
# so verify both before those statements are processed.  If either is missing,
# print instructions and exit with status 1.
BEGIN {
    if (!(exists $ENV{'TOOLBOX_HOME'} && -d "$ENV{'TOOLBOX_HOME'}/perl")) {
        print "This script requires libraries that are provided by the toolbox project.\n";
        print "Toolbox can be acquired from https://github.com/perftool-incubator/toolbox and\n";
        print "then use 'export TOOLBOX_HOME=/path/to/toolbox' so that it can be located.\n";
        exit 1;
    }

    if (!(exists $ENV{'RICKSHAW_HOME'} && -d "$ENV{'RICKSHAW_HOME'}")) {
        print "This script requires libraries that are provided by the rickshaw project.\n";
        print "Since this script is part of rickshaw you should already have it.  You can then\n";
        # The example command is now closed with a matching single quote
        print "use 'export RICKSHAW_HOME=/path/to/rickshaw' so that it can be located.\n";
        exit 1;
    }
}
use lib "$ENV{'TOOLBOX_HOME'}/perl";
use toolbox::json;
use toolbox::logging;

use lib "$ENV{'RICKSHAW_HOME'}";
use rickshaw::fixup;

# NOTE(review): presumably 0 turns off debug_log() output from toolbox::logging -- confirm there
$toolbox::logging::debug = 0;

# UUID generator object; it is not used again in the portion of the script shown here
my $ug = Data::UUID->new;
my %run; # A multi-dimensional, nested hash, schema TBD
         # This hash documents what was run.
         # (filled further down from the contents of 'rickshaw-run.json')

my $base_run_dir; # Taken from the --base-run-dir cmdline param, later converted to an absolute path
my $run_file;    # 'rickshaw-run.json' containing all configuration data
                 # (generated by 'rickshaw-run' once a run is complete)
my $result_file; # 'rickshaw-result.json' containing all configuration and result data
                 # (per the header comment this file is produced by
                 # 'rickshaw-post-process-consolidate'; the variable is not assigned
                 # in the portion of the script shown here)
my $file_rc;     # Return code from the most recent get_json_file() call

# Print the command-line help text to STDOUT.
# Takes no arguments.
sub usage {
    my $help_text = join("",
        "\nusage:\n\n",
        "--base-run-dir  Directory where result data is located for a previous 'rickshaw-run'\n",
    );
    print $help_text;
}

# Render a list of parameters as a single command-line style string.
#
# Arguments:
#   $params_ref - reference to an array of hashes, each with 'arg' and 'val' keys
#   $cs_id      - optional; when defined, the first "%client-id%" found in each
#                 value is replaced with it
#
# Returns the parameters formatted as "--arg=val" and separated by single
# spaces (an empty string when there are no parameters).  The hashes passed in
# are not modified; substitution is done on a copy of each value.
sub dump_params {
    my ($params_ref, $cs_id) = @_;
    my @rendered;
    for my $entry (@{ $params_ref }) {
        my $value = $entry->{'val'};
        $value =~ s/\%client-id\%/$cs_id/ if defined $cs_id;
        push(@rendered, "--" . $entry->{'arg'} . "=" . $value);
    }
    return join(" ", @rendered);
}

# Get the absolute path of the rickshaw project directory: change into the
# directory holding this script, record the resulting working directory, and
# let the pushd guard restore the previous directory when the block ends.
my $rickshaw_project_dir = do {
    my $script_dir_guard = pushd(dirname($0));
    getcwd();
};
# JSON schema files shipped in the project's schema/ directory
my $tool_schema_file = "$rickshaw_project_dir/schema/tool.json";
my $run_schema_file = "$rickshaw_project_dir/schema/run.json";
my $result_schema_file = "$rickshaw_project_dir/schema/result.json";
my %bench_config;

# Process the cmdline params
# Accepted forms are '--arg=val' and '--arg val'.  The recognized arguments are
# 'help' (print usage, exit 0) and 'base-run-dir' (stored in $base_run_dir).
# Anything else prints an error plus the usage text and exits with status 1.
while (scalar @ARGV > 0) {
    my $p = shift @ARGV;
    debug_log(sprintf "processing \@ARGV, param: [%s]\n", $p);
    my $arg;
    my $val;

    if ( $p =~ /^\-\-(\S+)/ ) {
        $arg = $1;
        # Split on the first '=' of the whole parameter (not of the \S+ capture)
        # so that values containing whitespace or '=' are kept intact
        if ( $p =~ /^\-\-([^=\s]+)=(.*)/ ) { # '--arg=val'
            $arg = $1;
            $val = $2;
        } else { # '--arg val'
            $val = shift @ARGV;
        }
    } else {
        # printf (not print) so that the offending parameter is actually shown
        printf "[ERROR]malformed cmdline parameter: %s\n", $p;
        usage();
        exit 1;
    }
    # $val is undefined when '--arg' was the last element of @ARGV
    debug_log(sprintf "processing \@ARGV, arg is: [%s], val is: [%s]\n", $arg, (defined $val ? $val : ""));
    if ($arg =~ /^help$/) {
        usage();
        exit 0;
    } elsif ($arg =~ /^base-run-dir$/) {
        debug_log(sprintf "argument: [%s]\n", $arg);
        if (! defined $val) {
            printf "[ERROR]argument requires a value: [%s]\n", $arg;
            usage();
            exit 1;
        }
        $base_run_dir = $val;
    } else {
        printf "[ERROR]argument not valid: [%s]\n", $arg;
        usage();
        exit 1;
    }
}


# Ensure the run-dir has an absolute path
# When --base-run-dir was not given, fall back to the current directory
# explicitly.  File::pushd already treats an undefined target as ".", so the
# result is unchanged, but the sprintf below no longer sees an undefined value.
if (! defined $base_run_dir) {
    $base_run_dir = ".";
}
{
    # The pushd guard restores the previous working directory at the end of this block
    my $dir = pushd($base_run_dir);
    debug_log(sprintf "pushd to [%s]\n", $base_run_dir);
    my $cwd = getcwd();
    debug_log(sprintf "cwd [%s]\n", $cwd);
    $base_run_dir = $cwd;
}
# Sub-directories of the run directory used by the rest of the script
my $config_dir = $base_run_dir . "/config";
my $run_dir = $base_run_dir . "/run";
my $iter_dir = $run_dir . "/iterations";

# Load the existing rickshaw-run.json
$run_file = $run_dir . "/rickshaw-run.json";

# NOTE(review): rickshaw_run_schema_fixup() is presumably provided by
# rickshaw::fixup; a non-zero status is passed through as this script's exit code
my $fixup_status = rickshaw_run_schema_fixup($run_file, $run_schema_file);
if ($fixup_status != 0) {
    exit $fixup_status;
}

debug_log(sprintf "Opening %s for normal processing\n", $run_file);
($file_rc, my $run_ref) = get_json_file($run_file, $run_schema_file);
if ($file_rc > 0 or ! defined $run_ref) {
    # Name the file that actually failed (this is the run file, not a tool config)
    printf "Could not open the run file %s\n", $run_file;
    exit 1;
} else {
    %run = %{ $run_ref };
    # TODO checks for minimum fields for valid run
}

# The run data may not name a tools directory.  In that case derive one from the
# location of the rickshaw project: change into the tools subproject relative to
# it and record the resulting absolute path.  The pushd guard restores the
# previous working directory when this block ends.
unless (exists $run{'tools-dir'}) {
    my $tools_dir_guard = pushd($rickshaw_project_dir . "/../../../subprojects/tools/");
    $run{'tools-dir'} = getcwd();
    print "Assuming tools-dir is " . $run{'tools-dir'} . "\n";
}

# Walk $run_dir/tool-data/<collector>/<engine>/<tool> and fork one child per
# tool directory.  Each child changes into the tool's data directory, expands
# the %tool-dir%, %run-dir% and %config-dir% placeholders in the tool's
# 'post-script' (taken from the tool's rickshaw.json under 'controller') and
# runs it with its output captured in post-process-output.txt.  The parent
# then waits for all children to finish.
printf "Launching a post-process job for each tool * each collector\n";
my %tools_config; # tool name => decoded rickshaw.json for that tool (loaded once per tool)
my @pids;         # PIDs of the forked post-processing children
my $tool_dir = "tool-data";
if (opendir(my $tooldir_dh, $run_dir . "/" . $tool_dir)) {
    # Entries without any word character (such as "." and "..") are dropped
    my @collectors = grep(/\w+/, readdir($tooldir_dh));
    closedir($tooldir_dh);
    for my $collector (@collectors) {
        my $collector_dir = $tool_dir . "/" . $collector;  # $run_dir/tool-data/[client|server|worker|master|profiler]
        if (opendir(my $collector_dh, $run_dir . "/" . $collector_dir)) {
            # sample directory entry: remotehost-1-kernel-1
            my @engines = grep (/\w+-\d+-\w+-\d+/, readdir($collector_dh));
            closedir($collector_dh);
            for my $engine (@engines) {
                my $engine_dir = $collector_dir . "/" . $engine; # $run_dir/tool-data/[client|server|worker|master]/[0-N]
                if (opendir(my $engine_dh, $run_dir . "/" . $engine_dir)) {
                    my @tools = grep(/\w+/, readdir($engine_dh));
                    closedir($engine_dh);
                    printf "Working on tool dir %s\n", $engine_dir;
                    for my $tool (@tools) {
                        if (! exists($tools_config{$tool})) {
                            # Load a tool configuration for every tool the user is asking for
                            my $tool_config = $run{'tools-dir'} . "/" . $tool . "/rickshaw.json";
                            ($file_rc, my $json_ref) = get_json_file($tool_config, $tool_schema_file);
                            if ($file_rc > 0 or ! defined $json_ref) {
                                print "Could not open the tool config file\n";
                                exit 1;
                            }
                            if (! exists $$json_ref{'tool'} or $$json_ref{'tool'} ne $tool) {
                                # One printf so that both placeholders receive their arguments
                                printf "In the following tool config, found in %s, the value for 'tool' " .
                                       "does not match the tool name, '%s'\n", $tool_config, $tool;
                                printf "Either correct the tool config, or remove this tool from your test\n";
                                my $coder = JSON::XS->new->canonical->pretty;
                                printf "%s", $coder->encode($json_ref);
                                exit 1;
                            }
                            $tools_config{$$json_ref{'tool'}} = $json_ref;
                        }
                        my $pid = fork;
                        if (! defined $pid) {
                            # fork failed: stay in the parent and report it, rather than
                            # running the child branch (which ends in exit) in the parent
                            printf "[ERROR]could not fork a post-process job for %s/%s: %s\n", $engine_dir, $tool, $!;
                            next;
                        }
                        if ($pid) {
                            push(@pids, $pid);
                        } else {
                            my $pushd_dir = pushd($run_dir . "/" . $engine_dir . "/" . $tool);
                            my $pp_cmd = $tools_config{$tool}{'controller'}{'post-script'};
                            # Skip quietly when the tool config carries no post-script
                            if (defined $pp_cmd) {
                                $pp_cmd =~ s/\%tool-dir\%/$run{'tools-dir'}\/$tool\//g;
                                $pp_cmd =~ s/\%run-dir\%/$run_dir\//g;
                                $pp_cmd =~ s/\%config-dir\%/$config_dir\//g;
                                if (-e $pp_cmd) {
                                    system($pp_cmd . " >post-process-output.txt 2>&1");
                                }
                            }
                            exit; # Child exits after post-processing
                        }
                    }
                }
            }
        }
    }
}
printf "Waiting for %d post-processing jobs to complete\n", scalar @pids;
# wait() returns -1 once there are no child processes left
while (wait() > -1) {}
print "Post-processing complete\n";