diff --git a/.gitignore b/.gitignore index e11673fc..0ff4352f 100644 --- a/.gitignore +++ b/.gitignore @@ -29,4 +29,9 @@ book/ evaluation/iterative-divider/data.json # Prevents editing vscode extensions further -/.vscode/settings.json \ No newline at end of file +/.vscode/settings.json + +# Ignore hypermapper output files +*_output_samples.csv* +hypermapper_logfile.log +log.txt \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 2bc3cf9f..8eb3ef12 100644 --- a/Dockerfile +++ b/Dockerfile @@ -75,6 +75,9 @@ RUN wget https://github.com/chipsalliance/verible/releases/download/v0.0-3428-gc rm verible.tar.gz ENV PATH=$PATH:/home/verible/bin +# Install hypermapper +RUN pip install hypermapper + # Set rust to 1.76 and runt to 0.4.1 RUN rustup toolchain install 1.76.0 &&\ rustup default 1.76.0 &&\ diff --git a/apps/fft/cmn/bundled.fil b/apps/fft/cmn/bundled.fil index 9c1b27eb..3d37a11e 100644 --- a/apps/fft/cmn/bundled.fil +++ b/apps/fft/cmn/bundled.fil @@ -5,6 +5,7 @@ import "primitives/reshape.fil"; /// FFT that takes in a single wire and outputs a single wire /// Wraps the normal bundle-based fft component +/// N is the number of butterflies in the FFT comp BundledFFT[NStages, N, ?Iterative=1]<'G: II>( go: interface['G], in: ['G, 'G+1] NPoints*2*32, // input bundle diff --git a/tools/hypermapper/.gitignore b/tools/hypermapper/.gitignore new file mode 100644 index 00000000..872aa273 --- /dev/null +++ b/tools/hypermapper/.gitignore @@ -0,0 +1 @@ +results \ No newline at end of file diff --git a/tools/hypermapper/flopocofft.fil b/tools/hypermapper/flopocofft.fil new file mode 100644 index 00000000..276b19f5 --- /dev/null +++ b/tools/hypermapper/flopocofft.fil @@ -0,0 +1,25 @@ +import "apps/fft/versions/flopoco/fft.fil"; + +comp main[Butterflies, Iterative]<'G: II>( + go: interface['G], + in: ['G, 'G+1] NPoints*2*32 // 16 complex numbers bundled together +) -> ( + out: ['G+L, 'G+L+1] NPoints*2*32, // output from pipelined pease fft with 1 butterfly +) with 
import json
import pandas
import shutil
from argparse import ArgumentParser
from hypermapper import optimizer
import hypermapper
import math
import subprocess
from os import path
import logging as log
from tempfile import TemporaryDirectory
import os
from utils import dl_to_ld
from multiprocessing import Pool
import matplotlib.pyplot as plt


# Interface
def gen_interface(tmpdir: TemporaryDirectory, filamentfile: str, gen_config: str):
    """Dump the interface of a Filament design and return its timing numbers.

    Runs the filament compiler on ``filamentfile`` with ``--dump-interface``
    and ``--bindings gen_config``, capturing its stdout in
    ``<tmpdir>/interface.json``, then parses that file.

    Args:
        tmpdir: Scratch directory that receives ``interface.json``.
        filamentfile: Path of the Filament source file to compile.
        gen_config: Path of the bindings file handed to ``--bindings``.

    Returns:
        A dict ``{"latency": ..., "ii": ...}`` read from the ``states`` and
        ``delay`` fields of the first entry of ``interfaces``.

    Raises:
        subprocess.CalledProcessError: If the filament compiler exits with a
            non-zero status.
    """
    out = path.join(tmpdir.name, "interface.json")

    # run the fft file
    with open(out, "w") as f:
        subprocess.run(
            [
                "/home/filament/target/debug/filament",
                filamentfile,
                "--bindings",
                gen_config,
                "--preserve-names",
                "--dump-interface",
                "--library",
                "/home/filament",
            ],
            stdout=f,
            # Fail here with the real cause instead of letting json.load
            # choke on an empty or partial interface file below.
            check=True,
        )

    # Change the current working directory back because fud messes with it
    os.chdir(path.dirname(__file__))

    with open(out) as f:
        interface = json.load(f)["interfaces"][0]
    return {"latency": interface["states"], "ii": interface["delay"]}


# Generates verilog
# NOTE: the name shadows the builtin ``compile``; it is kept because it is the
# name the rest of this module calls.
def compile(
    tmpdir: TemporaryDirectory,
    filamentfile: str,
    main_params: list[int],
    gen_params: dict[str, dict[str, str]],
):
    """Write the bindings file for a design and lower it to Verilog via fud.

    Args:
        tmpdir: Scratch directory; receives ``conf.toml`` and ``fft.sv``.
        filamentfile: Path of the Filament source file to compile.
        main_params: Values written as ``params.main``.
        gen_params: Mapping of section name to its ``key -> value`` entries;
            each section is written as a ``[globals.<name>]`` table.

    Returns:
        The ``{"latency", "ii"}`` dict produced by :func:`gen_interface`.
    """
    conf_path = path.join(tmpdir.name, "conf.toml")

    # The file is closed (and therefore flushed) before any tool reads it.
    with open(conf_path, "w") as conf_file:
        # Add main parameters
        conf_file.write(f"params.main = {main_params}\n")

        # Generate a file that looks like
        # [globals.<name>]
        # <key> = "<value>"
        # ...
        for k, v in gen_params.items():
            conf_file.write(f"[globals.{k}]\n")
            for subkey, value in v.items():
                conf_file.write(f'{subkey} = "{value}"\n')

    latency = gen_interface(tmpdir, filamentfile, conf_path)

    subprocess.run(
        [
            "fud",
            "e",
            "-s",
            "filament.flags",
            f" --bindings {conf_path}",
            "--from",
            "filament",
            "--to",
            "icarus-verilog",
            filamentfile,
            "-o",
            path.join(tmpdir.name, "fft.sv"),
            "--quiet",
        ]
    )

    return latency


# Synthesize a design and get the resource estimate
def synth(verilog_file, clock_period=7):
    """Synthesize ``verilog_file`` through fud and return the resource report.

    A clock constraint for ``clock_period`` is written to a private scratch
    directory, fud is run from ``synth-verilog`` to ``resource-estimate`` with
    the ``synth.tcl`` that sits next to this script, and the resulting JSON is
    loaded. The scratch directory is removed even if a step fails.

    Args:
        verilog_file: Path of the Verilog file to synthesize.
        clock_period: Clock period written into the ``create_clock``
            constraint (formatted with two decimals).

    Returns:
        The parsed contents of ``resources.json``.
    """
    with TemporaryDirectory() as workdir:
        log.info(f"Synthesizing {verilog_file} to {workdir} with period {clock_period}")

        # Write xdc file
        xdc_path = path.join(workdir, "constraints.xdc")
        with open(xdc_path, "w") as constraint_xdc:
            constraint_xdc.write(
                f"\ncreate_clock -period {clock_period:.2f} -name clk [get_ports clk]\n"
            )

        report_path = path.join(workdir, "resources.json")

        # run the fft file through fud to get a synthesis estimate
        # Load the local synth.tcl file
        subprocess.run(
            [
                "fud",
                "e",
                "-s",
                "synth-verilog.tcl",
                path.join(path.dirname(__file__), "synth.tcl"),
                "-s",
                "synth-verilog.constraints",
                xdc_path,
                "--from",
                "synth-verilog",
                "--to",
                "resource-estimate",
                verilog_file,
                "-o",
                report_path,
                "--quiet",
            ]
        )

        # Read the resource estimate
        with open(report_path) as f:
            resources = json.load(f)

    # NOTE: values are returned as reported. An earlier draft replaced -1
    # entries (and everything when "meet_timing" was 0) with 1e12 to penalize
    # failing designs; that rewrite is intentionally not applied here.
    print(resources)
    return resources


def compile_and_synth(
    filamentfile: str,
    clock_period: int,
    main_params: list[int],
    gen_params: dict[str, dict[str, str]],
):
    """Compile a Filament design and synthesize the Verilog it produces.

    Args:
        filamentfile: Path of the Filament source file.
        clock_period: Clock period forwarded to :func:`synth`.
        main_params: Forwarded to :func:`compile`.
        gen_params: Forwarded to :func:`compile`.

    Returns:
        One dict merging the latency/ii numbers with the resource report;
        resource keys win on a name clash.
    """
    tmpdir = TemporaryDirectory()
    try:
        latency = compile(tmpdir, filamentfile, main_params, gen_params)
        resources = synth(path.join(tmpdir.name, "fft.sv"), clock_period)
    finally:
        # Always drop the scratch directory, also when a tool step raised.
        tmpdir.cleanup()
    return {**latency, **resources}


def compile_flopoco_fft(
    iterative: int, num_butterflies: int, target_frequency: int, clock_period: int
):
    """Evaluate one FloPoCo FFT configuration.

    Args:
        iterative: Passed as the second ``main`` parameter; a value above 0
            is reported as "Iterative", anything else as "Streaming".
        num_butterflies: Passed as the first ``main`` parameter.
        target_frequency: Written into the flopoco ``conf`` string.
        clock_period: Clock period used for synthesis.

    Returns:
        The dict from :func:`compile_and_synth` with an added ``time_ii``
        entry (``ii * period``).
    """
    print(
        f"Synthesizing {'Iterative' if iterative > 0 else 'Streaming'} with {num_butterflies} butterflies, target frequency {target_frequency} and clock period {clock_period}"
    )
    # NOTE(review): compile() already prefixes every section with "globals.",
    # so this key is emitted as "[globals.globals.flopoco]" -- confirm that is
    # the table name the filament bindings file expects.
    synth_results = compile_and_synth(
        path.join(path.dirname(__file__), "flopocofft.fil"),
        clock_period,
        [num_butterflies, iterative],
        {"globals.flopoco": {"conf": f"frequency={target_frequency} target=Virtex6"}},
    )
    # We care about the time interval between operations
    # (assumes the resource report carries a "period" entry -- TODO confirm)
    synth_results["time_ii"] = synth_results["ii"] * synth_results["period"]
    return synth_results


def compile_and_synth_parallel(args):
    """Evaluate a batch of configurations in a pool of worker processes.

    Args:
        args: Dict of equally long lists keyed by ``iterative``,
            ``num_butterflies_log2``, ``target_frequency`` and
            ``clock_period``; position ``i`` of each list forms one job.

    Returns:
        A dict of lists (one list per result key) built with ``dl_to_ld``
        from the per-job result dicts.
    """
    jobs = list(
        zip(
            args["iterative"],
            # The search space holds log2 of the butterfly count.
            [2**x for x in args["num_butterflies_log2"]],
            args["target_frequency"],
            args["clock_period"],
        )
    )
    print(jobs)
    with Pool(10) as p:
        results = p.starmap(compile_flopoco_fft, jobs)
    results = dl_to_ld(results)
    print(results)
    return results


def main():
    """Run the hypermapper search (unless ``--graphs-only``) and plot results."""
    root = os.path.dirname(__file__)
    samples_csv = "flopocofft_output_samples.csv"

    parser = ArgumentParser()
    parser.add_argument("--graphs-only", action="store_true")

    args = parser.parse_args()

    scenario = {
        "application_name": "flopocofft",
        "optimization_objectives": ["time_ii", "lut", "registers"],
        "optimization_iterations": 10,
        "evaluations_per_optimization_iteration": 10,
        "input_parameters": {
            "iterative": {"parameter_type": "integer", "values": [0, 1]},
            "num_butterflies_log2": {"parameter_type": "integer", "values": [1, 3]},
            "target_frequency": {"parameter_type": "integer", "values": [50, 950]},
            "clock_period": {"parameter_type": "integer", "values": [1, 20]},
        },
        "feasible_output": {
            "name": "meet_timing",
            "true_value": 1,
            "false_value": 0,
            "enable_feasible_predictor": True,
        },
    }

    # Continue from an earlier run when its samples are in the working directory.
    if os.path.exists(samples_csv):
        scenario = {
            **scenario,
            "resume_optimization": True,
            "resume_optimization_data": samples_csv,
        }

    # The scratch directory is removed on every exit path.
    with TemporaryDirectory() as workdir:
        scenario_path = path.join(workdir, "scenario.json")

        with open(scenario_path, "w") as f:
            json.dump(scenario, f)

        if not args.graphs_only:
            optimizer.optimize(scenario_path, compile_and_synth_parallel)

        # Now we are generating graphs
        # Handle a bug in hypermapper where "true_value" and "false_value" for
        # feasibility must be strings now
        feasible = scenario["feasible_output"]
        feasible["true_value"] = str(feasible["true_value"])
        feasible["false_value"] = str(feasible["false_value"])

        with open(scenario_path, "w") as f:
            json.dump(scenario, f)

        # Generate the graphs
        # Make results directory
        resdir = path.join(root, "results")
        os.makedirs(resdir, exist_ok=True)
        results_csv = path.join(resdir, samples_csv)
        pareto_csv = path.join(resdir, "pareto.csv")

        # Copy csv file to results directory
        shutil.copyfile(samples_csv, results_csv)

        # Also copy to the scratch directory for plotting
        shutil.copyfile(samples_csv, path.join(workdir, samples_csv))

        hypermapper.plot_optimization_results.plot_regret(
            scenario_path, [workdir], out_dir=resdir
        )

        hypermapper.compute_pareto.compute(scenario_path, results_csv, pareto_csv)

        hypermapper.plot_pareto.plot(
            scenario_path,
            [(pareto_csv, results_csv)],
            path.join(resdir, "pareto.pdf"),
        )

        df = pandas.read_csv(pareto_csv)

        print(df)

        # presumably clock_period is in ns, which makes this MHz -- verify
        df["frequency"] = 1000 / df["clock_period"]

        for objective in scenario["optimization_objectives"]:
            # Plot a scatter plot of all the points
            fig = plt.figure()
            ax = fig.add_subplot()

            ax.scatter(df["frequency"], df[objective])

            ax.set_xlabel("Frequency")
            ax.set_ylabel(objective)

            fig.savefig(path.join(resdir, f"{objective}_scatter.pdf"))
            # Release the figure so figures do not pile up across objectives.
            plt.close(fig)


if __name__ == "__main__":
    main()
# Take a list of dictionaries and return a dictionary of lists
def dl_to_ld(dl):
    """Transpose a list of dicts into a dict of lists.

    Despite what the name suggests, the input is a *list of dicts* and the
    result is a *dict of lists*.

    Args:
        dl: Iterable of dictionaries.

    Returns:
        A dict mapping every key seen to the list of its values, in input
        order. Keys appear in first-seen order. A dict that lacks a key
        contributes nothing to that key's list, so lists can differ in
        length when the inputs do not share the same keys. An empty input
        yields ``{}``.
    """
    ld = {}
    for d in dl:
        for k, v in d.items():
            # setdefault creates the list on first sight of a key.
            ld.setdefault(k, []).append(v)
    return ld