diff --git a/pybuda/csrc/perf_model/graph.hpp b/pybuda/csrc/perf_model/graph.hpp index 6dc2fa6e..85960702 100644 --- a/pybuda/csrc/perf_model/graph.hpp +++ b/pybuda/csrc/perf_model/graph.hpp @@ -101,15 +101,14 @@ struct OpPerfData _get_execution_cycles(arch_name); return _theoretical_cycles; } - const balancer::OpCycleEstimates& get_op_cycle_estimates( + const balancer::OpCycleEstimates &get_op_cycle_estimates( const DeviceConfig &device_config, const graphlib::Graph *graph, bool input_queues_on_host, bool output_queues_on_host, const std::unordered_map &selected_op_models) { - _get_op_cycle_estimates( - device_config, graph, input_queues_on_host, output_queues_on_host, selected_op_models); + _get_op_cycle_estimates(device_config, graph, input_queues_on_host, output_queues_on_host, selected_op_models); return _op_cycle_estimates; } }; @@ -118,12 +117,13 @@ struct OpPerfCalculatedData { // BWs - ideal/actual std::vector input_bw_needed, input_bw_got; - float output_bw_perc; // the percentage of required bw we got (for worst case operand), which is also output bw% - float output_bw_ideal, output_bw_produced; + float output_bw_perc = + 0; // the percentage of required bw we got (for worst case operand), which is also output bw% + float output_bw_ideal = 0, output_bw_produced = 0; // Cycle counts, utilization - float utilization; - std::uint32_t cycle_count_actual; + float utilization = 0; + std::uint32_t cycle_count_actual = 0; }; struct QueuePerfData @@ -135,12 +135,12 @@ struct QueuePerfData struct QueuePerfCalculatedData { - float total_read_bw_ideal; // ideal total BW requested by all consumers - float write_bw_ideal; // ideal write BW from the producer + float total_read_bw_ideal = 0; // ideal total BW requested by all consumers + float write_bw_ideal = 0; // ideal write BW from the producer - float total_bw_perc; // the percentage of requested bw that we can get from dram - float total_read_bw_produced; // actual BW that can be given to the op - float write_bw_received; // actual write BW from the producer + float total_bw_perc = 0; // the percentage of requested bw that we can get from dram + float total_read_bw_produced = 0; // actual BW that can be given to the op + float write_bw_received = 0; // actual write BW from the producer }; struct Attr diff --git a/pybuda/csrc/perf_model/perf_model.cpp b/pybuda/csrc/perf_model/perf_model.cpp index 6044e01d..6ade6c29 100644 --- a/pybuda/csrc/perf_model/perf_model.cpp +++ b/pybuda/csrc/perf_model/perf_model.cpp @@ -281,7 +281,8 @@ void PerfModel::create_graphs( op_perf << "name, type, epoch, grid, tiles, cycles, limiter_cycles"; for (unsigned int input_idx = 0; input_idx < c_op_max_num_inputs; ++input_idx) { - op_perf << ", " << "estimated_input_bw_" << input_idx; + op_perf << ", " + << "estimated_input_bw_" << input_idx; } op_perf << ", estimated_output_bw_0" << std::endl; @@ -306,7 +307,7 @@ void PerfModel::create_graphs( NodeP op = node_map.at(node); if (dump_op_perf) { - const balancer::OpCycleEstimates& op_cycle_estimates = + const balancer::OpCycleEstimates &op_cycle_estimates = op->get_perf_data()->op_perf_data.get_op_cycle_estimates( device_config, g, @@ -324,8 +325,9 @@ void PerfModel::create_graphs( for (unsigned int input_idx = 0; input_idx < c_op_max_num_inputs; ++input_idx) { - float input_bw = input_idx < op_cycle_estimates.input_bw_estimates.size() ? - op_cycle_estimates.input_bw_estimates[input_idx] : 0.0f; + float input_bw = input_idx < op_cycle_estimates.input_bw_estimates.size() + ? op_cycle_estimates.input_bw_estimates[input_idx] + : 0.0f; op_perf << std::to_string(input_bw) << ", "; } @@ -604,18 +606,18 @@ PerfModel::PerfModel( create_graphs(g, balancer_solution, input_queues_on_host, output_queues_on_host); SystemSpec system = SystemSpec::get_for_device(device_config); - // calculate ideal bandwidths for queues and ops - calculate_ideal_bws(system); + if (env_as("PYBUDA_PERF_SIMULATOR")) + { + // calculate ideal bandwidths for queues and ops + calculate_ideal_bws(system); - // calculate utilization - if (env_as("PYBUDA_PERF_UTIL")) - calculate_utilization(system); + // calculate utilization + if (env_as("PYBUDA_PERF_UTIL")) + calculate_utilization(system); - // Propagate BWs - for (auto &epoch_graph : temporal_epoch_graphs) propagate_bws(epoch_graph.get(), system); + // Propagate BWs + for (auto &epoch_graph : temporal_epoch_graphs) propagate_bws(epoch_graph.get(), system); - if (env_as("PYBUDA_PERF_SIMULATOR")) - { std::uint32_t original_microbatch = g->get_microbatch(); if (auto sim_mb = env_as_optional("PYBUDA_PERF_SIMULATOR_MICROBATCH")) g->set_microbatch(*sim_mb);