From 8335b94980def3145666899c96406c6086098986 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 17 Jan 2024 13:55:33 +0100 Subject: [PATCH 01/26] minor improvements --- .../discopop_optimizer/PETParser/PETParser.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 05b4b4b42..d5e22caf0 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -109,6 +109,9 @@ def parse(self) -> Tuple[nx.DiGraph, int]: self.__flatten_function_graphs() + # remove invalid functions + self.__remove_invalid_functions() + convert_temporary_edges(self.graph) if self.experiment.arguments.verbose: print("converted temporary edges") @@ -119,6 +122,8 @@ def parse(self) -> Tuple[nx.DiGraph, int]: if self.experiment.arguments.verbose: print("calculated data flow") + if self.experiment.arguments.verbose: + print("Propagating read/write information...") self.__propagate_reads_and_writes() if self.experiment.arguments.verbose: print("Propagated read/write information") @@ -1070,13 +1075,19 @@ def inlined_data_flow_calculation(current_node, current_last_writes): return current_last_writes # Note: at this point in time, the graph MUST NOT have branched sections - for function_node in get_all_function_nodes(self.graph): + all_function_nodes = get_all_function_nodes(self.graph) + for idx, function_node in enumerate(all_function_nodes): + if self.experiment.arguments.verbose: + print("Calculating dataflow for function: ", data_at(self.graph, function_node).name, idx,"/",len(all_function_nodes)) if ( function_node not in self.experiment.hotspot_function_node_ids and len(self.experiment.hotspot_function_node_ids) > 0 ): print("SKIPPING NON-HOTSPOT FUNCTION: ", data_at(self.graph, function_node).name) continue + if function_node in self.invalid_functions: + 
print("SKIPPING INVALID FUNCTION: ", data_at(self.graph, function_node).name) + continue try: last_writes: Dict[MemoryRegion, int] = dict() From c3faadbcc6393687a01ddadceccdda7049df66f0 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 17 Jan 2024 14:30:29 +0100 Subject: [PATCH 02/26] fix: extended recusrion limits --- discopop_library/discopop_optimizer/CostModels/utilities.py | 3 +++ .../utilities/optimization/LocalOptimization/TopDown.py | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/discopop_library/discopop_optimizer/CostModels/utilities.py b/discopop_library/discopop_optimizer/CostModels/utilities.py index 5703f974d..588e52976 100644 --- a/discopop_library/discopop_optimizer/CostModels/utilities.py +++ b/discopop_library/discopop_optimizer/CostModels/utilities.py @@ -61,6 +61,8 @@ def get_performance_models_for_functions( # start the collection at the first child of the function for child_id in get_children(graph, node_id): + import sys + sys.setrecursionlimit(100000) performance_models[node_data] = get_node_performance_models( experiment, graph, @@ -70,6 +72,7 @@ def get_performance_models_for_functions( restrict_to_decisions=restrict_to_decisions, allow_sequential=True, ) + sys.setrecursionlimit(1000) # At this point, decisions are restricted to the specified parallelization or the sequential version. # Restrict them to the exact case specified in restrict_to_decisions diff --git a/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py b/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py index 73a61623c..b1b337c73 100644 --- a/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py +++ b/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py @@ -6,6 +6,7 @@ # the 3-Clause BSD License. See the LICENSE file in the package base # directory for details. 
import copy +import sys from typing import Dict, List, Tuple, Set, cast import networkx as nx # type: ignore @@ -51,6 +52,7 @@ def get_locally_optimized_models( for decision in decision_options: try: # create a performance model for the specific decision + sys.setrecursionlimit(100000) performance_models = get_node_performance_models( experiment, graph, @@ -63,6 +65,7 @@ def get_locally_optimized_models( cast(FunctionRoot, data_at(graph, function_node)).node_id ], # ignore first node to prevent duplication of function costs ) + sys.setrecursionlimit(1000) # calculate and append necessary data transfers to the models performance_models_with_transfers = calculate_data_transfers( graph, {cast(FunctionRoot, data_at(graph, function_node)): performance_models}, experiment @@ -132,6 +135,7 @@ def get_locally_optimized_models( continue # construct locally optimal model + sys.setrecursionlimit(100000) performance_models = get_node_performance_models( experiment, graph, @@ -143,6 +147,7 @@ def get_locally_optimized_models( cast(FunctionRoot, data_at(graph, function_node)).node_id ], # ignore first node to prevent duplication of function costs ) + sys.setrecursionlimit(1000) # calculate and append necessary data transfers to the models performance_models_with_transfers = calculate_data_transfers( graph, {cast(FunctionRoot, data_at(graph, function_node)): performance_models}, experiment From 8fab4a73837a34208a526b4571937ebcc7032293 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 17 Jan 2024 15:16:01 +0100 Subject: [PATCH 03/26] type fixes and recursion depth fixes --- discopop_explorer/utils.py | 20 +- .../CostModels/utilities.py | 377 +++++++++--------- .../discopop_optimizer/PETParser/PETParser.py | 8 +- .../discopop_optimizer/optimization/greedy.py | 19 +- .../utilities/MOGUtilities.py | 18 +- .../optimization/LocalOptimization/TopDown.py | 2 - 6 files changed, 234 insertions(+), 210 deletions(-) diff --git a/discopop_explorer/utils.py 
b/discopop_explorer/utils.py index 356af5735..303070f94 100644 --- a/discopop_explorer/utils.py +++ b/discopop_explorer/utils.py @@ -954,15 +954,15 @@ def filter_for_hotspots( if node.type == NodeType.FUNC and hotspot[2] == HotspotNodeType.FUNCTION: result_set.add(node) -# # check for matches from hotspot functions -# for node in nodes: -# for hotspot in all_hotspot_descriptions: -# if hotspot[2] == HotspotNodeType.FUNCTION: -# if hotspot[0] == node.file_id: -# try: -# if pet.get_parent_function(node).name == hotspot[3]: -# print("HOTSPOT FUNCTION MATCH FROM NODE: ", node.id) -# except AssertionError: -# continue + # # check for matches from hotspot functions + # for node in nodes: + # for hotspot in all_hotspot_descriptions: + # if hotspot[2] == HotspotNodeType.FUNCTION: + # if hotspot[0] == node.file_id: + # try: + # if pet.get_parent_function(node).name == hotspot[3]: + # print("HOTSPOT FUNCTION MATCH FROM NODE: ", node.id) + # except AssertionError: + # continue return list(result_set) diff --git a/discopop_library/discopop_optimizer/CostModels/utilities.py b/discopop_library/discopop_optimizer/CostModels/utilities.py index 588e52976..8dbb126f1 100644 --- a/discopop_library/discopop_optimizer/CostModels/utilities.py +++ b/discopop_library/discopop_optimizer/CostModels/utilities.py @@ -61,8 +61,6 @@ def get_performance_models_for_functions( # start the collection at the first child of the function for child_id in get_children(graph, node_id): - import sys - sys.setrecursionlimit(100000) performance_models[node_data] = get_node_performance_models( experiment, graph, @@ -72,7 +70,6 @@ def get_performance_models_for_functions( restrict_to_decisions=restrict_to_decisions, allow_sequential=True, ) - sys.setrecursionlimit(1000) # At this point, decisions are restricted to the specified parallelization or the sequential version. 
# Restrict them to the exact case specified in restrict_to_decisions @@ -120,53 +117,62 @@ def get_node_performance_models( If a set of decision is specified for restrict_to_decisions, only those non-sequential decisions will be allowed. Caution: List might be empty! """ - result_list: List[CostModel] = [] - successors = get_successors(graph, node_id) - successor_count = len(successors) - node_data = data_at(graph, node_id) - if node_data.execute_in_parallel: - current_device_id = node_data.device_id - visited_nodes.add(node_id) - - # consider performance models of children - children_models = get_performance_models_for_children( - experiment, - graph, - node_id, - copy.deepcopy(visited_nodes), - all_function_nodes, - restrict_to_decisions=restrict_to_decisions, - do_not_allow_decisions=do_not_allow_decisions, - get_single_random_model=get_single_random_model, - ) + try: + result_list: List[CostModel] = [] + successors = get_successors(graph, node_id) + successor_count = len(successors) + node_data = data_at(graph, node_id) + if node_data.execute_in_parallel: + current_device_id = node_data.device_id + visited_nodes.add(node_id) + + # consider performance models of children + children_models = get_performance_models_for_children( + experiment, + graph, + node_id, + copy.deepcopy(visited_nodes), + all_function_nodes, + restrict_to_decisions=restrict_to_decisions, + do_not_allow_decisions=do_not_allow_decisions, + get_single_random_model=get_single_random_model, + ) + + if len(children_models) == 0: + if ignore_node_costs is not None: + if node_data.node_id in ignore_node_costs: + children_models = [CostModel(Integer(0), Integer(0))] + else: + children_models = [ + node_data.get_cost_model( + experiment, + all_function_nodes, + experiment.get_system().get_device(current_device_id), + ) + ] - if len(children_models) == 0: - if ignore_node_costs is not None: - if node_data.node_id in ignore_node_costs: - children_models = [CostModel(Integer(0), Integer(0))] else: - 
children_models = [ - node_data.get_cost_model( + if ignore_node_costs is not None: + if node_data.node_id in ignore_node_costs: + tmp_node_cost_model = CostModel(Integer(0), Integer(0)) + else: + tmp_node_cost_model = node_data.get_cost_model( experiment, all_function_nodes, experiment.get_system().get_device(current_device_id), ) - ] - - else: - if ignore_node_costs is not None: - if node_data.node_id in ignore_node_costs: - tmp_node_cost_model = CostModel(Integer(0), Integer(0)) - else: - tmp_node_cost_model = node_data.get_cost_model( - experiment, - all_function_nodes, - experiment.get_system().get_device(current_device_id), - ) - for idx, child_model in enumerate(children_models): - if ignore_node_costs is not None: - if node_data.node_id not in ignore_node_costs: + for idx, child_model in enumerate(children_models): + if ignore_node_costs is not None: + if node_data.node_id not in ignore_node_costs: + children_models[idx] = tmp_node_cost_model.register_child( + child_model, + node_data, + experiment, + all_function_nodes, + experiment.get_system().get_device(current_device_id), + ) + else: children_models[idx] = tmp_node_cost_model.register_child( child_model, node_data, @@ -174,157 +180,162 @@ def get_node_performance_models( all_function_nodes, experiment.get_system().get_device(current_device_id), ) - else: - children_models[idx] = tmp_node_cost_model.register_child( - child_model, - node_data, - experiment, - all_function_nodes, - experiment.get_system().get_device(current_device_id), - ) - # construct the performance models - if successor_count >= 1: - removed_successors = False - if get_single_random_model and successor_count > 1: - # pick only a single successor - successors = [random.choice(successors)] - removed_successors = True - - for children_model in children_models: - for successor in successors: - # ## CHECK REQUIREMENTS ## - # check if successor validates a requirements edge to restrain the created combinations - # 1.1. 
check if optionEdge between any node in visited_nodes and successor exists - # 1.2. if so, check if option edge to other node in visited nodes exists - # 1.3. if so, check if a requirements edge between both option exists. - # 1.4. if not, the path is not valid since two options for the same - # source code location would be selected - path_invalid = False - # 1.1 - # if successor in [109, 110, 111, 112, 113, 114]: - # print("VISITED NODES: ", visited_nodes) - for visited_node_id in visited_nodes: - options = get_out_mutex_edges(graph, visited_node_id) - if successor in options: - # 1.2 - visited_options = [opt for opt in options if opt in visited_nodes] - if len(visited_options) > 0: - # 1.3 - for vo in visited_options: - # 1.4 - if successor not in get_requirements(graph, vo): - path_invalid = True - break + # construct the performance models + if successor_count >= 1: + removed_successors = False + if get_single_random_model and successor_count > 1: + # pick only a single successor + successors = [random.choice(successors)] + removed_successors = True + + for children_model in children_models: + for successor in successors: + # ## CHECK REQUIREMENTS ## + # check if successor validates a requirements edge to restrain the created combinations + # 1.1. check if optionEdge between any node in visited_nodes and successor exists + # 1.2. if so, check if option edge to other node in visited nodes exists + # 1.3. if so, check if a requirements edge between both option exists. + # 1.4. 
if not, the path is not valid since two options for the same + # source code location would be selected + path_invalid = False + # 1.1 + # if successor in [109, 110, 111, 112, 113, 114]: + # print("VISITED NODES: ", visited_nodes) + for visited_node_id in visited_nodes: + options = get_out_mutex_edges(graph, visited_node_id) + if successor in options: + # 1.2 + visited_options = [opt for opt in options if opt in visited_nodes] + if len(visited_options) > 0: + # 1.3 + for vo in visited_options: + # 1.4 + if successor not in get_requirements(graph, vo): + path_invalid = True + break + if path_invalid: + break if path_invalid: - break - if path_invalid: - continue + continue + + # 2 check if a sibling of successor exists which has a requirements edge to a visited node + # 2.1 check if an incoming or outgoing option edge exists, get the node id for the sequential version + # 2.2 for all parallelization options + # 2.3 check if a requirements edge to a visited node exists + # 2.4 if so, stop if successor is NOT the parallelization option with the requirements edge + # 2.1 + for sibling in successors: + sequential_version_ids = [] + if len(get_out_mutex_edges(graph, sibling)) > 0: + sequential_version_ids = [sibling] + else: + for seq in get_in_mutex_edges(graph, sibling): + sequential_version_ids.append(seq) + # 2.2 + for seq in sequential_version_ids: + for option in get_out_mutex_edges(graph, seq): + if option == successor: + continue + # 2.3 + for visited_req in [ + req for req in get_requirements(graph, option) if req in visited_nodes + ]: + # 2.4 + if visited_req != successor: + path_invalid = True + break + if path_invalid: + break + if path_invalid: + break - # 2 check if a sibling of successor exists which has a requirements edge to a visited node - # 2.1 check if an incoming or outgoing option edge exists, get the node id for the sequential version - # 2.2 for all parallelization options - # 2.3 check if a requirements edge to a visited node exists - # 2.4 if 
so, stop if successor is NOT the parallelization option with the requirements edge - # 2.1 - for sibling in successors: - sequential_version_ids = [] - if len(get_out_mutex_edges(graph, sibling)) > 0: - sequential_version_ids = [sibling] - else: - for seq in get_in_mutex_edges(graph, sibling): - sequential_version_ids.append(seq) - # 2.2 - for seq in sequential_version_ids: - for option in get_out_mutex_edges(graph, seq): - if option == successor: - continue - # 2.3 - for visited_req in [req for req in get_requirements(graph, option) if req in visited_nodes]: - # 2.4 - if visited_req != successor: + # do not allow nested parallelization suggestions on devices of type GPU + if True: # option to disable this check + combined_visited_nodes = copy.deepcopy(visited_nodes) + combined_visited_nodes.add(successor) + gpu_suggestions = [ + node_id + for node_id in combined_visited_nodes + if isinstance( + experiment.get_system().get_device(data_at(graph, node_id).device_id), + GPU, + ) + ] + # check if two suggestions are in a contained-in relation + for suggestion_1 in gpu_suggestions: + all_parents = get_all_parents(graph, suggestion_1) + for suggestion_2 in gpu_suggestions: + if suggestion_1 == suggestion_2: + continue + if suggestion_2 in all_parents: path_invalid = True break if path_invalid: break - if path_invalid: - break - - # do not allow nested parallelization suggestions on devices of type GPU - if True: # option to disable this check - combined_visited_nodes = copy.deepcopy(visited_nodes) - combined_visited_nodes.add(successor) - gpu_suggestions = [ - node_id - for node_id in combined_visited_nodes - if isinstance( - experiment.get_system().get_device(data_at(graph, node_id).device_id), - GPU, - ) - ] - # check if two suggestions are in a contained-in relation - for suggestion_1 in gpu_suggestions: - all_parents = get_all_parents(graph, suggestion_1) - for suggestion_2 in gpu_suggestions: - if suggestion_1 == suggestion_2: - continue - if suggestion_2 in 
all_parents: - path_invalid = True - break - if path_invalid: - break - # check if the current decision invalidates decision requirements, if some are specified - if restrict_to_decisions is not None: - if not (successor in restrict_to_decisions or data_at(graph, successor).suggestion is None): - path_invalid = True - if not path_invalid: - if data_at(graph, successor).suggestion is None: - # if the sequential "fallback" has been used, check if a different option is specifically - # mentioned in restrict_to_decisions. If so, the sequential fallback shall be ignored. - options = get_out_mutex_edges(graph, successor) - restricted_options = [opt for opt in options if opt in restrict_to_decisions] - if len(restricted_options) != 0: - # do not use he sequential fallback since a required option exists - path_invalid = True - - if do_not_allow_decisions is not None: - if successor in do_not_allow_decisions: - path_invalid = True - - if path_invalid: - continue + # check if the current decision invalidates decision requirements, if some are specified + if restrict_to_decisions is not None: + if not (successor in restrict_to_decisions or data_at(graph, successor).suggestion is None): + path_invalid = True + if not path_invalid: + if data_at(graph, successor).suggestion is None: + # if the sequential "fallback" has been used, check if a different option is specifically + # mentioned in restrict_to_decisions. If so, the sequential fallback shall be ignored. 
+ options = get_out_mutex_edges(graph, successor) + restricted_options = [opt for opt in options if opt in restrict_to_decisions] + if len(restricted_options) != 0: + # do not use he sequential fallback since a required option exists + path_invalid = True - # ## END OF REQUIREMENTS CHECK ## + if do_not_allow_decisions is not None: + if successor in do_not_allow_decisions: + path_invalid = True - combined_model = children_model - # add transfer costs - transfer_costs_model = get_edge_data(graph, node_id, successor).get_cost_model() - combined_model = combined_model.parallelizable_plus_combine(transfer_costs_model) + if path_invalid: + continue - # if the successor is "determined" by a path decision, add path decision to the combined model - if len(successors) > 1 or removed_successors: - combined_model.path_decisions.append(successor) - # append the model of the successor - for model in get_node_performance_models( - experiment, - graph, - successor, - copy.deepcopy(visited_nodes), - all_function_nodes, - restrict_to_decisions=restrict_to_decisions, - do_not_allow_decisions=do_not_allow_decisions, - get_single_random_model=get_single_random_model, - ignore_node_costs=ignore_node_costs, - ): - tmp = combined_model.parallelizable_plus_combine(model) - tmp.path_decisions += [d for d in model.path_decisions if d not in tmp.path_decisions] - result_list.append(tmp) - if len(result_list) >= 1: - return result_list - - # successor count == 0 or successor count > 1 - return children_models + # ## END OF REQUIREMENTS CHECK ## + + combined_model = children_model + # add transfer costs + transfer_costs_model = get_edge_data(graph, node_id, successor).get_cost_model() + combined_model = combined_model.parallelizable_plus_combine(transfer_costs_model) + + # if the successor is "determined" by a path decision, add path decision to the combined model + if len(successors) > 1 or removed_successors: + combined_model.path_decisions.append(successor) + # append the model of the 
successor + for model in get_node_performance_models( + experiment, + graph, + successor, + copy.deepcopy(visited_nodes), + all_function_nodes, + restrict_to_decisions=restrict_to_decisions, + do_not_allow_decisions=do_not_allow_decisions, + get_single_random_model=get_single_random_model, + ignore_node_costs=ignore_node_costs, + ): + tmp = combined_model.parallelizable_plus_combine(model) + tmp.path_decisions += [d for d in model.path_decisions if d not in tmp.path_decisions] + result_list.append(tmp) + if len(result_list) >= 1: + return result_list + + # successor count == 0 or successor count > 1 + return children_models + + except RecursionError: + warnings.warn("Allowed recursion depth exceeded at node " + str(node_id) + " . Results may be inaccurate.") + node_data = data_at(graph, node_id) + return [ + node_data.get_cost_model( + experiment, + all_function_nodes, + experiment.get_system().get_device(current_device_id), + ) + ] def get_performance_models_for_children( diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index d5e22caf0..580b1c4c7 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -1078,7 +1078,13 @@ def inlined_data_flow_calculation(current_node, current_last_writes): all_function_nodes = get_all_function_nodes(self.graph) for idx, function_node in enumerate(all_function_nodes): if self.experiment.arguments.verbose: - print("Calculating dataflow for function: ", data_at(self.graph, function_node).name, idx,"/",len(all_function_nodes)) + print( + "Calculating dataflow for function: ", + data_at(self.graph, function_node).name, + idx, + "/", + len(all_function_nodes), + ) if ( function_node not in self.experiment.hotspot_function_node_ids and len(self.experiment.hotspot_function_node_ids) > 0 diff --git a/discopop_library/discopop_optimizer/optimization/greedy.py 
b/discopop_library/discopop_optimizer/optimization/greedy.py index b63ba93e1..0ed1bd270 100644 --- a/discopop_library/discopop_optimizer/optimization/greedy.py +++ b/discopop_library/discopop_optimizer/optimization/greedy.py @@ -80,7 +80,7 @@ def greedy_search( local_results: List[Tuple[Dict[int, List[List[int]]], int, ContextObject]] = [] # prepare arguments for parallel cost calculation - param_list = [] + param_list: List[Dict[int, List[List[int]]]] = [] for decision in decision_set: # copy made decisions local_decision_set: Dict[int, List[List[int]]] = dict() @@ -92,11 +92,16 @@ def greedy_search( local_decision_set[function_node.node_id][dcsi] = [decision] param_list.append(local_decision_set) - # calculate costs in parallel - with Pool(initializer=__initialize_cost_caluclation_worker, initargs=(experiment, arguments)) as pool: - tmp_result = list( - tqdm.tqdm(pool.imap_unordered(__get_score, param_list), total=len(param_list), disable=True) - ) + # # calculate costs in parallel + # with Pool(initializer=__initialize_cost_caluclation_worker, initargs=(experiment, arguments)) as pool: + # tmp_result = list( + # tqdm.tqdm(pool.imap_unordered(__get_score, param_list), total=len(param_list), disable=True) + # ) + # calculate costs + tmp_result: List[Tuple[Dict[int, List[List[int]]], int, ContextObject]] = [] + for param in param_list: + tmp_result.append(__get_score(param)) + for local_result in tmp_result: # remove invalid elements if local_result[1] == -1: @@ -146,7 +151,7 @@ def __initialize_cost_caluclation_worker( global_arguments = arguments -def __get_score(param_tuple) -> Tuple[List[int], int, ContextObject]: +def __get_score(param_tuple) -> Tuple[Dict[int, List[List[int]]], int, ContextObject]: global global_experiment global global_arguments configuration = param_tuple diff --git a/discopop_library/discopop_optimizer/utilities/MOGUtilities.py b/discopop_library/discopop_optimizer/utilities/MOGUtilities.py index 2434812e1..ae4e34950 100644 --- 
a/discopop_library/discopop_optimizer/utilities/MOGUtilities.py +++ b/discopop_library/discopop_optimizer/utilities/MOGUtilities.py @@ -9,6 +9,7 @@ import itertools from multiprocessing import Pool from typing import Any, ClassVar, Dict, List, Optional, cast, Set, Tuple +import warnings import matplotlib # type: ignore import matplotlib.pyplot as plt # type:ignore @@ -416,13 +417,16 @@ def get_read_and_written_data_from_subgraph( read_memory_regions: Set[MemoryRegion] = set() written_memory_regions: Set[MemoryRegion] = set() # collect reads and writes from successors and children - subgraph = get_children(graph, node_id) - if not ignore_successors: - subgraph += get_successors(graph, node_id) - for successor in subgraph: - reads, writes = get_read_and_written_data_from_subgraph(graph, successor) - read_memory_regions.update(reads) - written_memory_regions.update(writes) + try: + subgraph = get_children(graph, node_id) + if not ignore_successors: + subgraph += get_successors(graph, node_id) + for successor in subgraph: + reads, writes = get_read_and_written_data_from_subgraph(graph, successor) + read_memory_regions.update(reads) + written_memory_regions.update(writes) + except RecursionError: + warnings.warn("Recursion limit exceeeded. 
Read and write in subtrees might be inaccurate.") # add reads and writes of the node itself node_data = data_at(graph, node_id) read_memory_regions.update([read_access.memory_region for read_access in node_data.read_memory_regions]) diff --git a/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py b/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py index b1b337c73..1599dfb8d 100644 --- a/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py +++ b/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py @@ -52,7 +52,6 @@ def get_locally_optimized_models( for decision in decision_options: try: # create a performance model for the specific decision - sys.setrecursionlimit(100000) performance_models = get_node_performance_models( experiment, graph, @@ -65,7 +64,6 @@ def get_locally_optimized_models( cast(FunctionRoot, data_at(graph, function_node)).node_id ], # ignore first node to prevent duplication of function costs ) - sys.setrecursionlimit(1000) # calculate and append necessary data transfers to the models performance_models_with_transfers = calculate_data_transfers( graph, {cast(FunctionRoot, data_at(graph, function_node)): performance_models}, experiment From 54a3328e41f3dff17f2a7f544baecb5a13786410 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 17 Jan 2024 16:15:54 +0100 Subject: [PATCH 04/26] feat(optimizer): added argument to enable optimization step --- .../discopop_optimizer/OptimizerArguments.py | 1 + .../discopop_optimizer/__main__.py | 3 + .../discopop_optimizer/optimizer.py | 67 ++++++++++--------- 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/discopop_library/discopop_optimizer/OptimizerArguments.py b/discopop_library/discopop_optimizer/OptimizerArguments.py index 255fbf1f3..eea8cab06 100644 --- a/discopop_library/discopop_optimizer/OptimizerArguments.py +++ 
b/discopop_library/discopop_optimizer/OptimizerArguments.py @@ -26,6 +26,7 @@ class OptimizerArguments(object): check_called_function_for_nested_parallelism: bool profiling: bool greedy: bool + optimization: bool def __post_init__(self): # fix correct optimization method diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index 6af35bccd..804656905 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ b/discopop_library/discopop_optimizer/__main__.py @@ -29,6 +29,8 @@ def parse_args() -> OptimizerArguments: help="Enable evolutionary search. By default, a greedy search is performed.") parser.add_argument("-g", "--greedy", action="store_true", help="Enable greedy search. (Default)") + parser.add_argument("-o", "--optimize", action="store_true", + help="Enable optimization.") parser.add_argument( "--doall-microbench-file", type=str, default="None", help="Do-All microbenchmark results" @@ -71,6 +73,7 @@ def parse_args() -> OptimizerArguments: check_called_function_for_nested_parallelism=arguments.check_called_function_for_nested_parallelism, profiling=arguments.profiling, greedy=arguments.greedy, + optimization=arguments.optimize ) diff --git a/discopop_library/discopop_optimizer/optimizer.py b/discopop_library/discopop_optimizer/optimizer.py index 080dc6028..b185f785f 100644 --- a/discopop_library/discopop_optimizer/optimizer.py +++ b/discopop_library/discopop_optimizer/optimizer.py @@ -199,40 +199,41 @@ def run(arguments: OptimizerArguments): if node_id != node_data.node_id: node_data.node_id = node_id - # get values for free symbols - initialize_free_symbol_ranges_and_distributions(experiment, arguments, system) - - if arguments.verbose: - print("# SUBSTITUTIONS:") - for key in experiment.substitutions: - print("#", key, " ->", experiment.substitutions[key]) - print() - - # calculate options for easy access - available_decisions = 
get_available_decisions_for_functions(experiment.optimization_graph, arguments) - - # calculate costs for all combinations of decisions - if arguments.exhaustive: - best_configuration = evaluate_all_decision_combinations( - experiment, available_decisions, arguments, optimizer_dir - ) - elif arguments.greedy: - best_configuration = greedy_search(experiment, available_decisions, arguments, optimizer_dir) - elif arguments.evolutionary != None: - # perform evolutionary search - best_configuration = perform_evolutionary_search( - experiment, - available_decisions, - arguments, - optimizer_dir, - ) - else: - raise ValueError("No optimization method specified!") + if arguments.optimization: + # get values for free symbols + initialize_free_symbol_ranges_and_distributions(experiment, arguments, system) + + if arguments.verbose: + print("# SUBSTITUTIONS:") + for key in experiment.substitutions: + print("#", key, " ->", experiment.substitutions[key]) + print() + + # calculate options for easy access + available_decisions = get_available_decisions_for_functions(experiment.optimization_graph, arguments) + + # calculate costs for all combinations of decisions + if arguments.exhaustive: + best_configuration = evaluate_all_decision_combinations( + experiment, available_decisions, arguments, optimizer_dir + ) + elif arguments.greedy: + best_configuration = greedy_search(experiment, available_decisions, arguments, optimizer_dir) + elif arguments.evolutionary != None: + # perform evolutionary search + best_configuration = perform_evolutionary_search( + experiment, + available_decisions, + arguments, + optimizer_dir, + ) + else: + raise ValueError("No optimization method specified!") - if best_configuration is not None: - best_configuration = optimize_updates(experiment, best_configuration, arguments) - # append the configuration to the list of patterns - experiment.detection_result.patterns.optimizer_output.append(best_configuration) + if best_configuration is not None: + 
best_configuration = optimize_updates(experiment, best_configuration, arguments) + # append the configuration to the list of patterns + experiment.detection_result.patterns.optimizer_output.append(best_configuration) if arguments.profiling: experiment.profile.disable() # type: ignore From 9bac12796e1f2e270db3e378d4eb4a54415d7c94 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 17 Jan 2024 17:43:56 +0100 Subject: [PATCH 05/26] feat(optimizer): mark intermediate suggestions as non-applicable --- discopop_explorer/pattern_detectors/PatternBase.py | 2 ++ discopop_library/PatchGenerator/from_json_patterns.py | 3 +++ .../suggestions/optimizers/loop_collapse.py | 8 +++++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/discopop_explorer/pattern_detectors/PatternBase.py b/discopop_explorer/pattern_detectors/PatternBase.py index ae2f0ba68..8861843b7 100644 --- a/discopop_explorer/pattern_detectors/PatternBase.py +++ b/discopop_explorer/pattern_detectors/PatternBase.py @@ -21,6 +21,7 @@ class PatternBase(object): node_id: NodeID start_line: LineID end_line: LineID + applicable_pattern: bool def __init__(self, node: Node): # create a file lock to synchronize processes @@ -41,6 +42,7 @@ def __init__(self, node: Node): self.node_id = node.id self.start_line = node.start_position() self.end_line = node.end_position() + self.applicable_pattern = True def to_json(self): dic = self.__dict__ diff --git a/discopop_library/PatchGenerator/from_json_patterns.py b/discopop_library/PatchGenerator/from_json_patterns.py index fe51f3814..bc46d0f15 100644 --- a/discopop_library/PatchGenerator/from_json_patterns.py +++ b/discopop_library/PatchGenerator/from_json_patterns.py @@ -34,6 +34,9 @@ def from_json_patterns( if suggestion_type == "optimizer_output": from_optimizer_output(file_mapping, patterns_by_type, suggestion, arguments, patch_generator_dir) continue + suggestion_dict = json.loads(suggestion) + if not suggestion_dict["applicable_pattern"]: + continue if 
arguments.verbose: print("Suggestion: ", suggestion) file_id_to_modified_code: Dict[int, str] = from_json_strings( diff --git a/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py b/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py index 37027d54b..4b238cd40 100644 --- a/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py +++ b/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py @@ -13,7 +13,8 @@ import networkx as nx # type: ignore -import tqdm # type: ignore +import tqdm +from build.lib.discopop_library.result_classes.OptimizerOutputPattern import OptimizerOutputPattern # type: ignore from discopop_explorer.pattern_detectors.do_all_detector import DoAllInfo # type: ignore from discopop_library.PatternIdManagement.unique_pattern_id import get_unique_pattern_id from discopop_library.discopop_optimizer.Variables.Experiment import Experiment @@ -167,6 +168,7 @@ def __collapse_loops_in_function(function_node_id): pattern_info.device_type = ( global_experiment.get_system().get_device(node_data_copy.device_id).get_device_type() ) + pattern_info.applicable_pattern = False # patterns are only applicable via the optimizer output pattern interfaces due to potential data movement global_experiment.suggestion_to_node_ids_dict[pattern_id] = [new_node_id] global_experiment.node_id_to_suggestion_dict[new_node_id] = pattern_id @@ -240,6 +242,10 @@ def __collapse_loops_in_function(function_node_id): # register pattern for output # todo: find a nicer solution to duplicating the patterns for each device mapping global_experiment.detection_result.patterns.do_all.append(pattern_info) + # construct optimizer output pattern to represent the non-standalone pattern_info + optimizer_output_pattern = OptimizerOutputPattern(pattern_info._node, [new_node_id], global_experiment.get_system().get_host_device_id()) + optimizer_output_pattern.add_pattern(pattern_info.pattern_id, 
pattern_info.device_id, pattern_info.device_type) + global_experiment.detection_result.patterns.optimizer_output.append(optimizer_output_pattern) print("REGISTERED PATTERN INFO: ", pattern_id, " for Device: ", data_at(global_graph, csrc).device_id) print(pattern_info) print() From deb2ccf3d46053b017cf7371f192f48f6b408fb0 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 10:34:15 +0100 Subject: [PATCH 06/26] feat(profiler): taken branch instrumentation --- DiscoPoP/DiscoPoP.cpp | 66 +++++++++++++++++++++++++++++-- DiscoPoP/DiscoPoP.hpp | 3 ++ rtlib/CMakeLists.txt | 1 + rtlib/cu_taken_branch_counter.cpp | 54 +++++++++++++++++++++++++ rtlib/cu_taken_branch_counter.hpp | 18 +++++++++ 5 files changed, 139 insertions(+), 3 deletions(-) create mode 100644 rtlib/cu_taken_branch_counter.cpp create mode 100644 rtlib/cu_taken_branch_counter.hpp diff --git a/DiscoPoP/DiscoPoP.cpp b/DiscoPoP/DiscoPoP.cpp index 3603548c3..074643803 100644 --- a/DiscoPoP/DiscoPoP.cpp +++ b/DiscoPoP/DiscoPoP.cpp @@ -105,6 +105,8 @@ void DiscoPoP::setupCallbacks() { DpLoopExit = ThisModule->getOrInsertFunction("__dp_loop_exit", Void, Int32, Int32); + + DpTakenBranchCounterIncr = ThisModule->getOrInsertFunction("__dp_incr_taken_branch_counter", Void, CharPtr, Int32, Int32); } bool DiscoPoP::doInitialization(Module &M) { @@ -258,7 +260,6 @@ bool DiscoPoP::doFinalization(Module &M) { } // DPInstrumentationOmission end - return true; } @@ -451,6 +452,62 @@ void DiscoPoP::populateGlobalVariablesSet(Region *TopRegion, } } + +void DiscoPoP::createTakenBranchInstrumentation(Region* TopRegion, map > &BBIDToCUIDsMap){ + /* Create calls to count taken branches inbetween CUs during execution */ + + + for (Region::block_iterator bb = TopRegion->block_begin(); + bb != TopRegion->block_end(); ++bb) { + for (BasicBlock::iterator instruction = (*bb)->begin(); + instruction != (*bb)->end(); ++instruction) { + if(isa(instruction)){ + BranchInst* branchInst = cast(instruction); + 
branchInst->print(errs()); + errs() << "\n"; + + // check for conditional branches, as unconditional ones can be ignored for counting + if(! branchInst->isUnconditional()){ + // branchInst is conditional + errs() << "\tCONDITIONAL\n"; + + // prepare IRBuilder to insert instrumentation + IRBuilder<> IRB(branchInst); + + // get BBId and CU IDS of the source + string source_BBID = bb->getName().str(); + errs() << "\tsourceBB: " << source_BBID << "\n"; + errs() << "\tnumSuccessors: " << branchInst->getNumSuccessors() << "\n"; + for(auto source_cu : BBIDToCUIDsMap[source_BBID]){ + errs() << "\t\tsourceCU: " << source_cu->ID << "\n"; + + // get BBIds of all targets + for(int i = 0; i < branchInst->getNumSuccessors(); i++){ + string successor_BBID = branchInst->getSuccessor(i)->getName().str(); + errs() << "\t\tsuccessorBB: " << successor_BBID << "\n"; + // get CUs of all targets + for(auto target_cu : BBIDToCUIDsMap[successor_BBID]){ + errs() << "\t\t\tsuccessorCU: " << target_cu->ID << "\n"; + // add instrumentation prior to the branch instruction + vector args; + string source_and_target = source_cu->ID + ";" + target_cu->ID; + args.push_back(getOrInsertVarName_dynamic(source_and_target, IRB)); + args.push_back(branchInst->getCondition()); + bool counter_active_on_cmp_value = (i == 0 ? 
1 : 0); + args.push_back(ConstantInt::get(Int32, counter_active_on_cmp_value)); + IRB.CreateCall(DpTakenBranchCounterIncr, args); + } + } + } + } + } + } + } +} + + + + void DiscoPoP::createCUs(Region *TopRegion, set &globalVariablesSet, vector &CUVector, map > &BBIDToCUIDsMap, @@ -1889,6 +1946,7 @@ void DiscoPoP::dp_reduction_insert_functions() { llvm::FunctionType* output_fn_type = llvm::FunctionType::get(llvm::Type::getVoidTy(*ctx_), false); FunctionCallee loop_counter_output_callee = module_->getOrInsertFunction("loop_counter_output", output_fn_type); + FunctionCallee cu_taken_branch_counter_output_callee = module_->getOrInsertFunction("__dp_taken_branch_counter_output", output_fn_type); llvm::Function* main_fn = module_->getFunction("main"); if (main_fn) { for (auto it = llvm::inst_begin(main_fn); it != llvm::inst_end(main_fn); @@ -1896,6 +1954,7 @@ void DiscoPoP::dp_reduction_insert_functions() { if (llvm::isa(&(*it))) { llvm::IRBuilder<> ir_builder(&(*it)); ir_builder.CreateCall(loop_counter_output_callee); + ir_builder.CreateCall(cu_taken_branch_counter_output_callee); break; } } @@ -2087,7 +2146,7 @@ bool DiscoPoP::runOnModule(Module &M) { bool DiscoPoP::runOnFunction(Function &F) { if (DP_DEBUG) { - errs() << "pass DiscoPoP: run pass on function\n"; + errs() << "pass DiscoPoP: run pass on function " << funcName.str() << "\n"; } StringRef funcName = F.getName(); @@ -2173,8 +2232,9 @@ bool DiscoPoP::runOnFunction(Function &F) { createCUs(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap, root, LI); - fillCUVariables(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap); + createTakenBranchInstrumentation(TopRegion, BBIDToCUIDsMap); + fillCUVariables(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap); fillStartEndLineNumbers(root, LI); diff --git a/DiscoPoP/DiscoPoP.hpp b/DiscoPoP/DiscoPoP.hpp index f254ae964..d64f66bc3 100644 --- a/DiscoPoP/DiscoPoP.hpp +++ b/DiscoPoP/DiscoPoP.hpp @@ -301,6 +301,7 @@ namespace { FunctionCallee DpCallOrInvoke; 
FunctionCallee DpFuncEntry, DpFuncExit; FunctionCallee DpLoopEntry, DpLoopExit; + FunctionCallee DpTakenBranchCounterIncr; // Basic types Type *Void; @@ -378,6 +379,8 @@ namespace { map > &BBIDToCUIDsMap, Node *root, LoopInfo &LI); + void createTakenBranchInstrumentation(Region* TopRegion, map > &BBIDToCUIDsMap); + void fillCUVariables(Region *TopRegion, set &globalVariablesSet, vector &CUVector, map > &BBIDToCUIDsMap); diff --git a/rtlib/CMakeLists.txt b/rtlib/CMakeLists.txt index 5f1880b1f..7b03a51e7 100644 --- a/rtlib/CMakeLists.txt +++ b/rtlib/CMakeLists.txt @@ -15,6 +15,7 @@ set(DiscoPoP_SOURCES iFunctions.cpp signature.cpp loop_counter.cpp + cu_taken_branch_counter.cpp #../share/lib/DPUtils.cpp MemoryRegionTree.cpp ) diff --git a/rtlib/cu_taken_branch_counter.cpp b/rtlib/cu_taken_branch_counter.cpp new file mode 100644 index 000000000..ab4044d75 --- /dev/null +++ b/rtlib/cu_taken_branch_counter.cpp @@ -0,0 +1,54 @@ +/* + * This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) + * + * Copyright (c) 2020, Technische Universitaet Darmstadt, Germany + * + * This software may be modified and distributed under the terms of + * the 3-Clause BSD License. See the LICENSE file in the package base + * directory for details. + * + */ + +#include "cu_taken_branch_counter.hpp" + + +static std::unordered_map cuec; + +extern "C" +{ + +void __dp_incr_taken_branch_counter(char* source_and_target, int cmp_res, int active_on) { + if(cmp_res == active_on){ + if(cuec.count(source_and_target) == 0){ + cuec[source_and_target] = 1; + } + else{ + cuec[source_and_target] = cuec[source_and_target] + 1; + } + } +} + +void __dp_taken_branch_counter_output() { + std::cout << "Outputting instrumentation results (taken branches)... 
"; + + std::ifstream ifile; + std::string line; + std::ofstream ofile; + + // output information about the loops + std::string tmp(getenv("DOT_DISCOPOP_PROFILER")); + tmp += "/cu_taken_branch_counter_output.txt"; + ofile.open(tmp.data()); + + for(auto pair : cuec){ + ofile << pair.first << ";" << pair.second << "\n"; + } + + ofile.close(); + + std::cout << "done" << std::endl; + + + +} +} \ No newline at end of file diff --git a/rtlib/cu_taken_branch_counter.hpp b/rtlib/cu_taken_branch_counter.hpp new file mode 100644 index 000000000..677c88d9c --- /dev/null +++ b/rtlib/cu_taken_branch_counter.hpp @@ -0,0 +1,18 @@ +/* + * This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) + * + * Copyright (c) 2020, Technische Universitaet Darmstadt, Germany + * + * This software may be modified and distributed under the terms of + * the 3-Clause BSD License. See the LICENSE file in the package base + * directory for details. + * + */ + +#pragma once + +#include +#include +#include +#include +#include \ No newline at end of file From 3c905d6f2fdc87f71a7ecb4eb88051f2fcd929d8 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 10:47:02 +0100 Subject: [PATCH 07/26] chore(profiler): cleanup --- DiscoPoP/DiscoPoP.cpp | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/DiscoPoP/DiscoPoP.cpp b/DiscoPoP/DiscoPoP.cpp index 074643803..00c7e2593 100644 --- a/DiscoPoP/DiscoPoP.cpp +++ b/DiscoPoP/DiscoPoP.cpp @@ -455,39 +455,25 @@ void DiscoPoP::populateGlobalVariablesSet(Region *TopRegion, void DiscoPoP::createTakenBranchInstrumentation(Region* TopRegion, map > &BBIDToCUIDsMap){ /* Create calls to count taken branches inbetween CUs during execution */ - - for (Region::block_iterator bb = TopRegion->block_begin(); bb != TopRegion->block_end(); ++bb) { for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { if(isa(instruction)){ BranchInst* branchInst = 
cast(instruction); - branchInst->print(errs()); - errs() << "\n"; - // check for conditional branches, as unconditional ones can be ignored for counting if(! branchInst->isUnconditional()){ // branchInst is conditional - errs() << "\tCONDITIONAL\n"; - // prepare IRBuilder to insert instrumentation IRBuilder<> IRB(branchInst); - // get BBId and CU IDS of the source string source_BBID = bb->getName().str(); - errs() << "\tsourceBB: " << source_BBID << "\n"; - errs() << "\tnumSuccessors: " << branchInst->getNumSuccessors() << "\n"; for(auto source_cu : BBIDToCUIDsMap[source_BBID]){ - errs() << "\t\tsourceCU: " << source_cu->ID << "\n"; - // get BBIds of all targets for(int i = 0; i < branchInst->getNumSuccessors(); i++){ string successor_BBID = branchInst->getSuccessor(i)->getName().str(); - errs() << "\t\tsuccessorBB: " << successor_BBID << "\n"; // get CUs of all targets for(auto target_cu : BBIDToCUIDsMap[successor_BBID]){ - errs() << "\t\t\tsuccessorCU: " << target_cu->ID << "\n"; // add instrumentation prior to the branch instruction vector args; string source_and_target = source_cu->ID + ";" + target_cu->ID; @@ -2146,7 +2132,7 @@ bool DiscoPoP::runOnModule(Module &M) { bool DiscoPoP::runOnFunction(Function &F) { if (DP_DEBUG) { - errs() << "pass DiscoPoP: run pass on function " << funcName.str() << "\n"; + errs() << "pass DiscoPoP: run pass on function " << F.getName().str() << "\n"; } StringRef funcName = F.getName(); From e174dc1aab8874e91fdede684b5ace3a14022498 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 11:10:38 +0100 Subject: [PATCH 08/26] feat(optimizer): preparations for graph pruning --- .../discopop_optimizer/PETParser/PETParser.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 580b1c4c7..011058477 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ 
b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -107,6 +107,14 @@ def parse(self) -> Tuple[nx.DiGraph, int]: # self.__new_parse_branched_sections() + if self.experiment.arguments.verbose: + print("pruning graphs based on taken branches") + self.__prune_branches() + print("\tDone.") + + import sys + sys.exit(0) + self.__flatten_function_graphs() # remove invalid functions @@ -139,6 +147,45 @@ def get_new_node_id(self) -> int: self.next_free_node_id += 1 return buffer + def __prune_branches(self): + """Prune branches based on the measured likelihood of execution""" + # load observed branching information + branch_counter_dict: Dict[str, Dict[str, int]] = dict() + with open("profiler/cu_taken_branch_counter_output.txt", "r") as f: + for line in f.readlines(): + line = line.replace("\n", "") + split_line = line.split(";") + source_cu_id = split_line[0] + target_cu_id = split_line[1] + counter = int(split_line[2]) + if source_cu_id not in branch_counter_dict: + branch_counter_dict[source_cu_id] = dict() + branch_counter_dict[source_cu_id][target_cu_id] = counter + print("Branch counter dict: ") + print(branch_counter_dict) + + # convert counters to likelihood + branch_likelihood_dict: Dict[str, Dict[str, float]] = dict() + for source_cu_id in branch_counter_dict: + total_counter = 0 + for target_cu_id in branch_counter_dict[source_cu_id]: + total_counter += branch_counter_dict[source_cu_id][target_cu_id] + branch_likelihood_dict[source_cu_id] = dict() + for target_cu_id in branch_counter_dict[source_cu_id]: + branch_likelihood_dict[source_cu_id][target_cu_id] = branch_counter_dict[source_cu_id][target_cu_id] / total_counter + + print("Branch likelihood dict:") + print(branch_likelihood_dict) + + # calculate total branch likelihood + + + for function in get_all_function_nodes(self.graph): + print("pruning function: ", cast(FunctionRoot, data_at(self.graph, function)).name) + + + + def __flatten_function_graphs(self): # TODO: remove deepcopies by storing 
data independently from the nodes From 58e0661102296204b20e324d3c3a885156cd9eba Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 11:48:21 +0100 Subject: [PATCH 09/26] node likelihood calculation --- .../discopop_optimizer/PETParser/PETParser.py | 60 ++++++++++++++++++- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 011058477..9e27ab686 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -177,13 +177,69 @@ def __prune_branches(self): print("Branch likelihood dict:") print(branch_likelihood_dict) - # calculate total branch likelihood - + # fix branch likelihood, necessary due to different structure of BB vs. Optimization graph + for function in get_all_function_nodes(self.graph): print("pruning function: ", cast(FunctionRoot, data_at(self.graph, function)).name) + # calculate node likelihoods + node_likelihood_dict: Dict[int, float] = dict() + # initialize + queue: List[int] = [] + for node in get_all_nodes_in_function(self.graph, function): + if len(get_predecessors(self.graph, node)) == 0: + node_likelihood_dict[node] = 1 + queue += get_successors(self.graph, node) + # calculate node likelihoods by traversing the graph + while len(queue) > 0: + current_node = queue.pop(0) # BFS + if current_node in node_likelihood_dict: + continue + predecessors = get_predecessors(self.graph, current_node) + # if node likelihoods for all predecessors exist, calculate the likelihood for current_node + valid_target = True + for pred in predecessors: + if pred not in node_likelihood_dict: + valid_target = False + # add the missing predecessor to the queue + queue.append(pred) + break + if valid_target: + current_node_cu_id = data_at(self.graph, current_node).original_cu_id + # calculate likelihood for current_node + likelihood = 0 + for 
pred in predecessors: + pred_cu_id = data_at(self.graph, pred).original_cu_id + edge_likelihood = 1 # fallback if no data exists or not a branching point + if len(get_successors(self.graph, pred)) > 1: + if pred_cu_id in branch_likelihood_dict: + if current_node_cu_id in branch_likelihood_dict[pred_cu_id]: + edge_likelihood = branch_likelihood_dict[pred_cu_id][current_node_cu_id] + print("Set edge likelihood: ", pred_cu_id, current_node_cu_id, edge_likelihood) + else: + # branch was not executed + edge_likelihood = 0 + print("Set edge likelihood: ", pred_cu_id, current_node_cu_id, edge_likelihood) + + likelihood += node_likelihood_dict[pred] * edge_likelihood + node_likelihood_dict[current_node] = likelihood + print("Set likelihood: ", current_node, likelihood) + + # add successors to queue + queue += get_successors(self.graph, current_node) + + else: + # add current_node to the queue for another try + queue.append(current_node) + + + print("node likelihood:") + for key in sorted(node_likelihood_dict.keys()): + print(key, "->", node_likelihood_dict[key]) + show_function(self.graph, data_at(self.graph, function)) + # calculate best branches using upwards search using branch and node likelihoods def __flatten_function_graphs(self): From cd357b5b6105a8766b51bbe86d6664368febaff0 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 12:17:18 +0100 Subject: [PATCH 10/26] pruning to most likely path --- .../discopop_optimizer/PETParser/PETParser.py | 41 ++++++++++++++++--- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 9e27ab686..a64dd3251 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -215,15 +215,12 @@ def __prune_branches(self): if pred_cu_id in branch_likelihood_dict: if current_node_cu_id in 
branch_likelihood_dict[pred_cu_id]: edge_likelihood = branch_likelihood_dict[pred_cu_id][current_node_cu_id] - print("Set edge likelihood: ", pred_cu_id, current_node_cu_id, edge_likelihood) else: # branch was not executed edge_likelihood = 0 - print("Set edge likelihood: ", pred_cu_id, current_node_cu_id, edge_likelihood) likelihood += node_likelihood_dict[pred] * edge_likelihood node_likelihood_dict[current_node] = likelihood - print("Set likelihood: ", current_node, likelihood) # add successors to queue queue += get_successors(self.graph, current_node) @@ -235,11 +232,45 @@ def __prune_branches(self): print("node likelihood:") for key in sorted(node_likelihood_dict.keys()): - print(key, "->", node_likelihood_dict[key]) - show_function(self.graph, data_at(self.graph, function)) + print("DONE") # calculate best branches using upwards search using branch and node likelihoods + keep_nodes: List[int] = self.__identify_most_likely_path(node_likelihood_dict, function) + + # prune the graph + to_be_removed: List[int] = [n for n in get_all_nodes_in_function(self.graph, function) if n not in keep_nodes] + for n in to_be_removed: + self.graph.remove_node(n) + + + def __identify_most_likely_path(self, node_likelihood_dict: Dict[int, float], function: int) -> List[int]: + """Traverse graph upwards and return a list of the most likely nodes which constitute the most likely execution path.""" + keep_nodes: List[int] = [] + queue: List[int] = [] + # get path end points + for node in get_all_nodes_in_function(self.graph, function): + if len(get_successors(self.graph, node)) == 0: + queue.append(node) + + while len(queue) > 0: + current = queue.pop() + keep_nodes.append(current) + # identify most likely predecessor + predecessor_likelihoods: List[Tuple[int, float]] = [] + for pred in get_predecessors(self.graph, current): + predecessor_likelihoods.append((pred, node_likelihood_dict[pred])) + if len(predecessor_likelihoods) == 0: + # path entry reached + continue + 
most_likely_predecessor = sorted(predecessor_likelihoods, reverse=True, key=lambda x: x[1])[0][0] + # add most likely predecessor to the queue and thus keep_nodes + queue.append(most_likely_predecessor) + + return keep_nodes + + + def __flatten_function_graphs(self): From 3b837430160da6bb82ef94f5d6e0cb90997310f2 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 12:24:22 +0100 Subject: [PATCH 11/26] minor fix --- .../discopop_optimizer/PETParser/PETParser.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index a64dd3251..1eb7f8463 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -111,9 +111,6 @@ def parse(self) -> Tuple[nx.DiGraph, int]: print("pruning graphs based on taken branches") self.__prune_branches() print("\tDone.") - - import sys - sys.exit(0) self.__flatten_function_graphs() @@ -189,7 +186,7 @@ def __prune_branches(self): for node in get_all_nodes_in_function(self.graph, function): if len(get_predecessors(self.graph, node)) == 0: node_likelihood_dict[node] = 1 - queue += get_successors(self.graph, node) + queue.append(node) # calculate node likelihoods by traversing the graph while len(queue) > 0: current_node = queue.pop(0) # BFS @@ -202,7 +199,8 @@ def __prune_branches(self): if pred not in node_likelihood_dict: valid_target = False # add the missing predecessor to the queue - queue.append(pred) + if pred not in queue: + queue.append(pred) break if valid_target: current_node_cu_id = data_at(self.graph, current_node).original_cu_id @@ -223,7 +221,7 @@ def __prune_branches(self): node_likelihood_dict[current_node] = likelihood # add successors to queue - queue += get_successors(self.graph, current_node) + queue += [s for s in get_successors(self.graph, current_node) if s not in queue] else: # add 
current_node to the queue for another try From c4fe94826c57b341bcd6769919ea515a6b180f18 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 13:28:55 +0100 Subject: [PATCH 12/26] minor reformatting --- .../discopop_optimizer/optimization/greedy.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/discopop_library/discopop_optimizer/optimization/greedy.py b/discopop_library/discopop_optimizer/optimization/greedy.py index 0ed1bd270..ebe96db5d 100644 --- a/discopop_library/discopop_optimizer/optimization/greedy.py +++ b/discopop_library/discopop_optimizer/optimization/greedy.py @@ -77,6 +77,7 @@ def greedy_search( for idx, function_node in enumerate(available_decisions): print("Greedy searching function: ", function_node.name, idx, "/", len(available_decisions)) for dcsi, decision_set in enumerate(available_decisions[function_node]): + print("\tDecision:", dcsi, "/", len(available_decisions[function_node])) local_results: List[Tuple[Dict[int, List[List[int]]], int, ContextObject]] = [] # prepare arguments for parallel cost calculation @@ -92,15 +93,17 @@ def greedy_search( local_decision_set[function_node.node_id][dcsi] = [decision] param_list.append(local_decision_set) - # # calculate costs in parallel - # with Pool(initializer=__initialize_cost_caluclation_worker, initargs=(experiment, arguments)) as pool: - # tmp_result = list( - # tqdm.tqdm(pool.imap_unordered(__get_score, param_list), total=len(param_list), disable=True) - # ) - # calculate costs - tmp_result: List[Tuple[Dict[int, List[List[int]]], int, ContextObject]] = [] - for param in param_list: - tmp_result.append(__get_score(param)) + if True: + # calculate costs in parallel + with Pool(initializer=__initialize_cost_caluclation_worker, initargs=(experiment, arguments)) as pool: + tmp_result = list( + tqdm.tqdm(pool.imap_unordered(__get_score, param_list), total=len(param_list), disable=True) + ) + else: + # calculate costs sequentially + tmp_result: 
List[Tuple[Dict[int, List[List[int]]], int, ContextObject]] = [] + for param in param_list: + tmp_result.append(__get_score(param)) for local_result in tmp_result: # remove invalid elements From 4521b9b0986ba8d8a72d54573050c7e0cc5c3f1e Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 14:04:04 +0100 Subject: [PATCH 13/26] chore: formatting and type fixes --- .../discopop_optimizer/PETParser/PETParser.py | 21 ++++++++----------- .../discopop_optimizer/__main__.py | 2 +- .../discopop_optimizer/optimization/greedy.py | 3 ++- .../suggestions/optimizers/loop_collapse.py | 12 +++++++---- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 1eb7f8463..0f347494a 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -111,7 +111,7 @@ def parse(self) -> Tuple[nx.DiGraph, int]: print("pruning graphs based on taken branches") self.__prune_branches() print("\tDone.") - + self.__flatten_function_graphs() # remove invalid functions @@ -169,14 +169,15 @@ def __prune_branches(self): total_counter += branch_counter_dict[source_cu_id][target_cu_id] branch_likelihood_dict[source_cu_id] = dict() for target_cu_id in branch_counter_dict[source_cu_id]: - branch_likelihood_dict[source_cu_id][target_cu_id] = branch_counter_dict[source_cu_id][target_cu_id] / total_counter + branch_likelihood_dict[source_cu_id][target_cu_id] = ( + branch_counter_dict[source_cu_id][target_cu_id] / total_counter + ) print("Branch likelihood dict:") print(branch_likelihood_dict) # fix branch likelihood, necessary due to different structure of BB vs. 
Optimization graph - for function in get_all_function_nodes(self.graph): print("pruning function: ", cast(FunctionRoot, data_at(self.graph, function)).name) # calculate node likelihoods @@ -222,12 +223,11 @@ def __prune_branches(self): # add successors to queue queue += [s for s in get_successors(self.graph, current_node) if s not in queue] - + else: # add current_node to the queue for another try queue.append(current_node) - print("node likelihood:") for key in sorted(node_likelihood_dict.keys()): print(key, "->", node_likelihood_dict[key]) @@ -237,11 +237,12 @@ def __prune_branches(self): keep_nodes: List[int] = self.__identify_most_likely_path(node_likelihood_dict, function) # prune the graph - to_be_removed: List[int] = [n for n in get_all_nodes_in_function(self.graph, function) if n not in keep_nodes] + to_be_removed: List[int] = [ + n for n in get_all_nodes_in_function(self.graph, function) if n not in keep_nodes + ] for n in to_be_removed: self.graph.remove_node(n) - def __identify_most_likely_path(self, node_likelihood_dict: Dict[int, float], function: int) -> List[int]: """Traverse graph upwards and return a list of the most likely nodes which constitute the most likely execution path.""" keep_nodes: List[int] = [] @@ -250,7 +251,7 @@ def __identify_most_likely_path(self, node_likelihood_dict: Dict[int, float], fu for node in get_all_nodes_in_function(self.graph, function): if len(get_successors(self.graph, node)) == 0: queue.append(node) - + while len(queue) > 0: current = queue.pop() keep_nodes.append(current) @@ -267,10 +268,6 @@ def __identify_most_likely_path(self, node_likelihood_dict: Dict[int, float], fu return keep_nodes - - - - def __flatten_function_graphs(self): # TODO: remove deepcopies by storing data independently from the nodes diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index 804656905..f9a677de1 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ 
b/discopop_library/discopop_optimizer/__main__.py @@ -73,7 +73,7 @@ def parse_args() -> OptimizerArguments: check_called_function_for_nested_parallelism=arguments.check_called_function_for_nested_parallelism, profiling=arguments.profiling, greedy=arguments.greedy, - optimization=arguments.optimize + optimization=arguments.optimize, ) diff --git a/discopop_library/discopop_optimizer/optimization/greedy.py b/discopop_library/discopop_optimizer/optimization/greedy.py index ebe96db5d..e095bb240 100644 --- a/discopop_library/discopop_optimizer/optimization/greedy.py +++ b/discopop_library/discopop_optimizer/optimization/greedy.py @@ -93,6 +93,7 @@ def greedy_search( local_decision_set[function_node.node_id][dcsi] = [decision] param_list.append(local_decision_set) + tmp_result: List[Tuple[Dict[int, List[List[int]]], int, ContextObject]] = [] if True: # calculate costs in parallel with Pool(initializer=__initialize_cost_caluclation_worker, initargs=(experiment, arguments)) as pool: @@ -101,7 +102,7 @@ def greedy_search( ) else: # calculate costs sequentially - tmp_result: List[Tuple[Dict[int, List[List[int]]], int, ContextObject]] = [] + tmp_result = [] for param in param_list: tmp_result.append(__get_score(param)) diff --git a/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py b/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py index 4b238cd40..7c7c08fac 100644 --- a/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py +++ b/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py @@ -13,8 +13,8 @@ import networkx as nx # type: ignore -import tqdm -from build.lib.discopop_library.result_classes.OptimizerOutputPattern import OptimizerOutputPattern # type: ignore +import tqdm # type: ignore +from discopop_library.result_classes.OptimizerOutputPattern import OptimizerOutputPattern # type: ignore from discopop_explorer.pattern_detectors.do_all_detector import DoAllInfo # type: ignore 
from discopop_library.PatternIdManagement.unique_pattern_id import get_unique_pattern_id from discopop_library.discopop_optimizer.Variables.Experiment import Experiment @@ -243,8 +243,12 @@ def __collapse_loops_in_function(function_node_id): # todo: find a nicer solution to duplicating the patterns for each device mapping global_experiment.detection_result.patterns.do_all.append(pattern_info) # construct optimizer output pattern to represent the non-standalone pattern_info - optimizer_output_pattern = OptimizerOutputPattern(pattern_info._node, [new_node_id], global_experiment.get_system().get_host_device_id()) - optimizer_output_pattern.add_pattern(pattern_info.pattern_id, pattern_info.device_id, pattern_info.device_type) + optimizer_output_pattern = OptimizerOutputPattern( + pattern_info._node, [new_node_id], global_experiment.get_system().get_host_device_id() + ) + optimizer_output_pattern.add_pattern( + pattern_info.pattern_id, pattern_info.device_id, pattern_info.device_type + ) global_experiment.detection_result.patterns.optimizer_output.append(optimizer_output_pattern) print("REGISTERED PATTERN INFO: ", pattern_id, " for Device: ", data_at(global_graph, csrc).device_id) print(pattern_info) From 3ab9bc567e814ad9beaa90b192e70e1999694c83 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 11:29:00 +0100 Subject: [PATCH 14/26] feat(optimizer, profiler): optional branch tracking --- DiscoPoP/DiscoPoP.cpp | 11 ++++++++--- .../discopop_optimizer/PETParser/PETParser.py | 8 +++++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/DiscoPoP/DiscoPoP.cpp b/DiscoPoP/DiscoPoP.cpp index 00c7e2593..9ea124d82 100644 --- a/DiscoPoP/DiscoPoP.cpp +++ b/DiscoPoP/DiscoPoP.cpp @@ -19,7 +19,8 @@ #define DP_VERBOSE false // prints warning messages #define DP_hybrid_DEBUG false #define DP_hybrid_SKIP false //todo add parameter to disable hybrid dependence analysis on demand. 
- +#define DP_BRANCH_TRACKING false // toggles the creation of instrumentation calls for tracking taken branches. + // Required by the graph pruning step of the DiscoPoP optimizer. using namespace llvm; using namespace std; @@ -1940,7 +1941,9 @@ void DiscoPoP::dp_reduction_insert_functions() { if (llvm::isa(&(*it))) { llvm::IRBuilder<> ir_builder(&(*it)); ir_builder.CreateCall(loop_counter_output_callee); - ir_builder.CreateCall(cu_taken_branch_counter_output_callee); + if(DP_BRANCH_TRACKING){ + ir_builder.CreateCall(cu_taken_branch_counter_output_callee); + } break; } } @@ -2218,7 +2221,9 @@ bool DiscoPoP::runOnFunction(Function &F) { createCUs(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap, root, LI); - createTakenBranchInstrumentation(TopRegion, BBIDToCUIDsMap); + if(DP_BRANCH_TRACKING){ + createTakenBranchInstrumentation(TopRegion, BBIDToCUIDsMap); + } fillCUVariables(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap); diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 0f347494a..a61195f80 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -110,7 +110,8 @@ def parse(self) -> Tuple[nx.DiGraph, int]: if self.experiment.arguments.verbose: print("pruning graphs based on taken branches") self.__prune_branches() - print("\tDone.") + if self.experiment.arguments.verbose: + print("\tDone.") self.__flatten_function_graphs() @@ -146,6 +147,11 @@ def get_new_node_id(self) -> int: def __prune_branches(self): """Prune branches based on the measured likelihood of execution""" + # check if branch information exists. If not, skip this step. + if not os.path.exists("profiler/cu_taken_branch_counter_output.txt"): + if self.experiment.arguments.verbose: + print("\tNo information on taken branches found. 
Skipping.") + return # load observed branching information branch_counter_dict: Dict[str, Dict[str, int]] = dict() with open("profiler/cu_taken_branch_counter_output.txt", "r") as f: From 08e15c7821eca6b255b4a71d425912d6ce396631 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 11:35:20 +0100 Subject: [PATCH 15/26] fix(profiler): enable branch tracking --- DiscoPoP/DiscoPoP.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DiscoPoP/DiscoPoP.cpp b/DiscoPoP/DiscoPoP.cpp index 9ea124d82..139380c8b 100644 --- a/DiscoPoP/DiscoPoP.cpp +++ b/DiscoPoP/DiscoPoP.cpp @@ -19,7 +19,7 @@ #define DP_VERBOSE false // prints warning messages #define DP_hybrid_DEBUG false #define DP_hybrid_SKIP false //todo add parameter to disable hybrid dependence analysis on demand. -#define DP_BRANCH_TRACKING false // toggles the creation of instrumentation calls for tracking taken branches. +#define DP_BRANCH_TRACKING true // toggles the creation of instrumentation calls for tracking taken branches. // Required by the graph pruning step of the DiscoPoP optimizer. 
using namespace llvm; From d4ae32eadac61ad73f34b261819103070ecd7156 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 14:32:44 +0100 Subject: [PATCH 16/26] feat(optimizer): add configurable pruning levels --- .../discopop_optimizer/OptimizerArguments.py | 5 +++++ .../discopop_optimizer/PETParser/PETParser.py | 12 ++++++++++-- discopop_library/discopop_optimizer/__main__.py | 5 ++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/discopop_library/discopop_optimizer/OptimizerArguments.py b/discopop_library/discopop_optimizer/OptimizerArguments.py index eea8cab06..9ffa86247 100644 --- a/discopop_library/discopop_optimizer/OptimizerArguments.py +++ b/discopop_library/discopop_optimizer/OptimizerArguments.py @@ -27,6 +27,7 @@ class OptimizerArguments(object): profiling: bool greedy: bool optimization: bool + pruning_level: int def __post_init__(self): # fix correct optimization method @@ -44,4 +45,8 @@ def __validate(self): if self.reduction_microbench_file is not "None": if not os.path.isfile(self.reduction_microbench_file): raise FileNotFoundError(f"Microbenchmark file not found: {self.reduction_microbench_file}") + + # check pruning level values + if self.pruning_level not in [0, 1]: + raise ValueError("Unsupported pruning level: ", self.pruning_level) pass diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index a61195f80..35409efe1 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -148,6 +148,10 @@ def get_new_node_id(self) -> int: def __prune_branches(self): """Prune branches based on the measured likelihood of execution""" # check if branch information exists. If not, skip this step. + if self.experiment.arguments.pruning_level == 0: + if self.experiment.arguments.verbose: + print("\tPruning level 0. 
Skipping.") + return if not os.path.exists("profiler/cu_taken_branch_counter_output.txt"): if self.experiment.arguments.verbose: print("\tNo information on taken branches found. Skipping.") @@ -239,8 +243,12 @@ def __prune_branches(self): print(key, "->", node_likelihood_dict[key]) print("DONE") - # calculate best branches using upwards search using branch and node likelihoods - keep_nodes: List[int] = self.__identify_most_likely_path(node_likelihood_dict, function) + keep_nodes: List[int] = [] + if self.experiment.arguments.pruning_level == 1: + # calculate best branches using upwards search using branch and node likelihoods + keep_nodes = self.__identify_most_likely_path(node_likelihood_dict, function) + else: + raise ValueError("Unknown pruning level: ", self.experiment.arguments.pruning_level) # prune the graph to_be_removed: List[int] = [ diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index f9a677de1..5d9b1b362 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ b/discopop_library/discopop_optimizer/__main__.py @@ -31,6 +31,8 @@ def parse_args() -> OptimizerArguments: help="Enable greedy search. (Default)") parser.add_argument("-o", "--optimize", action="store_true", help="Enable optimization.") + parser.add_argument("-p", "--pruning-level", type=int, default=0, + help="Program path pruning aggressiveness. 0: no pruning. 
1: prune to most likely path.") parser.add_argument( "--doall-microbench-file", type=str, default="None", help="Do-All microbenchmark results" @@ -43,7 +45,7 @@ def parse_args() -> OptimizerArguments: "--system-configuration", type=str, default="optimizer/system_configuration.json", help="System configuration file" ) - parser.add_argument("-p", "--profiling", action="store_true", + parser.add_argument("--profiling", action="store_true", help="Enable profiling.") # EXPERIMENTAL FLAGS: experimental_parser.add_argument("--allow-nested-parallelism", action="store_true", @@ -74,6 +76,7 @@ def parse_args() -> OptimizerArguments: profiling=arguments.profiling, greedy=arguments.greedy, optimization=arguments.optimize, + pruning_level=arguments.pruning_level, ) From 76ac11f115b6a67efb2b542df17532ea8f039e47 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 14:40:43 +0100 Subject: [PATCH 17/26] feat(optimizer): preparations for pruning level 2 --- discopop_library/discopop_optimizer/OptimizerArguments.py | 2 +- discopop_library/discopop_optimizer/__main__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/discopop_library/discopop_optimizer/OptimizerArguments.py b/discopop_library/discopop_optimizer/OptimizerArguments.py index 9ffa86247..fc4bb01f3 100644 --- a/discopop_library/discopop_optimizer/OptimizerArguments.py +++ b/discopop_library/discopop_optimizer/OptimizerArguments.py @@ -47,6 +47,6 @@ def __validate(self): raise FileNotFoundError(f"Microbenchmark file not found: {self.reduction_microbench_file}") # check pruning level values - if self.pruning_level not in [0, 1]: + if self.pruning_level not in [0, 1, 2]: raise ValueError("Unsupported pruning level: ", self.pruning_level) pass diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index 5d9b1b362..fa0e6bd53 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ 
b/discopop_library/discopop_optimizer/__main__.py @@ -32,7 +32,7 @@ def parse_args() -> OptimizerArguments: parser.add_argument("-o", "--optimize", action="store_true", help="Enable optimization.") parser.add_argument("-p", "--pruning-level", type=int, default=0, - help="Program path pruning aggressiveness. 0: no pruning. 1: prune to most likely path.") + help="Program path pruning aggressiveness. 0: no pruning. 1: prune to most likely path. 2: prune to paths that cover 80%% of observed executions.") parser.add_argument( "--doall-microbench-file", type=str, default="None", help="Do-All microbenchmark results" From 9cdd77f7d9b55111d643fc78b1101e860c212853 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 15:41:51 +0100 Subject: [PATCH 18/26] feat(optimizer): pruning level 2 --- .../discopop_optimizer/PETParser/PETParser.py | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 35409efe1..c5cd00545 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -247,6 +247,9 @@ def __prune_branches(self): if self.experiment.arguments.pruning_level == 1: # calculate best branches using upwards search using branch and node likelihoods keep_nodes = self.__identify_most_likely_path(node_likelihood_dict, function) + elif self.experiment.arguments.pruning_level == 2: + # calculate branches which are executed in 80% of the observed cases + keep_nodes = self.__identify_most_likely_paths_80_percent_cutoff(branch_likelihood_dict, function) else: raise ValueError("Unknown pruning level: ", self.experiment.arguments.pruning_level) @@ -257,6 +260,55 @@ def __prune_branches(self): for n in to_be_removed: self.graph.remove_node(n) + show_function(self.graph, data_at(self.graph, function), show_dataflow=False, show_mutex_edges=False) + + def 
__identify_most_likely_paths_80_percent_cutoff(self, branch_likelihood_dict: Dict[str, Dict[str, float]], function: int) -> List[int]: + """Traverse graph downwards and return a list of the nodes visited if all branches were taken that constitute a sum of at least 80% of the observed cases.""" + keep_nodes: List[int] = [] + queue: List[int] = [] + # get path entries points + for node in get_all_nodes_in_function(self.graph, function): + if len(get_predecessors(self.graph, node)) == 0: + queue.append(node) + + while len(queue) > 0: + current = queue.pop() + current_cu_id = data_at(self.graph, current).original_cu_id + keep_nodes.append(current) + + # get successors and their cu ids + successors = get_successors(self.graph, current) + if len(successors) < 2: + queue += [s for s in successors if s not in queue and s not in keep_nodes] + continue + successor_cus = [(s, data_at(self.graph, s).original_cu_id) for s in successors] + + # get likelihoods for transitions to successors + if current_cu_id not in branch_likelihood_dict: + warnings.warn("No branch counters available for path split at CU Node: " + current_cu_id + ". 
Fallback: Preserving all successors.") + # fallback: preserve all successors + queue += [s for s in successors if s not in queue and s not in keep_nodes] + continue + else: + successor_likelihood = [] + for succ, succ_cu_id in successor_cus: + if succ_cu_id not in branch_likelihood_dict[current_cu_id]: + successor_likelihood.append((succ, succ_cu_id, 0.0)) + else: + successor_likelihood.append((succ, succ_cu_id, branch_likelihood_dict[current_cu_id][succ_cu_id])) + + # select successors until total probability is > THRESHOLD + threshold = 0.8 + total_probability = 0 + for succ, succ_cu_id, succ_prob in sorted(successor_likelihood, reverse=True, key=lambda x: x[2]): + if total_probability < threshold: + queue.append(succ) + total_probability += succ_prob + else: + break + + return keep_nodes + def __identify_most_likely_path(self, node_likelihood_dict: Dict[int, float], function: int) -> List[int]: """Traverse graph upwards and return a list of the most likely nodes which constitute the most likely execution path.""" keep_nodes: List[int] = [] From 47f787950c4ba40df3e24fd9e43d7676731860fc Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 15:44:45 +0100 Subject: [PATCH 19/26] chore: cleanup --- discopop_library/discopop_optimizer/PETParser/PETParser.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index c5cd00545..0819bc282 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -260,8 +260,6 @@ def __prune_branches(self): for n in to_be_removed: self.graph.remove_node(n) - show_function(self.graph, data_at(self.graph, function), show_dataflow=False, show_mutex_edges=False) - def __identify_most_likely_paths_80_percent_cutoff(self, branch_likelihood_dict: Dict[str, Dict[str, float]], function: int) -> List[int]: """Traverse graph downwards 
and return a list of the nodes visited if all branches were taken that constitute a sum of at least 80% of the observed cases.""" keep_nodes: List[int] = [] From c0da1faf494f4f8397cf47ca206287195f27b6b5 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 15:57:08 +0100 Subject: [PATCH 20/26] feat(optimizer)[-p2]: formatted verbose output --- .../discopop_optimizer/PETParser/PETParser.py | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 0819bc282..0cb939cc4 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -190,6 +190,15 @@ def __prune_branches(self): for function in get_all_function_nodes(self.graph): print("pruning function: ", cast(FunctionRoot, data_at(self.graph, function)).name) + verbose_print_pruning_statistics = False + if self.experiment.arguments.verbose: + ct = 0 + for node in get_all_nodes_in_function(self.graph, function): + if len(get_successors(self.graph, node)) > 1: + ct += 1 + if ct > 0: + verbose_print_pruning_statistics = True + print("\tpath splits before pruning: ", ct) # calculate node likelihoods node_likelihood_dict: Dict[int, float] = dict() # initialize @@ -238,11 +247,6 @@ def __prune_branches(self): # add current_node to the queue for another try queue.append(current_node) - print("node likelihood:") - for key in sorted(node_likelihood_dict.keys()): - print(key, "->", node_likelihood_dict[key]) - print("DONE") - keep_nodes: List[int] = [] if self.experiment.arguments.pruning_level == 1: # calculate best branches using upwards search using branch and node likelihoods @@ -254,11 +258,20 @@ def __prune_branches(self): raise ValueError("Unknown pruning level: ", self.experiment.arguments.pruning_level) # prune the graph + function_nodes = 
get_all_nodes_in_function(self.graph, function) to_be_removed: List[int] = [ - n for n in get_all_nodes_in_function(self.graph, function) if n not in keep_nodes + n for n in function_nodes if n not in keep_nodes ] for n in to_be_removed: self.graph.remove_node(n) + + if self.experiment.arguments.verbose and verbose_print_pruning_statistics: + ct = 0 + for node in get_all_nodes_in_function(self.graph, function): + if len(get_successors(self.graph, node)) > 1: + ct += 1 + print("\tpath splits after pruning: ", ct) + def __identify_most_likely_paths_80_percent_cutoff(self, branch_likelihood_dict: Dict[str, Dict[str, float]], function: int) -> List[int]: """Traverse graph downwards and return a list of the nodes visited if all branches were taken that constitute a sum of at least 80% of the observed cases.""" From af80e6ca611cd32b0ab59adcf1d2b39df21443d4 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 15:58:20 +0100 Subject: [PATCH 21/26] doc(optimizer): cleanup help string --- discopop_library/discopop_optimizer/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index fa0e6bd53..7007c0dec 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ b/discopop_library/discopop_optimizer/__main__.py @@ -32,7 +32,7 @@ def parse_args() -> OptimizerArguments: parser.add_argument("-o", "--optimize", action="store_true", help="Enable optimization.") parser.add_argument("-p", "--pruning-level", type=int, default=0, - help="Program path pruning aggressiveness. 0: no pruning. 1: prune to most likely path. 2: prune to paths that cover 80%% of observed executions.") + help="Program path pruning aggressiveness. 0: no pruning. 1: prune to most likely path. 
2: prune to paths that cover 80%% of observed decisions per path split.") parser.add_argument( "--doall-microbench-file", type=str, default="None", help="Do-All microbenchmark results" From 321e9d3490a70c53b97a62bea63da7c8a0283fd8 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 24 Jan 2024 08:46:40 +0100 Subject: [PATCH 22/26] chore(optimizer): formatting --- .../discopop_optimizer/PETParser/PETParser.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 0cb939cc4..810eda1e6 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -259,12 +259,10 @@ def __prune_branches(self): # prune the graph function_nodes = get_all_nodes_in_function(self.graph, function) - to_be_removed: List[int] = [ - n for n in function_nodes if n not in keep_nodes - ] + to_be_removed: List[int] = [n for n in function_nodes if n not in keep_nodes] for n in to_be_removed: self.graph.remove_node(n) - + if self.experiment.arguments.verbose and verbose_print_pruning_statistics: ct = 0 for node in get_all_nodes_in_function(self.graph, function): @@ -272,8 +270,9 @@ def __prune_branches(self): ct += 1 print("\tpath splits after pruning: ", ct) - - def __identify_most_likely_paths_80_percent_cutoff(self, branch_likelihood_dict: Dict[str, Dict[str, float]], function: int) -> List[int]: + def __identify_most_likely_paths_80_percent_cutoff( + self, branch_likelihood_dict: Dict[str, Dict[str, float]], function: int + ) -> List[int]: """Traverse graph downwards and return a list of the nodes visited if all branches were taken that constitute a sum of at least 80% of the observed cases.""" keep_nodes: List[int] = [] queue: List[int] = [] @@ -296,7 +295,11 @@ def __identify_most_likely_paths_80_percent_cutoff(self, branch_likelihood_dict: # get 
likelihoods for transitions to successors if current_cu_id not in branch_likelihood_dict: - warnings.warn("No branch counters available for path split at CU Node: " + current_cu_id + ". Fallback: Preserving all successors.") + warnings.warn( + "No branch counters available for path split at CU Node: " + + str(current_cu_id) + + ". Fallback: Preserving all successors." + ) # fallback: preserve all successors queue += [s for s in successors if s not in queue and s not in keep_nodes] continue @@ -306,11 +309,13 @@ def __identify_most_likely_paths_80_percent_cutoff(self, branch_likelihood_dict: if succ_cu_id not in branch_likelihood_dict[current_cu_id]: successor_likelihood.append((succ, succ_cu_id, 0.0)) else: - successor_likelihood.append((succ, succ_cu_id, branch_likelihood_dict[current_cu_id][succ_cu_id])) + successor_likelihood.append( + (succ, succ_cu_id, branch_likelihood_dict[current_cu_id][succ_cu_id]) + ) # select successors until total probability is > THRESHOLD threshold = 0.8 - total_probability = 0 + total_probability = 0.0 for succ, succ_cu_id, succ_prob in sorted(successor_likelihood, reverse=True, key=lambda x: x[2]): if total_probability < threshold: queue.append(succ) From c13792becc1dc1ca80dea0384f39f5a95d88f35b Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 24 Jan 2024 11:01:00 +0100 Subject: [PATCH 23/26] fix(CI): profiler test --- .github/workflows/tests/profiler.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests/profiler.sh b/.github/workflows/tests/profiler.sh index ec3f769d3..998a28afb 100755 --- a/.github/workflows/tests/profiler.sh +++ b/.github/workflows/tests/profiler.sh @@ -20,11 +20,14 @@ TARGET_NAME=$1 PASS_NAME=$2 function test_discopopPass { - cp ${DISCOPOP_SRC}/scripts/dp-fmap . 
- ./dp-fmap - clang++ -g -c -O0 -S -emit-llvm -fno-discard-value-names "$1" -o out.ll || return 1 - opt-11 -S -load=${DISCOPOP_INSTALL}/libi/LLVMDiscoPoP.so --DiscoPoP out.ll -o out_dp.ll || return 1 - clang++ out_dp.ll -o out_prof -L${DISCOPOP_INSTALL}/rtlib -lDiscoPoP_RT -lpthread || return 1 + ${DISCOPOP_INSTALL}/scripts/CXX_wrapper.sh "$1" -o out_prof + + +# cp ${DISCOPOP_SRC}/scripts/dp-fmap . +# ./dp-fmap +# clang++ -g -c -O0 -S -emit-llvm -fno-discard-value-names "$1" -o out.ll || return 1 +# opt-11 -S -load=${DISCOPOP_INSTALL}/libi/LLVMDiscoPoP.so --DiscoPoP out.ll -o out_dp.ll || return 1 +# clang++ out_dp.ll -o out_prof -L${DISCOPOP_INSTALL}/rtlib -lDiscoPoP_RT -lpthread || return 1 ./out_prof || return 1 } From 0b40562beb0c7c3a60e4fe82f904d9fc488f6fd1 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 24 Jan 2024 11:45:45 +0100 Subject: [PATCH 24/26] chore: formatting --- discopop_library/discopop_optimizer/OptimizerArguments.py | 2 +- discopop_library/discopop_optimizer/__main__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/discopop_library/discopop_optimizer/OptimizerArguments.py b/discopop_library/discopop_optimizer/OptimizerArguments.py index 1c36e1899..30f546343 100644 --- a/discopop_library/discopop_optimizer/OptimizerArguments.py +++ b/discopop_library/discopop_optimizer/OptimizerArguments.py @@ -42,7 +42,7 @@ def __validate(self): # check pruning level values if self.pruning_level not in [0, 1, 2]: raise ValueError("Unsupported pruning level: ", self.pruning_level) - + # check optimization level if self.optimization_level not in [0, 1, 2, 3]: raise ValueError("Unknown optimization level requested: ", self.optimization_level) diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index deb199147..cf1bb4cb3 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ b/discopop_library/discopop_optimizer/__main__.py @@ -67,7 +67,7 @@ def parse_args() -> 
OptimizerArguments: plot=arguments.plot, system_configuration_path=arguments.system_configuration, check_called_function_for_nested_parallelism=arguments.check_called_function_for_nested_parallelism, - profiling=arguments.profiling, + profiling=arguments.profiling, pruning_level=arguments.pruning_level, optimization_level=arguments.o, optimization_level_2_parameters=arguments.opt_2_params, ) From af9a3c68c96b28e877f7024b547487b3fc662497 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 24 Jan 2024 11:49:10 +0100 Subject: [PATCH 25/26] fix(optimizer)[pruning]: switched level 1 and 2 due to aggressiveness --- .../discopop_optimizer/PETParser/PETParser.py | 8 ++++---- discopop_library/discopop_optimizer/__main__.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 810eda1e6..6d629e4c6 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -108,7 +108,7 @@ def parse(self) -> Tuple[nx.DiGraph, int]: # self.__new_parse_branched_sections() if self.experiment.arguments.verbose: - print("pruning graphs based on taken branches") + print("pruning graphs based on taken branches. 
Pruning level: ", self.experiment.arguments.pruning_level) self.__prune_branches() if self.experiment.arguments.verbose: print("\tDone.") @@ -249,11 +249,11 @@ def __prune_branches(self): keep_nodes: List[int] = [] if self.experiment.arguments.pruning_level == 1: - # calculate best branches using upwards search using branch and node likelihoods - keep_nodes = self.__identify_most_likely_path(node_likelihood_dict, function) - elif self.experiment.arguments.pruning_level == 2: # calculate branches which are executed in 80% of the observed cases keep_nodes = self.__identify_most_likely_paths_80_percent_cutoff(branch_likelihood_dict, function) + elif self.experiment.arguments.pruning_level == 2: + # calculate best branches using upwards search using branch and node likelihoods + keep_nodes = self.__identify_most_likely_path(node_likelihood_dict, function) else: raise ValueError("Unknown pruning level: ", self.experiment.arguments.pruning_level) diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index cf1bb4cb3..22aa7ffff 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ b/discopop_library/discopop_optimizer/__main__.py @@ -23,8 +23,8 @@ def parse_args() -> OptimizerArguments: # fmt: off parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output.") - parser.add_argument("-p", "--pruning-level", type=int, default=0, - help="Program path pruning aggressiveness. 0: no pruning. 1: prune to most likely path. 2: prune to paths that cover 80%% of observed decisions per path split.") + parser.add_argument("-p", type=int, default=0, + help="Program path pruning aggressiveness. 0: no pruning. 1: prune to paths that cover 80%% of observed decisions per path split. 2: prune to most likely path.") parser.add_argument("-o", type=int, default=0, help="Optimization level: 0 -> no optimization. 1 -> greedy. 2 -> evolutionary. 
3 -> exhaustive") parser.add_argument("-opt-2-params", type=str, default=None, nargs=2, metavar=("population_size", "generations"), help="Configure parameters of the evolutionary optimization (-o2). Default: 50 5") From 8eabcfc085b95063470d46dce91653bde5fad1b0 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 24 Jan 2024 11:53:10 +0100 Subject: [PATCH 26/26] fix(optimizer): incorrect argument name --- discopop_library/discopop_optimizer/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index 22aa7ffff..5e3b537a2 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ b/discopop_library/discopop_optimizer/__main__.py @@ -68,7 +68,7 @@ def parse_args() -> OptimizerArguments: system_configuration_path=arguments.system_configuration, check_called_function_for_nested_parallelism=arguments.check_called_function_for_nested_parallelism, profiling=arguments.profiling, - pruning_level=arguments.pruning_level, + pruning_level=arguments.p, optimization_level=arguments.o, optimization_level_2_parameters=arguments.opt_2_params, )