From 8335b94980def3145666899c96406c6086098986 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 17 Jan 2024 13:55:33 +0100 Subject: [PATCH 01/26] minor improvements --- .../discopop_optimizer/PETParser/PETParser.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 05b4b4b42..d5e22caf0 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -109,6 +109,9 @@ def parse(self) -> Tuple[nx.DiGraph, int]: self.__flatten_function_graphs() + # remove invalid functions + self.__remove_invalid_functions() + convert_temporary_edges(self.graph) if self.experiment.arguments.verbose: print("converted temporary edges") @@ -119,6 +122,8 @@ def parse(self) -> Tuple[nx.DiGraph, int]: if self.experiment.arguments.verbose: print("calculated data flow") + if self.experiment.arguments.verbose: + print("Propagating read/write information...") self.__propagate_reads_and_writes() if self.experiment.arguments.verbose: print("Propagated read/write information") @@ -1070,13 +1075,19 @@ def inlined_data_flow_calculation(current_node, current_last_writes): return current_last_writes # Note: at this point in time, the graph MUST NOT have branched sections - for function_node in get_all_function_nodes(self.graph): + all_function_nodes = get_all_function_nodes(self.graph) + for idx, function_node in enumerate(all_function_nodes): + if self.experiment.arguments.verbose: + print("Calculating dataflow for function: ", data_at(self.graph, function_node).name, idx,"/",len(all_function_nodes)) if ( function_node not in self.experiment.hotspot_function_node_ids and len(self.experiment.hotspot_function_node_ids) > 0 ): print("SKIPPING NON-HOTSPOT FUNCTION: ", data_at(self.graph, function_node).name) continue + if function_node in self.invalid_functions: + 
print("SKIPPING INVALID FUNCTION: ", data_at(self.graph, function_node).name) + continue try: last_writes: Dict[MemoryRegion, int] = dict() From c3faadbcc6393687a01ddadceccdda7049df66f0 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 17 Jan 2024 14:30:29 +0100 Subject: [PATCH 02/26] fix: extended recusrion limits --- discopop_library/discopop_optimizer/CostModels/utilities.py | 3 +++ .../utilities/optimization/LocalOptimization/TopDown.py | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/discopop_library/discopop_optimizer/CostModels/utilities.py b/discopop_library/discopop_optimizer/CostModels/utilities.py index 5703f974d..588e52976 100644 --- a/discopop_library/discopop_optimizer/CostModels/utilities.py +++ b/discopop_library/discopop_optimizer/CostModels/utilities.py @@ -61,6 +61,8 @@ def get_performance_models_for_functions( # start the collection at the first child of the function for child_id in get_children(graph, node_id): + import sys + sys.setrecursionlimit(100000) performance_models[node_data] = get_node_performance_models( experiment, graph, @@ -70,6 +72,7 @@ def get_performance_models_for_functions( restrict_to_decisions=restrict_to_decisions, allow_sequential=True, ) + sys.setrecursionlimit(1000) # At this point, decisions are restricted to the specified parallelization or the sequential version. # Restrict them to the exact case specified in restrict_to_decisions diff --git a/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py b/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py index 73a61623c..b1b337c73 100644 --- a/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py +++ b/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py @@ -6,6 +6,7 @@ # the 3-Clause BSD License. See the LICENSE file in the package base # directory for details. 
import copy +import sys from typing import Dict, List, Tuple, Set, cast import networkx as nx # type: ignore @@ -51,6 +52,7 @@ def get_locally_optimized_models( for decision in decision_options: try: # create a performance model for the specific decision + sys.setrecursionlimit(100000) performance_models = get_node_performance_models( experiment, graph, @@ -63,6 +65,7 @@ def get_locally_optimized_models( cast(FunctionRoot, data_at(graph, function_node)).node_id ], # ignore first node to prevent duplication of function costs ) + sys.setrecursionlimit(1000) # calculate and append necessary data transfers to the models performance_models_with_transfers = calculate_data_transfers( graph, {cast(FunctionRoot, data_at(graph, function_node)): performance_models}, experiment @@ -132,6 +135,7 @@ def get_locally_optimized_models( continue # construct locally optimal model + sys.setrecursionlimit(100000) performance_models = get_node_performance_models( experiment, graph, @@ -143,6 +147,7 @@ def get_locally_optimized_models( cast(FunctionRoot, data_at(graph, function_node)).node_id ], # ignore first node to prevent duplication of function costs ) + sys.setrecursionlimit(1000) # calculate and append necessary data transfers to the models performance_models_with_transfers = calculate_data_transfers( graph, {cast(FunctionRoot, data_at(graph, function_node)): performance_models}, experiment From 8fab4a73837a34208a526b4571937ebcc7032293 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 17 Jan 2024 15:16:01 +0100 Subject: [PATCH 03/26] type fixes and recursion depth fixes --- discopop_explorer/utils.py | 20 +- .../CostModels/utilities.py | 377 +++++++++--------- .../discopop_optimizer/PETParser/PETParser.py | 8 +- .../discopop_optimizer/optimization/greedy.py | 19 +- .../utilities/MOGUtilities.py | 18 +- .../optimization/LocalOptimization/TopDown.py | 2 - 6 files changed, 234 insertions(+), 210 deletions(-) diff --git a/discopop_explorer/utils.py 
b/discopop_explorer/utils.py index 356af5735..303070f94 100644 --- a/discopop_explorer/utils.py +++ b/discopop_explorer/utils.py @@ -954,15 +954,15 @@ def filter_for_hotspots( if node.type == NodeType.FUNC and hotspot[2] == HotspotNodeType.FUNCTION: result_set.add(node) -# # check for matches from hotspot functions -# for node in nodes: -# for hotspot in all_hotspot_descriptions: -# if hotspot[2] == HotspotNodeType.FUNCTION: -# if hotspot[0] == node.file_id: -# try: -# if pet.get_parent_function(node).name == hotspot[3]: -# print("HOTSPOT FUNCTION MATCH FROM NODE: ", node.id) -# except AssertionError: -# continue + # # check for matches from hotspot functions + # for node in nodes: + # for hotspot in all_hotspot_descriptions: + # if hotspot[2] == HotspotNodeType.FUNCTION: + # if hotspot[0] == node.file_id: + # try: + # if pet.get_parent_function(node).name == hotspot[3]: + # print("HOTSPOT FUNCTION MATCH FROM NODE: ", node.id) + # except AssertionError: + # continue return list(result_set) diff --git a/discopop_library/discopop_optimizer/CostModels/utilities.py b/discopop_library/discopop_optimizer/CostModels/utilities.py index 588e52976..8dbb126f1 100644 --- a/discopop_library/discopop_optimizer/CostModels/utilities.py +++ b/discopop_library/discopop_optimizer/CostModels/utilities.py @@ -61,8 +61,6 @@ def get_performance_models_for_functions( # start the collection at the first child of the function for child_id in get_children(graph, node_id): - import sys - sys.setrecursionlimit(100000) performance_models[node_data] = get_node_performance_models( experiment, graph, @@ -72,7 +70,6 @@ def get_performance_models_for_functions( restrict_to_decisions=restrict_to_decisions, allow_sequential=True, ) - sys.setrecursionlimit(1000) # At this point, decisions are restricted to the specified parallelization or the sequential version. 
# Restrict them to the exact case specified in restrict_to_decisions @@ -120,53 +117,62 @@ def get_node_performance_models( If a set of decision is specified for restrict_to_decisions, only those non-sequential decisions will be allowed. Caution: List might be empty! """ - result_list: List[CostModel] = [] - successors = get_successors(graph, node_id) - successor_count = len(successors) - node_data = data_at(graph, node_id) - if node_data.execute_in_parallel: - current_device_id = node_data.device_id - visited_nodes.add(node_id) - - # consider performance models of children - children_models = get_performance_models_for_children( - experiment, - graph, - node_id, - copy.deepcopy(visited_nodes), - all_function_nodes, - restrict_to_decisions=restrict_to_decisions, - do_not_allow_decisions=do_not_allow_decisions, - get_single_random_model=get_single_random_model, - ) + try: + result_list: List[CostModel] = [] + successors = get_successors(graph, node_id) + successor_count = len(successors) + node_data = data_at(graph, node_id) + if node_data.execute_in_parallel: + current_device_id = node_data.device_id + visited_nodes.add(node_id) + + # consider performance models of children + children_models = get_performance_models_for_children( + experiment, + graph, + node_id, + copy.deepcopy(visited_nodes), + all_function_nodes, + restrict_to_decisions=restrict_to_decisions, + do_not_allow_decisions=do_not_allow_decisions, + get_single_random_model=get_single_random_model, + ) + + if len(children_models) == 0: + if ignore_node_costs is not None: + if node_data.node_id in ignore_node_costs: + children_models = [CostModel(Integer(0), Integer(0))] + else: + children_models = [ + node_data.get_cost_model( + experiment, + all_function_nodes, + experiment.get_system().get_device(current_device_id), + ) + ] - if len(children_models) == 0: - if ignore_node_costs is not None: - if node_data.node_id in ignore_node_costs: - children_models = [CostModel(Integer(0), Integer(0))] else: - 
children_models = [ - node_data.get_cost_model( + if ignore_node_costs is not None: + if node_data.node_id in ignore_node_costs: + tmp_node_cost_model = CostModel(Integer(0), Integer(0)) + else: + tmp_node_cost_model = node_data.get_cost_model( experiment, all_function_nodes, experiment.get_system().get_device(current_device_id), ) - ] - - else: - if ignore_node_costs is not None: - if node_data.node_id in ignore_node_costs: - tmp_node_cost_model = CostModel(Integer(0), Integer(0)) - else: - tmp_node_cost_model = node_data.get_cost_model( - experiment, - all_function_nodes, - experiment.get_system().get_device(current_device_id), - ) - for idx, child_model in enumerate(children_models): - if ignore_node_costs is not None: - if node_data.node_id not in ignore_node_costs: + for idx, child_model in enumerate(children_models): + if ignore_node_costs is not None: + if node_data.node_id not in ignore_node_costs: + children_models[idx] = tmp_node_cost_model.register_child( + child_model, + node_data, + experiment, + all_function_nodes, + experiment.get_system().get_device(current_device_id), + ) + else: children_models[idx] = tmp_node_cost_model.register_child( child_model, node_data, @@ -174,157 +180,162 @@ def get_node_performance_models( all_function_nodes, experiment.get_system().get_device(current_device_id), ) - else: - children_models[idx] = tmp_node_cost_model.register_child( - child_model, - node_data, - experiment, - all_function_nodes, - experiment.get_system().get_device(current_device_id), - ) - # construct the performance models - if successor_count >= 1: - removed_successors = False - if get_single_random_model and successor_count > 1: - # pick only a single successor - successors = [random.choice(successors)] - removed_successors = True - - for children_model in children_models: - for successor in successors: - # ## CHECK REQUIREMENTS ## - # check if successor validates a requirements edge to restrain the created combinations - # 1.1. 
check if optionEdge between any node in visited_nodes and successor exists - # 1.2. if so, check if option edge to other node in visited nodes exists - # 1.3. if so, check if a requirements edge between both option exists. - # 1.4. if not, the path is not valid since two options for the same - # source code location would be selected - path_invalid = False - # 1.1 - # if successor in [109, 110, 111, 112, 113, 114]: - # print("VISITED NODES: ", visited_nodes) - for visited_node_id in visited_nodes: - options = get_out_mutex_edges(graph, visited_node_id) - if successor in options: - # 1.2 - visited_options = [opt for opt in options if opt in visited_nodes] - if len(visited_options) > 0: - # 1.3 - for vo in visited_options: - # 1.4 - if successor not in get_requirements(graph, vo): - path_invalid = True - break + # construct the performance models + if successor_count >= 1: + removed_successors = False + if get_single_random_model and successor_count > 1: + # pick only a single successor + successors = [random.choice(successors)] + removed_successors = True + + for children_model in children_models: + for successor in successors: + # ## CHECK REQUIREMENTS ## + # check if successor validates a requirements edge to restrain the created combinations + # 1.1. check if optionEdge between any node in visited_nodes and successor exists + # 1.2. if so, check if option edge to other node in visited nodes exists + # 1.3. if so, check if a requirements edge between both option exists. + # 1.4. 
if not, the path is not valid since two options for the same + # source code location would be selected + path_invalid = False + # 1.1 + # if successor in [109, 110, 111, 112, 113, 114]: + # print("VISITED NODES: ", visited_nodes) + for visited_node_id in visited_nodes: + options = get_out_mutex_edges(graph, visited_node_id) + if successor in options: + # 1.2 + visited_options = [opt for opt in options if opt in visited_nodes] + if len(visited_options) > 0: + # 1.3 + for vo in visited_options: + # 1.4 + if successor not in get_requirements(graph, vo): + path_invalid = True + break + if path_invalid: + break if path_invalid: - break - if path_invalid: - continue + continue + + # 2 check if a sibling of successor exists which has a requirements edge to a visited node + # 2.1 check if an incoming or outgoing option edge exists, get the node id for the sequential version + # 2.2 for all parallelization options + # 2.3 check if a requirements edge to a visited node exists + # 2.4 if so, stop if successor is NOT the parallelization option with the requirements edge + # 2.1 + for sibling in successors: + sequential_version_ids = [] + if len(get_out_mutex_edges(graph, sibling)) > 0: + sequential_version_ids = [sibling] + else: + for seq in get_in_mutex_edges(graph, sibling): + sequential_version_ids.append(seq) + # 2.2 + for seq in sequential_version_ids: + for option in get_out_mutex_edges(graph, seq): + if option == successor: + continue + # 2.3 + for visited_req in [ + req for req in get_requirements(graph, option) if req in visited_nodes + ]: + # 2.4 + if visited_req != successor: + path_invalid = True + break + if path_invalid: + break + if path_invalid: + break - # 2 check if a sibling of successor exists which has a requirements edge to a visited node - # 2.1 check if an incoming or outgoing option edge exists, get the node id for the sequential version - # 2.2 for all parallelization options - # 2.3 check if a requirements edge to a visited node exists - # 2.4 if 
so, stop if successor is NOT the parallelization option with the requirements edge - # 2.1 - for sibling in successors: - sequential_version_ids = [] - if len(get_out_mutex_edges(graph, sibling)) > 0: - sequential_version_ids = [sibling] - else: - for seq in get_in_mutex_edges(graph, sibling): - sequential_version_ids.append(seq) - # 2.2 - for seq in sequential_version_ids: - for option in get_out_mutex_edges(graph, seq): - if option == successor: - continue - # 2.3 - for visited_req in [req for req in get_requirements(graph, option) if req in visited_nodes]: - # 2.4 - if visited_req != successor: + # do not allow nested parallelization suggestions on devices of type GPU + if True: # option to disable this check + combined_visited_nodes = copy.deepcopy(visited_nodes) + combined_visited_nodes.add(successor) + gpu_suggestions = [ + node_id + for node_id in combined_visited_nodes + if isinstance( + experiment.get_system().get_device(data_at(graph, node_id).device_id), + GPU, + ) + ] + # check if two suggestions are in a contained-in relation + for suggestion_1 in gpu_suggestions: + all_parents = get_all_parents(graph, suggestion_1) + for suggestion_2 in gpu_suggestions: + if suggestion_1 == suggestion_2: + continue + if suggestion_2 in all_parents: path_invalid = True break if path_invalid: break - if path_invalid: - break - - # do not allow nested parallelization suggestions on devices of type GPU - if True: # option to disable this check - combined_visited_nodes = copy.deepcopy(visited_nodes) - combined_visited_nodes.add(successor) - gpu_suggestions = [ - node_id - for node_id in combined_visited_nodes - if isinstance( - experiment.get_system().get_device(data_at(graph, node_id).device_id), - GPU, - ) - ] - # check if two suggestions are in a contained-in relation - for suggestion_1 in gpu_suggestions: - all_parents = get_all_parents(graph, suggestion_1) - for suggestion_2 in gpu_suggestions: - if suggestion_1 == suggestion_2: - continue - if suggestion_2 in 
all_parents: - path_invalid = True - break - if path_invalid: - break - # check if the current decision invalidates decision requirements, if some are specified - if restrict_to_decisions is not None: - if not (successor in restrict_to_decisions or data_at(graph, successor).suggestion is None): - path_invalid = True - if not path_invalid: - if data_at(graph, successor).suggestion is None: - # if the sequential "fallback" has been used, check if a different option is specifically - # mentioned in restrict_to_decisions. If so, the sequential fallback shall be ignored. - options = get_out_mutex_edges(graph, successor) - restricted_options = [opt for opt in options if opt in restrict_to_decisions] - if len(restricted_options) != 0: - # do not use he sequential fallback since a required option exists - path_invalid = True - - if do_not_allow_decisions is not None: - if successor in do_not_allow_decisions: - path_invalid = True - - if path_invalid: - continue + # check if the current decision invalidates decision requirements, if some are specified + if restrict_to_decisions is not None: + if not (successor in restrict_to_decisions or data_at(graph, successor).suggestion is None): + path_invalid = True + if not path_invalid: + if data_at(graph, successor).suggestion is None: + # if the sequential "fallback" has been used, check if a different option is specifically + # mentioned in restrict_to_decisions. If so, the sequential fallback shall be ignored. 
+ options = get_out_mutex_edges(graph, successor) + restricted_options = [opt for opt in options if opt in restrict_to_decisions] + if len(restricted_options) != 0: + # do not use he sequential fallback since a required option exists + path_invalid = True - # ## END OF REQUIREMENTS CHECK ## + if do_not_allow_decisions is not None: + if successor in do_not_allow_decisions: + path_invalid = True - combined_model = children_model - # add transfer costs - transfer_costs_model = get_edge_data(graph, node_id, successor).get_cost_model() - combined_model = combined_model.parallelizable_plus_combine(transfer_costs_model) + if path_invalid: + continue - # if the successor is "determined" by a path decision, add path decision to the combined model - if len(successors) > 1 or removed_successors: - combined_model.path_decisions.append(successor) - # append the model of the successor - for model in get_node_performance_models( - experiment, - graph, - successor, - copy.deepcopy(visited_nodes), - all_function_nodes, - restrict_to_decisions=restrict_to_decisions, - do_not_allow_decisions=do_not_allow_decisions, - get_single_random_model=get_single_random_model, - ignore_node_costs=ignore_node_costs, - ): - tmp = combined_model.parallelizable_plus_combine(model) - tmp.path_decisions += [d for d in model.path_decisions if d not in tmp.path_decisions] - result_list.append(tmp) - if len(result_list) >= 1: - return result_list - - # successor count == 0 or successor count > 1 - return children_models + # ## END OF REQUIREMENTS CHECK ## + + combined_model = children_model + # add transfer costs + transfer_costs_model = get_edge_data(graph, node_id, successor).get_cost_model() + combined_model = combined_model.parallelizable_plus_combine(transfer_costs_model) + + # if the successor is "determined" by a path decision, add path decision to the combined model + if len(successors) > 1 or removed_successors: + combined_model.path_decisions.append(successor) + # append the model of the 
successor + for model in get_node_performance_models( + experiment, + graph, + successor, + copy.deepcopy(visited_nodes), + all_function_nodes, + restrict_to_decisions=restrict_to_decisions, + do_not_allow_decisions=do_not_allow_decisions, + get_single_random_model=get_single_random_model, + ignore_node_costs=ignore_node_costs, + ): + tmp = combined_model.parallelizable_plus_combine(model) + tmp.path_decisions += [d for d in model.path_decisions if d not in tmp.path_decisions] + result_list.append(tmp) + if len(result_list) >= 1: + return result_list + + # successor count == 0 or successor count > 1 + return children_models + + except RecursionError: + warnings.warn("Allowed recursion depth exceeded at node " + str(node_id) + " . Results may be inaccurate.") + node_data = data_at(graph, node_id) + return [ + node_data.get_cost_model( + experiment, + all_function_nodes, + experiment.get_system().get_device(current_device_id), + ) + ] def get_performance_models_for_children( diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index d5e22caf0..580b1c4c7 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -1078,7 +1078,13 @@ def inlined_data_flow_calculation(current_node, current_last_writes): all_function_nodes = get_all_function_nodes(self.graph) for idx, function_node in enumerate(all_function_nodes): if self.experiment.arguments.verbose: - print("Calculating dataflow for function: ", data_at(self.graph, function_node).name, idx,"/",len(all_function_nodes)) + print( + "Calculating dataflow for function: ", + data_at(self.graph, function_node).name, + idx, + "/", + len(all_function_nodes), + ) if ( function_node not in self.experiment.hotspot_function_node_ids and len(self.experiment.hotspot_function_node_ids) > 0 diff --git a/discopop_library/discopop_optimizer/optimization/greedy.py 
b/discopop_library/discopop_optimizer/optimization/greedy.py index b63ba93e1..0ed1bd270 100644 --- a/discopop_library/discopop_optimizer/optimization/greedy.py +++ b/discopop_library/discopop_optimizer/optimization/greedy.py @@ -80,7 +80,7 @@ def greedy_search( local_results: List[Tuple[Dict[int, List[List[int]]], int, ContextObject]] = [] # prepare arguments for parallel cost calculation - param_list = [] + param_list: List[Dict[int, List[List[int]]]] = [] for decision in decision_set: # copy made decisions local_decision_set: Dict[int, List[List[int]]] = dict() @@ -92,11 +92,16 @@ def greedy_search( local_decision_set[function_node.node_id][dcsi] = [decision] param_list.append(local_decision_set) - # calculate costs in parallel - with Pool(initializer=__initialize_cost_caluclation_worker, initargs=(experiment, arguments)) as pool: - tmp_result = list( - tqdm.tqdm(pool.imap_unordered(__get_score, param_list), total=len(param_list), disable=True) - ) + # # calculate costs in parallel + # with Pool(initializer=__initialize_cost_caluclation_worker, initargs=(experiment, arguments)) as pool: + # tmp_result = list( + # tqdm.tqdm(pool.imap_unordered(__get_score, param_list), total=len(param_list), disable=True) + # ) + # calculate costs + tmp_result: List[Tuple[Dict[int, List[List[int]]], int, ContextObject]] = [] + for param in param_list: + tmp_result.append(__get_score(param)) + for local_result in tmp_result: # remove invalid elements if local_result[1] == -1: @@ -146,7 +151,7 @@ def __initialize_cost_caluclation_worker( global_arguments = arguments -def __get_score(param_tuple) -> Tuple[List[int], int, ContextObject]: +def __get_score(param_tuple) -> Tuple[Dict[int, List[List[int]]], int, ContextObject]: global global_experiment global global_arguments configuration = param_tuple diff --git a/discopop_library/discopop_optimizer/utilities/MOGUtilities.py b/discopop_library/discopop_optimizer/utilities/MOGUtilities.py index 2434812e1..ae4e34950 100644 --- 
a/discopop_library/discopop_optimizer/utilities/MOGUtilities.py +++ b/discopop_library/discopop_optimizer/utilities/MOGUtilities.py @@ -9,6 +9,7 @@ import itertools from multiprocessing import Pool from typing import Any, ClassVar, Dict, List, Optional, cast, Set, Tuple +import warnings import matplotlib # type: ignore import matplotlib.pyplot as plt # type:ignore @@ -416,13 +417,16 @@ def get_read_and_written_data_from_subgraph( read_memory_regions: Set[MemoryRegion] = set() written_memory_regions: Set[MemoryRegion] = set() # collect reads and writes from successors and children - subgraph = get_children(graph, node_id) - if not ignore_successors: - subgraph += get_successors(graph, node_id) - for successor in subgraph: - reads, writes = get_read_and_written_data_from_subgraph(graph, successor) - read_memory_regions.update(reads) - written_memory_regions.update(writes) + try: + subgraph = get_children(graph, node_id) + if not ignore_successors: + subgraph += get_successors(graph, node_id) + for successor in subgraph: + reads, writes = get_read_and_written_data_from_subgraph(graph, successor) + read_memory_regions.update(reads) + written_memory_regions.update(writes) + except RecursionError: + warnings.warn("Recursion limit exceeeded. 
Read and write in subtrees might be inaccurate.") # add reads and writes of the node itself node_data = data_at(graph, node_id) read_memory_regions.update([read_access.memory_region for read_access in node_data.read_memory_regions]) diff --git a/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py b/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py index b1b337c73..1599dfb8d 100644 --- a/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py +++ b/discopop_library/discopop_optimizer/utilities/optimization/LocalOptimization/TopDown.py @@ -52,7 +52,6 @@ def get_locally_optimized_models( for decision in decision_options: try: # create a performance model for the specific decision - sys.setrecursionlimit(100000) performance_models = get_node_performance_models( experiment, graph, @@ -65,7 +64,6 @@ def get_locally_optimized_models( cast(FunctionRoot, data_at(graph, function_node)).node_id ], # ignore first node to prevent duplication of function costs ) - sys.setrecursionlimit(1000) # calculate and append necessary data transfers to the models performance_models_with_transfers = calculate_data_transfers( graph, {cast(FunctionRoot, data_at(graph, function_node)): performance_models}, experiment From 54a3328e41f3dff17f2a7f544baecb5a13786410 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 17 Jan 2024 16:15:54 +0100 Subject: [PATCH 04/26] feat(optimizer): added argument to enable optimization step --- .../discopop_optimizer/OptimizerArguments.py | 1 + .../discopop_optimizer/__main__.py | 3 + .../discopop_optimizer/optimizer.py | 67 ++++++++++--------- 3 files changed, 38 insertions(+), 33 deletions(-) diff --git a/discopop_library/discopop_optimizer/OptimizerArguments.py b/discopop_library/discopop_optimizer/OptimizerArguments.py index 255fbf1f3..eea8cab06 100644 --- a/discopop_library/discopop_optimizer/OptimizerArguments.py +++ 
b/discopop_library/discopop_optimizer/OptimizerArguments.py @@ -26,6 +26,7 @@ class OptimizerArguments(object): check_called_function_for_nested_parallelism: bool profiling: bool greedy: bool + optimization: bool def __post_init__(self): # fix correct optimization method diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index 6af35bccd..804656905 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ b/discopop_library/discopop_optimizer/__main__.py @@ -29,6 +29,8 @@ def parse_args() -> OptimizerArguments: help="Enable evolutionary search. By default, a greedy search is performed.") parser.add_argument("-g", "--greedy", action="store_true", help="Enable greedy search. (Default)") + parser.add_argument("-o", "--optimize", action="store_true", + help="Enable optimization.") parser.add_argument( "--doall-microbench-file", type=str, default="None", help="Do-All microbenchmark results" @@ -71,6 +73,7 @@ def parse_args() -> OptimizerArguments: check_called_function_for_nested_parallelism=arguments.check_called_function_for_nested_parallelism, profiling=arguments.profiling, greedy=arguments.greedy, + optimization=arguments.optimize ) diff --git a/discopop_library/discopop_optimizer/optimizer.py b/discopop_library/discopop_optimizer/optimizer.py index 080dc6028..b185f785f 100644 --- a/discopop_library/discopop_optimizer/optimizer.py +++ b/discopop_library/discopop_optimizer/optimizer.py @@ -199,40 +199,41 @@ def run(arguments: OptimizerArguments): if node_id != node_data.node_id: node_data.node_id = node_id - # get values for free symbols - initialize_free_symbol_ranges_and_distributions(experiment, arguments, system) - - if arguments.verbose: - print("# SUBSTITUTIONS:") - for key in experiment.substitutions: - print("#", key, " ->", experiment.substitutions[key]) - print() - - # calculate options for easy access - available_decisions = 
get_available_decisions_for_functions(experiment.optimization_graph, arguments) - - # calculate costs for all combinations of decisions - if arguments.exhaustive: - best_configuration = evaluate_all_decision_combinations( - experiment, available_decisions, arguments, optimizer_dir - ) - elif arguments.greedy: - best_configuration = greedy_search(experiment, available_decisions, arguments, optimizer_dir) - elif arguments.evolutionary != None: - # perform evolutionary search - best_configuration = perform_evolutionary_search( - experiment, - available_decisions, - arguments, - optimizer_dir, - ) - else: - raise ValueError("No optimization method specified!") + if arguments.optimization: + # get values for free symbols + initialize_free_symbol_ranges_and_distributions(experiment, arguments, system) + + if arguments.verbose: + print("# SUBSTITUTIONS:") + for key in experiment.substitutions: + print("#", key, " ->", experiment.substitutions[key]) + print() + + # calculate options for easy access + available_decisions = get_available_decisions_for_functions(experiment.optimization_graph, arguments) + + # calculate costs for all combinations of decisions + if arguments.exhaustive: + best_configuration = evaluate_all_decision_combinations( + experiment, available_decisions, arguments, optimizer_dir + ) + elif arguments.greedy: + best_configuration = greedy_search(experiment, available_decisions, arguments, optimizer_dir) + elif arguments.evolutionary != None: + # perform evolutionary search + best_configuration = perform_evolutionary_search( + experiment, + available_decisions, + arguments, + optimizer_dir, + ) + else: + raise ValueError("No optimization method specified!") - if best_configuration is not None: - best_configuration = optimize_updates(experiment, best_configuration, arguments) - # append the configuration to the list of patterns - experiment.detection_result.patterns.optimizer_output.append(best_configuration) + if best_configuration is not None: + 
best_configuration = optimize_updates(experiment, best_configuration, arguments) + # append the configuration to the list of patterns + experiment.detection_result.patterns.optimizer_output.append(best_configuration) if arguments.profiling: experiment.profile.disable() # type: ignore From 9bac12796e1f2e270db3e378d4eb4a54415d7c94 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 17 Jan 2024 17:43:56 +0100 Subject: [PATCH 05/26] feat(optimizer): mark intermediate suggestions as non-applicable --- discopop_explorer/pattern_detectors/PatternBase.py | 2 ++ discopop_library/PatchGenerator/from_json_patterns.py | 3 +++ .../suggestions/optimizers/loop_collapse.py | 8 +++++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/discopop_explorer/pattern_detectors/PatternBase.py b/discopop_explorer/pattern_detectors/PatternBase.py index ae2f0ba68..8861843b7 100644 --- a/discopop_explorer/pattern_detectors/PatternBase.py +++ b/discopop_explorer/pattern_detectors/PatternBase.py @@ -21,6 +21,7 @@ class PatternBase(object): node_id: NodeID start_line: LineID end_line: LineID + applicable_pattern: bool def __init__(self, node: Node): # create a file lock to synchronize processes @@ -41,6 +42,7 @@ def __init__(self, node: Node): self.node_id = node.id self.start_line = node.start_position() self.end_line = node.end_position() + self.applicable_pattern = True def to_json(self): dic = self.__dict__ diff --git a/discopop_library/PatchGenerator/from_json_patterns.py b/discopop_library/PatchGenerator/from_json_patterns.py index fe51f3814..bc46d0f15 100644 --- a/discopop_library/PatchGenerator/from_json_patterns.py +++ b/discopop_library/PatchGenerator/from_json_patterns.py @@ -34,6 +34,9 @@ def from_json_patterns( if suggestion_type == "optimizer_output": from_optimizer_output(file_mapping, patterns_by_type, suggestion, arguments, patch_generator_dir) continue + suggestion_dict = json.loads(suggestion) + if not suggestion_dict["applicable_pattern"]: + continue if 
arguments.verbose: print("Suggestion: ", suggestion) file_id_to_modified_code: Dict[int, str] = from_json_strings( diff --git a/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py b/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py index 37027d54b..4b238cd40 100644 --- a/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py +++ b/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py @@ -13,7 +13,8 @@ import networkx as nx # type: ignore -import tqdm # type: ignore +import tqdm +from build.lib.discopop_library.result_classes.OptimizerOutputPattern import OptimizerOutputPattern # type: ignore from discopop_explorer.pattern_detectors.do_all_detector import DoAllInfo # type: ignore from discopop_library.PatternIdManagement.unique_pattern_id import get_unique_pattern_id from discopop_library.discopop_optimizer.Variables.Experiment import Experiment @@ -167,6 +168,7 @@ def __collapse_loops_in_function(function_node_id): pattern_info.device_type = ( global_experiment.get_system().get_device(node_data_copy.device_id).get_device_type() ) + pattern_info.applicable_pattern = False # patterns are only applicable via the optimizer output pattern interfaces due to potential data movement global_experiment.suggestion_to_node_ids_dict[pattern_id] = [new_node_id] global_experiment.node_id_to_suggestion_dict[new_node_id] = pattern_id @@ -240,6 +242,10 @@ def __collapse_loops_in_function(function_node_id): # register pattern for output # todo: find a nicer solution to duplicating the patterns for each device mapping global_experiment.detection_result.patterns.do_all.append(pattern_info) + # construct optimizer output pattern to represent the non-standalone pattern_info + optimizer_output_pattern = OptimizerOutputPattern(pattern_info._node, [new_node_id], global_experiment.get_system().get_host_device_id()) + optimizer_output_pattern.add_pattern(pattern_info.pattern_id, 
pattern_info.device_id, pattern_info.device_type) + global_experiment.detection_result.patterns.optimizer_output.append(optimizer_output_pattern) print("REGISTERED PATTERN INFO: ", pattern_id, " for Device: ", data_at(global_graph, csrc).device_id) print(pattern_info) print() From deb2ccf3d46053b017cf7371f192f48f6b408fb0 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 10:34:15 +0100 Subject: [PATCH 06/26] feat(profiler): taken branch instrumentation --- DiscoPoP/DiscoPoP.cpp | 66 +++++++++++++++++++++++++++++-- DiscoPoP/DiscoPoP.hpp | 3 ++ rtlib/CMakeLists.txt | 1 + rtlib/cu_taken_branch_counter.cpp | 54 +++++++++++++++++++++++++ rtlib/cu_taken_branch_counter.hpp | 18 +++++++++ 5 files changed, 139 insertions(+), 3 deletions(-) create mode 100644 rtlib/cu_taken_branch_counter.cpp create mode 100644 rtlib/cu_taken_branch_counter.hpp diff --git a/DiscoPoP/DiscoPoP.cpp b/DiscoPoP/DiscoPoP.cpp index 3603548c3..074643803 100644 --- a/DiscoPoP/DiscoPoP.cpp +++ b/DiscoPoP/DiscoPoP.cpp @@ -105,6 +105,8 @@ void DiscoPoP::setupCallbacks() { DpLoopExit = ThisModule->getOrInsertFunction("__dp_loop_exit", Void, Int32, Int32); + + DpTakenBranchCounterIncr = ThisModule->getOrInsertFunction("__dp_incr_taken_branch_counter", Void, CharPtr, Int32, Int32); } bool DiscoPoP::doInitialization(Module &M) { @@ -258,7 +260,6 @@ bool DiscoPoP::doFinalization(Module &M) { } // DPInstrumentationOmission end - return true; } @@ -451,6 +452,62 @@ void DiscoPoP::populateGlobalVariablesSet(Region *TopRegion, } } + +void DiscoPoP::createTakenBranchInstrumentation(Region* TopRegion, map > &BBIDToCUIDsMap){ + /* Create calls to count taken branches inbetween CUs during execution */ + + + for (Region::block_iterator bb = TopRegion->block_begin(); + bb != TopRegion->block_end(); ++bb) { + for (BasicBlock::iterator instruction = (*bb)->begin(); + instruction != (*bb)->end(); ++instruction) { + if(isa(instruction)){ + BranchInst* branchInst = cast(instruction); + 
branchInst->print(errs()); + errs() << "\n"; + + // check for conditional branches, as unconditional ones can be ignored for counting + if(! branchInst->isUnconditional()){ + // branchInst is conditional + errs() << "\tCONDITIONAL\n"; + + // prepare IRBuilder to insert instrumentation + IRBuilder<> IRB(branchInst); + + // get BBId and CU IDS of the source + string source_BBID = bb->getName().str(); + errs() << "\tsourceBB: " << source_BBID << "\n"; + errs() << "\tnumSuccessors: " << branchInst->getNumSuccessors() << "\n"; + for(auto source_cu : BBIDToCUIDsMap[source_BBID]){ + errs() << "\t\tsourceCU: " << source_cu->ID << "\n"; + + // get BBIds of all targets + for(int i = 0; i < branchInst->getNumSuccessors(); i++){ + string successor_BBID = branchInst->getSuccessor(i)->getName().str(); + errs() << "\t\tsuccessorBB: " << successor_BBID << "\n"; + // get CUs of all targets + for(auto target_cu : BBIDToCUIDsMap[successor_BBID]){ + errs() << "\t\t\tsuccessorCU: " << target_cu->ID << "\n"; + // add instrumentation prior to the branch instruction + vector args; + string source_and_target = source_cu->ID + ";" + target_cu->ID; + args.push_back(getOrInsertVarName_dynamic(source_and_target, IRB)); + args.push_back(branchInst->getCondition()); + bool counter_active_on_cmp_value = (i == 0 ? 
1 : 0); + args.push_back(ConstantInt::get(Int32, counter_active_on_cmp_value)); + IRB.CreateCall(DpTakenBranchCounterIncr, args); + } + } + } + } + } + } + } +} + + + + void DiscoPoP::createCUs(Region *TopRegion, set &globalVariablesSet, vector &CUVector, map > &BBIDToCUIDsMap, @@ -1889,6 +1946,7 @@ void DiscoPoP::dp_reduction_insert_functions() { llvm::FunctionType* output_fn_type = llvm::FunctionType::get(llvm::Type::getVoidTy(*ctx_), false); FunctionCallee loop_counter_output_callee = module_->getOrInsertFunction("loop_counter_output", output_fn_type); + FunctionCallee cu_taken_branch_counter_output_callee = module_->getOrInsertFunction("__dp_taken_branch_counter_output", output_fn_type); llvm::Function* main_fn = module_->getFunction("main"); if (main_fn) { for (auto it = llvm::inst_begin(main_fn); it != llvm::inst_end(main_fn); @@ -1896,6 +1954,7 @@ void DiscoPoP::dp_reduction_insert_functions() { if (llvm::isa(&(*it))) { llvm::IRBuilder<> ir_builder(&(*it)); ir_builder.CreateCall(loop_counter_output_callee); + ir_builder.CreateCall(cu_taken_branch_counter_output_callee); break; } } @@ -2087,7 +2146,7 @@ bool DiscoPoP::runOnModule(Module &M) { bool DiscoPoP::runOnFunction(Function &F) { if (DP_DEBUG) { - errs() << "pass DiscoPoP: run pass on function\n"; + errs() << "pass DiscoPoP: run pass on function " << funcName.str() << "\n"; } StringRef funcName = F.getName(); @@ -2173,8 +2232,9 @@ bool DiscoPoP::runOnFunction(Function &F) { createCUs(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap, root, LI); - fillCUVariables(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap); + createTakenBranchInstrumentation(TopRegion, BBIDToCUIDsMap); + fillCUVariables(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap); fillStartEndLineNumbers(root, LI); diff --git a/DiscoPoP/DiscoPoP.hpp b/DiscoPoP/DiscoPoP.hpp index f254ae964..d64f66bc3 100644 --- a/DiscoPoP/DiscoPoP.hpp +++ b/DiscoPoP/DiscoPoP.hpp @@ -301,6 +301,7 @@ namespace { FunctionCallee DpCallOrInvoke; 
FunctionCallee DpFuncEntry, DpFuncExit; FunctionCallee DpLoopEntry, DpLoopExit; + FunctionCallee DpTakenBranchCounterIncr; // Basic types Type *Void; @@ -378,6 +379,8 @@ namespace { map > &BBIDToCUIDsMap, Node *root, LoopInfo &LI); + void createTakenBranchInstrumentation(Region* TopRegion, map > &BBIDToCUIDsMap); + void fillCUVariables(Region *TopRegion, set &globalVariablesSet, vector &CUVector, map > &BBIDToCUIDsMap); diff --git a/rtlib/CMakeLists.txt b/rtlib/CMakeLists.txt index 5f1880b1f..7b03a51e7 100644 --- a/rtlib/CMakeLists.txt +++ b/rtlib/CMakeLists.txt @@ -15,6 +15,7 @@ set(DiscoPoP_SOURCES iFunctions.cpp signature.cpp loop_counter.cpp + cu_taken_branch_counter.cpp #../share/lib/DPUtils.cpp MemoryRegionTree.cpp ) diff --git a/rtlib/cu_taken_branch_counter.cpp b/rtlib/cu_taken_branch_counter.cpp new file mode 100644 index 000000000..ab4044d75 --- /dev/null +++ b/rtlib/cu_taken_branch_counter.cpp @@ -0,0 +1,54 @@ +/* + * This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) + * + * Copyright (c) 2020, Technische Universitaet Darmstadt, Germany + * + * This software may be modified and distributed under the terms of + * the 3-Clause BSD License. See the LICENSE file in the package base + * directory for details. + * + */ + +#include "cu_taken_branch_counter.hpp" + + +static std::unordered_map cuec; + +extern "C" +{ + +void __dp_incr_taken_branch_counter(char* source_and_target, int cmp_res, int active_on) { + if(cmp_res == active_on){ + if(cuec.count(source_and_target) == 0){ + cuec[source_and_target] = 1; + } + else{ + cuec[source_and_target] = cuec[source_and_target] + 1; + } + } +} + +void __dp_taken_branch_counter_output() { + std::cout << "Outputting instrumentation results (taken branches)... 
"; + + std::ifstream ifile; + std::string line; + std::ofstream ofile; + + // output information about the loops + std::string tmp(getenv("DOT_DISCOPOP_PROFILER")); + tmp += "/cu_taken_branch_counter_output.txt"; + ofile.open(tmp.data()); + + for(auto pair : cuec){ + ofile << pair.first << ";" << pair.second << "\n"; + } + + ofile.close(); + + std::cout << "done" << std::endl; + + + +} +} \ No newline at end of file diff --git a/rtlib/cu_taken_branch_counter.hpp b/rtlib/cu_taken_branch_counter.hpp new file mode 100644 index 000000000..677c88d9c --- /dev/null +++ b/rtlib/cu_taken_branch_counter.hpp @@ -0,0 +1,18 @@ +/* + * This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de) + * + * Copyright (c) 2020, Technische Universitaet Darmstadt, Germany + * + * This software may be modified and distributed under the terms of + * the 3-Clause BSD License. See the LICENSE file in the package base + * directory for details. + * + */ + +#pragma once + +#include +#include +#include +#include +#include \ No newline at end of file From 3c905d6f2fdc87f71a7ecb4eb88051f2fcd929d8 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 10:47:02 +0100 Subject: [PATCH 07/26] chore(profiler): cleanup --- DiscoPoP/DiscoPoP.cpp | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/DiscoPoP/DiscoPoP.cpp b/DiscoPoP/DiscoPoP.cpp index 074643803..00c7e2593 100644 --- a/DiscoPoP/DiscoPoP.cpp +++ b/DiscoPoP/DiscoPoP.cpp @@ -455,39 +455,25 @@ void DiscoPoP::populateGlobalVariablesSet(Region *TopRegion, void DiscoPoP::createTakenBranchInstrumentation(Region* TopRegion, map > &BBIDToCUIDsMap){ /* Create calls to count taken branches inbetween CUs during execution */ - - for (Region::block_iterator bb = TopRegion->block_begin(); bb != TopRegion->block_end(); ++bb) { for (BasicBlock::iterator instruction = (*bb)->begin(); instruction != (*bb)->end(); ++instruction) { if(isa(instruction)){ BranchInst* branchInst = 
cast(instruction); - branchInst->print(errs()); - errs() << "\n"; - // check for conditional branches, as unconditional ones can be ignored for counting if(! branchInst->isUnconditional()){ // branchInst is conditional - errs() << "\tCONDITIONAL\n"; - // prepare IRBuilder to insert instrumentation IRBuilder<> IRB(branchInst); - // get BBId and CU IDS of the source string source_BBID = bb->getName().str(); - errs() << "\tsourceBB: " << source_BBID << "\n"; - errs() << "\tnumSuccessors: " << branchInst->getNumSuccessors() << "\n"; for(auto source_cu : BBIDToCUIDsMap[source_BBID]){ - errs() << "\t\tsourceCU: " << source_cu->ID << "\n"; - // get BBIds of all targets for(int i = 0; i < branchInst->getNumSuccessors(); i++){ string successor_BBID = branchInst->getSuccessor(i)->getName().str(); - errs() << "\t\tsuccessorBB: " << successor_BBID << "\n"; // get CUs of all targets for(auto target_cu : BBIDToCUIDsMap[successor_BBID]){ - errs() << "\t\t\tsuccessorCU: " << target_cu->ID << "\n"; // add instrumentation prior to the branch instruction vector args; string source_and_target = source_cu->ID + ";" + target_cu->ID; @@ -2146,7 +2132,7 @@ bool DiscoPoP::runOnModule(Module &M) { bool DiscoPoP::runOnFunction(Function &F) { if (DP_DEBUG) { - errs() << "pass DiscoPoP: run pass on function " << funcName.str() << "\n"; + errs() << "pass DiscoPoP: run pass on function " << F.getName().str() << "\n"; } StringRef funcName = F.getName(); From e174dc1aab8874e91fdede684b5ace3a14022498 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 11:10:38 +0100 Subject: [PATCH 08/26] feat(optimizer): preparations for graph pruning --- .../discopop_optimizer/PETParser/PETParser.py | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 580b1c4c7..011058477 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ 
b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -107,6 +107,14 @@ def parse(self) -> Tuple[nx.DiGraph, int]: # self.__new_parse_branched_sections() + if self.experiment.arguments.verbose: + print("pruning graphs based on taken branches") + self.__prune_branches() + print("\tDone.") + + import sys + sys.exit(0) + self.__flatten_function_graphs() # remove invalid functions @@ -139,6 +147,45 @@ def get_new_node_id(self) -> int: self.next_free_node_id += 1 return buffer + def __prune_branches(self): + """Prune branches based on the measured likelihood of execution""" + # load observed branching information + branch_counter_dict: Dict[str, Dict[str, int]] = dict() + with open("profiler/cu_taken_branch_counter_output.txt", "r") as f: + for line in f.readlines(): + line = line.replace("\n", "") + split_line = line.split(";") + source_cu_id = split_line[0] + target_cu_id = split_line[1] + counter = int(split_line[2]) + if source_cu_id not in branch_counter_dict: + branch_counter_dict[source_cu_id] = dict() + branch_counter_dict[source_cu_id][target_cu_id] = counter + print("Branch counter dict: ") + print(branch_counter_dict) + + # convert counters to likelihood + branch_likelihood_dict: Dict[str, Dict[str, float]] = dict() + for source_cu_id in branch_counter_dict: + total_counter = 0 + for target_cu_id in branch_counter_dict[source_cu_id]: + total_counter += branch_counter_dict[source_cu_id][target_cu_id] + branch_likelihood_dict[source_cu_id] = dict() + for target_cu_id in branch_counter_dict[source_cu_id]: + branch_likelihood_dict[source_cu_id][target_cu_id] = branch_counter_dict[source_cu_id][target_cu_id] / total_counter + + print("Branch likelihood dict:") + print(branch_likelihood_dict) + + # calculate total branch likelihood + + + for function in get_all_function_nodes(self.graph): + print("pruning function: ", cast(FunctionRoot, data_at(self.graph, function)).name) + + + + def __flatten_function_graphs(self): # TODO: remove deepcopies by storing 
data independently from the nodes From 58e0661102296204b20e324d3c3a885156cd9eba Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 11:48:21 +0100 Subject: [PATCH 09/26] node likelihood calculation --- .../discopop_optimizer/PETParser/PETParser.py | 60 ++++++++++++++++++- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 011058477..9e27ab686 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -177,13 +177,69 @@ def __prune_branches(self): print("Branch likelihood dict:") print(branch_likelihood_dict) - # calculate total branch likelihood - + # fix branch likelihood, necessary due to different structure of BB vs. Optimization graph + for function in get_all_function_nodes(self.graph): print("pruning function: ", cast(FunctionRoot, data_at(self.graph, function)).name) + # calculate node likelihoods + node_likelihood_dict: Dict[int, float] = dict() + # initialize + queue: List[int] = [] + for node in get_all_nodes_in_function(self.graph, function): + if len(get_predecessors(self.graph, node)) == 0: + node_likelihood_dict[node] = 1 + queue += get_successors(self.graph, node) + # calculate node likelihoods by traversing the graph + while len(queue) > 0: + current_node = queue.pop(0) # BFS + if current_node in node_likelihood_dict: + continue + predecessors = get_predecessors(self.graph, current_node) + # if node likelihoods for all predecessors exist, calculate the likelihood for current_node + valid_target = True + for pred in predecessors: + if pred not in node_likelihood_dict: + valid_target = False + # add the missing predecessor to the queue + queue.append(pred) + break + if valid_target: + current_node_cu_id = data_at(self.graph, current_node).original_cu_id + # calculate likelihood for current_node + likelihood = 0 + for 
pred in predecessors: + pred_cu_id = data_at(self.graph, pred).original_cu_id + edge_likelihood = 1 # fallback if no data exists or not a branching point + if len(get_successors(self.graph, pred)) > 1: + if pred_cu_id in branch_likelihood_dict: + if current_node_cu_id in branch_likelihood_dict[pred_cu_id]: + edge_likelihood = branch_likelihood_dict[pred_cu_id][current_node_cu_id] + print("Set edge likelihood: ", pred_cu_id, current_node_cu_id, edge_likelihood) + else: + # branch was not executed + edge_likelihood = 0 + print("Set edge likelihood: ", pred_cu_id, current_node_cu_id, edge_likelihood) + + likelihood += node_likelihood_dict[pred] * edge_likelihood + node_likelihood_dict[current_node] = likelihood + print("Set likelihood: ", current_node, likelihood) + + # add successors to queue + queue += get_successors(self.graph, current_node) + + else: + # add current_node to the queue for another try + queue.append(current_node) + + + print("node likelihood:") + for key in sorted(node_likelihood_dict.keys()): + print(key, "->", node_likelihood_dict[key]) + show_function(self.graph, data_at(self.graph, function)) + # calculate best branches using upwards search using branch and node likelihoods def __flatten_function_graphs(self): From cd357b5b6105a8766b51bbe86d6664368febaff0 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 12:17:18 +0100 Subject: [PATCH 10/26] pruning to most likely path --- .../discopop_optimizer/PETParser/PETParser.py | 41 ++++++++++++++++--- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 9e27ab686..a64dd3251 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -215,15 +215,12 @@ def __prune_branches(self): if pred_cu_id in branch_likelihood_dict: if current_node_cu_id in 
branch_likelihood_dict[pred_cu_id]: edge_likelihood = branch_likelihood_dict[pred_cu_id][current_node_cu_id] - print("Set edge likelihood: ", pred_cu_id, current_node_cu_id, edge_likelihood) else: # branch was not executed edge_likelihood = 0 - print("Set edge likelihood: ", pred_cu_id, current_node_cu_id, edge_likelihood) likelihood += node_likelihood_dict[pred] * edge_likelihood node_likelihood_dict[current_node] = likelihood - print("Set likelihood: ", current_node, likelihood) # add successors to queue queue += get_successors(self.graph, current_node) @@ -235,11 +232,45 @@ def __prune_branches(self): print("node likelihood:") for key in sorted(node_likelihood_dict.keys()): - print(key, "->", node_likelihood_dict[key]) - show_function(self.graph, data_at(self.graph, function)) + print("DONE") # calculate best branches using upwards search using branch and node likelihoods + keep_nodes: List[int] = self.__identify_most_likely_path(node_likelihood_dict, function) + + # prune the graph + to_be_removed: List[int] = [n for n in get_all_nodes_in_function(self.graph, function) if n not in keep_nodes] + for n in to_be_removed: + self.graph.remove_node(n) + + + def __identify_most_likely_path(self, node_likelihood_dict: Dict[int, float], function: int) -> List[int]: + """Traverse graph upwards and return a list of the most likely nodes which constitute the most likely execution path.""" + keep_nodes: List[int] = [] + queue: List[int] = [] + # get path end points + for node in get_all_nodes_in_function(self.graph, function): + if len(get_successors(self.graph, node)) == 0: + queue.append(node) + + while len(queue) > 0: + current = queue.pop() + keep_nodes.append(current) + # identify most likely predecessor + predecessor_likelihoods: List[Tuple[int, float]] = [] + for pred in get_predecessors(self.graph, current): + predecessor_likelihoods.append((pred, node_likelihood_dict[pred])) + if len(predecessor_likelihoods) == 0: + # path entry reached + continue + 
most_likely_predecessor = sorted(predecessor_likelihoods, reverse=True, key=lambda x: x[1])[0][0] + # add most likely predecessor to the queue and thus keep_nodes + queue.append(most_likely_predecessor) + + return keep_nodes + + + def __flatten_function_graphs(self): From 3b837430160da6bb82ef94f5d6e0cb90997310f2 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 12:24:22 +0100 Subject: [PATCH 11/26] minor fix --- .../discopop_optimizer/PETParser/PETParser.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index a64dd3251..1eb7f8463 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -111,9 +111,6 @@ def parse(self) -> Tuple[nx.DiGraph, int]: print("pruning graphs based on taken branches") self.__prune_branches() print("\tDone.") - - import sys - sys.exit(0) self.__flatten_function_graphs() @@ -189,7 +186,7 @@ def __prune_branches(self): for node in get_all_nodes_in_function(self.graph, function): if len(get_predecessors(self.graph, node)) == 0: node_likelihood_dict[node] = 1 - queue += get_successors(self.graph, node) + queue.append(node) # calculate node likelihoods by traversing the graph while len(queue) > 0: current_node = queue.pop(0) # BFS @@ -202,7 +199,8 @@ def __prune_branches(self): if pred not in node_likelihood_dict: valid_target = False # add the missing predecessor to the queue - queue.append(pred) + if pred not in queue: + queue.append(pred) break if valid_target: current_node_cu_id = data_at(self.graph, current_node).original_cu_id @@ -223,7 +221,7 @@ def __prune_branches(self): node_likelihood_dict[current_node] = likelihood # add successors to queue - queue += get_successors(self.graph, current_node) + queue += [s for s in get_successors(self.graph, current_node) if s not in queue] else: # add 
current_node to the queue for another try From c4fe94826c57b341bcd6769919ea515a6b180f18 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 13:28:55 +0100 Subject: [PATCH 12/26] minor reformatting --- .../discopop_optimizer/optimization/greedy.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/discopop_library/discopop_optimizer/optimization/greedy.py b/discopop_library/discopop_optimizer/optimization/greedy.py index 0ed1bd270..ebe96db5d 100644 --- a/discopop_library/discopop_optimizer/optimization/greedy.py +++ b/discopop_library/discopop_optimizer/optimization/greedy.py @@ -77,6 +77,7 @@ def greedy_search( for idx, function_node in enumerate(available_decisions): print("Greedy searching function: ", function_node.name, idx, "/", len(available_decisions)) for dcsi, decision_set in enumerate(available_decisions[function_node]): + print("\tDecision:", dcsi, "/", len(available_decisions[function_node])) local_results: List[Tuple[Dict[int, List[List[int]]], int, ContextObject]] = [] # prepare arguments for parallel cost calculation @@ -92,15 +93,17 @@ def greedy_search( local_decision_set[function_node.node_id][dcsi] = [decision] param_list.append(local_decision_set) - # # calculate costs in parallel - # with Pool(initializer=__initialize_cost_caluclation_worker, initargs=(experiment, arguments)) as pool: - # tmp_result = list( - # tqdm.tqdm(pool.imap_unordered(__get_score, param_list), total=len(param_list), disable=True) - # ) - # calculate costs - tmp_result: List[Tuple[Dict[int, List[List[int]]], int, ContextObject]] = [] - for param in param_list: - tmp_result.append(__get_score(param)) + if True: + # calculate costs in parallel + with Pool(initializer=__initialize_cost_caluclation_worker, initargs=(experiment, arguments)) as pool: + tmp_result = list( + tqdm.tqdm(pool.imap_unordered(__get_score, param_list), total=len(param_list), disable=True) + ) + else: + # calculate costs sequentially + tmp_result: 
List[Tuple[Dict[int, List[List[int]]], int, ContextObject]] = [] + for param in param_list: + tmp_result.append(__get_score(param)) for local_result in tmp_result: # remove invalid elements From 4521b9b0986ba8d8a72d54573050c7e0cc5c3f1e Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Fri, 19 Jan 2024 14:04:04 +0100 Subject: [PATCH 13/26] chore: formatting and type fixes --- .../discopop_optimizer/PETParser/PETParser.py | 21 ++++++++----------- .../discopop_optimizer/__main__.py | 2 +- .../discopop_optimizer/optimization/greedy.py | 3 ++- .../suggestions/optimizers/loop_collapse.py | 12 +++++++---- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 1eb7f8463..0f347494a 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -111,7 +111,7 @@ def parse(self) -> Tuple[nx.DiGraph, int]: print("pruning graphs based on taken branches") self.__prune_branches() print("\tDone.") - + self.__flatten_function_graphs() # remove invalid functions @@ -169,14 +169,15 @@ def __prune_branches(self): total_counter += branch_counter_dict[source_cu_id][target_cu_id] branch_likelihood_dict[source_cu_id] = dict() for target_cu_id in branch_counter_dict[source_cu_id]: - branch_likelihood_dict[source_cu_id][target_cu_id] = branch_counter_dict[source_cu_id][target_cu_id] / total_counter + branch_likelihood_dict[source_cu_id][target_cu_id] = ( + branch_counter_dict[source_cu_id][target_cu_id] / total_counter + ) print("Branch likelihood dict:") print(branch_likelihood_dict) # fix branch likelihood, necessary due to different structure of BB vs. 
Optimization graph - for function in get_all_function_nodes(self.graph): print("pruning function: ", cast(FunctionRoot, data_at(self.graph, function)).name) # calculate node likelihoods @@ -222,12 +223,11 @@ def __prune_branches(self): # add successors to queue queue += [s for s in get_successors(self.graph, current_node) if s not in queue] - + else: # add current_node to the queue for another try queue.append(current_node) - print("node likelihood:") for key in sorted(node_likelihood_dict.keys()): print(key, "->", node_likelihood_dict[key]) @@ -237,11 +237,12 @@ def __prune_branches(self): keep_nodes: List[int] = self.__identify_most_likely_path(node_likelihood_dict, function) # prune the graph - to_be_removed: List[int] = [n for n in get_all_nodes_in_function(self.graph, function) if n not in keep_nodes] + to_be_removed: List[int] = [ + n for n in get_all_nodes_in_function(self.graph, function) if n not in keep_nodes + ] for n in to_be_removed: self.graph.remove_node(n) - def __identify_most_likely_path(self, node_likelihood_dict: Dict[int, float], function: int) -> List[int]: """Traverse graph upwards and return a list of the most likely nodes which constitute the most likely execution path.""" keep_nodes: List[int] = [] @@ -250,7 +251,7 @@ def __identify_most_likely_path(self, node_likelihood_dict: Dict[int, float], fu for node in get_all_nodes_in_function(self.graph, function): if len(get_successors(self.graph, node)) == 0: queue.append(node) - + while len(queue) > 0: current = queue.pop() keep_nodes.append(current) @@ -267,10 +268,6 @@ def __identify_most_likely_path(self, node_likelihood_dict: Dict[int, float], fu return keep_nodes - - - - def __flatten_function_graphs(self): # TODO: remove deepcopies by storing data independently from the nodes diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index 804656905..f9a677de1 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ 
b/discopop_library/discopop_optimizer/__main__.py @@ -73,7 +73,7 @@ def parse_args() -> OptimizerArguments: check_called_function_for_nested_parallelism=arguments.check_called_function_for_nested_parallelism, profiling=arguments.profiling, greedy=arguments.greedy, - optimization=arguments.optimize + optimization=arguments.optimize, ) diff --git a/discopop_library/discopop_optimizer/optimization/greedy.py b/discopop_library/discopop_optimizer/optimization/greedy.py index ebe96db5d..e095bb240 100644 --- a/discopop_library/discopop_optimizer/optimization/greedy.py +++ b/discopop_library/discopop_optimizer/optimization/greedy.py @@ -93,6 +93,7 @@ def greedy_search( local_decision_set[function_node.node_id][dcsi] = [decision] param_list.append(local_decision_set) + tmp_result: List[Tuple[Dict[int, List[List[int]]], int, ContextObject]] = [] if True: # calculate costs in parallel with Pool(initializer=__initialize_cost_caluclation_worker, initargs=(experiment, arguments)) as pool: @@ -101,7 +102,7 @@ def greedy_search( ) else: # calculate costs sequentially - tmp_result: List[Tuple[Dict[int, List[List[int]]], int, ContextObject]] = [] + tmp_result = [] for param in param_list: tmp_result.append(__get_score(param)) diff --git a/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py b/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py index 4b238cd40..7c7c08fac 100644 --- a/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py +++ b/discopop_library/discopop_optimizer/suggestions/optimizers/loop_collapse.py @@ -13,8 +13,8 @@ import networkx as nx # type: ignore -import tqdm -from build.lib.discopop_library.result_classes.OptimizerOutputPattern import OptimizerOutputPattern # type: ignore +import tqdm # type: ignore +from discopop_library.result_classes.OptimizerOutputPattern import OptimizerOutputPattern # type: ignore from discopop_explorer.pattern_detectors.do_all_detector import DoAllInfo # type: ignore 
from discopop_library.PatternIdManagement.unique_pattern_id import get_unique_pattern_id from discopop_library.discopop_optimizer.Variables.Experiment import Experiment @@ -243,8 +243,12 @@ def __collapse_loops_in_function(function_node_id): # todo: find a nicer solution to duplicating the patterns for each device mapping global_experiment.detection_result.patterns.do_all.append(pattern_info) # construct optimizer output pattern to represent the non-standalone pattern_info - optimizer_output_pattern = OptimizerOutputPattern(pattern_info._node, [new_node_id], global_experiment.get_system().get_host_device_id()) - optimizer_output_pattern.add_pattern(pattern_info.pattern_id, pattern_info.device_id, pattern_info.device_type) + optimizer_output_pattern = OptimizerOutputPattern( + pattern_info._node, [new_node_id], global_experiment.get_system().get_host_device_id() + ) + optimizer_output_pattern.add_pattern( + pattern_info.pattern_id, pattern_info.device_id, pattern_info.device_type + ) global_experiment.detection_result.patterns.optimizer_output.append(optimizer_output_pattern) print("REGISTERED PATTERN INFO: ", pattern_id, " for Device: ", data_at(global_graph, csrc).device_id) print(pattern_info) From 3ab9bc567e814ad9beaa90b192e70e1999694c83 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 11:29:00 +0100 Subject: [PATCH 14/26] feat(optimizer, profiler): optional branch tracking --- DiscoPoP/DiscoPoP.cpp | 11 ++++++++--- .../discopop_optimizer/PETParser/PETParser.py | 8 +++++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/DiscoPoP/DiscoPoP.cpp b/DiscoPoP/DiscoPoP.cpp index 00c7e2593..9ea124d82 100644 --- a/DiscoPoP/DiscoPoP.cpp +++ b/DiscoPoP/DiscoPoP.cpp @@ -19,7 +19,8 @@ #define DP_VERBOSE false // prints warning messages #define DP_hybrid_DEBUG false #define DP_hybrid_SKIP false //todo add parameter to disable hybrid dependence analysis on demand. 
- +#define DP_BRANCH_TRACKING false // toggles the creation of instrumentation calls for tracking taken branches. + // Required by the graph pruning step of the DiscoPoP optimizer. using namespace llvm; using namespace std; @@ -1940,7 +1941,9 @@ void DiscoPoP::dp_reduction_insert_functions() { if (llvm::isa(&(*it))) { llvm::IRBuilder<> ir_builder(&(*it)); ir_builder.CreateCall(loop_counter_output_callee); - ir_builder.CreateCall(cu_taken_branch_counter_output_callee); + if(DP_BRANCH_TRACKING){ + ir_builder.CreateCall(cu_taken_branch_counter_output_callee); + } break; } } @@ -2218,7 +2221,9 @@ bool DiscoPoP::runOnFunction(Function &F) { createCUs(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap, root, LI); - createTakenBranchInstrumentation(TopRegion, BBIDToCUIDsMap); + if(DP_BRANCH_TRACKING){ + createTakenBranchInstrumentation(TopRegion, BBIDToCUIDsMap); + } fillCUVariables(TopRegion, globalVariablesSet, CUVector, BBIDToCUIDsMap); diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 0f347494a..a61195f80 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -110,7 +110,8 @@ def parse(self) -> Tuple[nx.DiGraph, int]: if self.experiment.arguments.verbose: print("pruning graphs based on taken branches") self.__prune_branches() - print("\tDone.") + if self.experiment.arguments.verbose: + print("\tDone.") self.__flatten_function_graphs() @@ -146,6 +147,11 @@ def get_new_node_id(self) -> int: def __prune_branches(self): """Prune branches based on the measured likelihood of execution""" + # check if branch information exists. If not, skip this step. + if not os.path.exists("profiler/cu_taken_branch_counter_output.txt"): + if self.experiment.arguments.verbose: + print("\tNo information on taken branches found. 
Skipping.") + return # load observed branching information branch_counter_dict: Dict[str, Dict[str, int]] = dict() with open("profiler/cu_taken_branch_counter_output.txt", "r") as f: From 08e15c7821eca6b255b4a71d425912d6ce396631 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 11:35:20 +0100 Subject: [PATCH 15/26] fix(profiler): enable branch tracking --- DiscoPoP/DiscoPoP.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DiscoPoP/DiscoPoP.cpp b/DiscoPoP/DiscoPoP.cpp index 9ea124d82..139380c8b 100644 --- a/DiscoPoP/DiscoPoP.cpp +++ b/DiscoPoP/DiscoPoP.cpp @@ -19,7 +19,7 @@ #define DP_VERBOSE false // prints warning messages #define DP_hybrid_DEBUG false #define DP_hybrid_SKIP false //todo add parameter to disable hybrid dependence analysis on demand. -#define DP_BRANCH_TRACKING false // toggles the creation of instrumentation calls for tracking taken branches. +#define DP_BRANCH_TRACKING true // toggles the creation of instrumentation calls for tracking taken branches. // Required by the graph pruning step of the DiscoPoP optimizer. 
using namespace llvm; From d4ae32eadac61ad73f34b261819103070ecd7156 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 14:32:44 +0100 Subject: [PATCH 16/26] feat(optimizer): add configurable pruning levels --- .../discopop_optimizer/OptimizerArguments.py | 5 +++++ .../discopop_optimizer/PETParser/PETParser.py | 12 ++++++++++-- discopop_library/discopop_optimizer/__main__.py | 5 ++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/discopop_library/discopop_optimizer/OptimizerArguments.py b/discopop_library/discopop_optimizer/OptimizerArguments.py index eea8cab06..9ffa86247 100644 --- a/discopop_library/discopop_optimizer/OptimizerArguments.py +++ b/discopop_library/discopop_optimizer/OptimizerArguments.py @@ -27,6 +27,7 @@ class OptimizerArguments(object): profiling: bool greedy: bool optimization: bool + pruning_level: int def __post_init__(self): # fix correct optimization method @@ -44,4 +45,8 @@ def __validate(self): if self.reduction_microbench_file is not "None": if not os.path.isfile(self.reduction_microbench_file): raise FileNotFoundError(f"Microbenchmark file not found: {self.reduction_microbench_file}") + + # check pruning level values + if self.pruning_level not in [0, 1]: + raise ValueError("Unsupported pruning level: ", self.pruning_level) pass diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index a61195f80..35409efe1 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -148,6 +148,10 @@ def get_new_node_id(self) -> int: def __prune_branches(self): """Prune branches based on the measured likelihood of execution""" # check if branch information exists. If not, skip this step. + if self.experiment.arguments.pruning_level == 0: + if self.experiment.arguments.verbose: + print("\tPruning level 0. 
Skipping.") + return if not os.path.exists("profiler/cu_taken_branch_counter_output.txt"): if self.experiment.arguments.verbose: print("\tNo information on taken branches found. Skipping.") @@ -239,8 +243,12 @@ def __prune_branches(self): print(key, "->", node_likelihood_dict[key]) print("DONE") - # calculate best branches using upwards search using branch and node likelihoods - keep_nodes: List[int] = self.__identify_most_likely_path(node_likelihood_dict, function) + keep_nodes: List[int] = [] + if self.experiment.arguments.pruning_level == 1: + # calculate best branches using upwards search using branch and node likelihoods + keep_nodes = self.__identify_most_likely_path(node_likelihood_dict, function) + else: + raise ValueError("Unknown pruning level: ", self.experiment.arguments.pruning_level) # prune the graph to_be_removed: List[int] = [ diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index f9a677de1..5d9b1b362 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ b/discopop_library/discopop_optimizer/__main__.py @@ -31,6 +31,8 @@ def parse_args() -> OptimizerArguments: help="Enable greedy search. (Default)") parser.add_argument("-o", "--optimize", action="store_true", help="Enable optimization.") + parser.add_argument("-p", "--pruning-level", type=int, default=0, + help="Program path pruning aggressiveness. 0: no pruning. 
1: prune to most likely path.") parser.add_argument( "--doall-microbench-file", type=str, default="None", help="Do-All microbenchmark results" @@ -43,7 +45,7 @@ def parse_args() -> OptimizerArguments: "--system-configuration", type=str, default="optimizer/system_configuration.json", help="System configuration file" ) - parser.add_argument("-p", "--profiling", action="store_true", + parser.add_argument("--profiling", action="store_true", help="Enable profiling.") # EXPERIMENTAL FLAGS: experimental_parser.add_argument("--allow-nested-parallelism", action="store_true", @@ -74,6 +76,7 @@ def parse_args() -> OptimizerArguments: profiling=arguments.profiling, greedy=arguments.greedy, optimization=arguments.optimize, + pruning_level=arguments.pruning_level, ) From 76ac11f115b6a67efb2b542df17532ea8f039e47 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 14:40:43 +0100 Subject: [PATCH 17/26] feat(optimizer): preparations for pruning level 2 --- discopop_library/discopop_optimizer/OptimizerArguments.py | 2 +- discopop_library/discopop_optimizer/__main__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/discopop_library/discopop_optimizer/OptimizerArguments.py b/discopop_library/discopop_optimizer/OptimizerArguments.py index 9ffa86247..fc4bb01f3 100644 --- a/discopop_library/discopop_optimizer/OptimizerArguments.py +++ b/discopop_library/discopop_optimizer/OptimizerArguments.py @@ -47,6 +47,6 @@ def __validate(self): raise FileNotFoundError(f"Microbenchmark file not found: {self.reduction_microbench_file}") # check pruning level values - if self.pruning_level not in [0, 1]: + if self.pruning_level not in [0, 1, 2]: raise ValueError("Unsupported pruning level: ", self.pruning_level) pass diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index 5d9b1b362..fa0e6bd53 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ 
b/discopop_library/discopop_optimizer/__main__.py @@ -32,7 +32,7 @@ def parse_args() -> OptimizerArguments: parser.add_argument("-o", "--optimize", action="store_true", help="Enable optimization.") parser.add_argument("-p", "--pruning-level", type=int, default=0, - help="Program path pruning aggressiveness. 0: no pruning. 1: prune to most likely path.") + help="Program path pruning aggressiveness. 0: no pruning. 1: prune to most likely path. 2: prune to paths that cover 80%% of observed executions.") parser.add_argument( "--doall-microbench-file", type=str, default="None", help="Do-All microbenchmark results" From 9cdd77f7d9b55111d643fc78b1101e860c212853 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 15:41:51 +0100 Subject: [PATCH 18/26] feat(optimizer): pruning level 2 --- .../discopop_optimizer/PETParser/PETParser.py | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 35409efe1..c5cd00545 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -247,6 +247,9 @@ def __prune_branches(self): if self.experiment.arguments.pruning_level == 1: # calculate best branches using upwards search using branch and node likelihoods keep_nodes = self.__identify_most_likely_path(node_likelihood_dict, function) + elif self.experiment.arguments.pruning_level == 2: + # calculate branches which are executed in 80% of the observed cases + keep_nodes = self.__identify_most_likely_paths_80_percent_cutoff(branch_likelihood_dict, function) else: raise ValueError("Unknown pruning level: ", self.experiment.arguments.pruning_level) @@ -257,6 +260,55 @@ def __prune_branches(self): for n in to_be_removed: self.graph.remove_node(n) + show_function(self.graph, data_at(self.graph, function), show_dataflow=False, show_mutex_edges=False) + + def 
__identify_most_likely_paths_80_percent_cutoff(self, branch_likelihood_dict: Dict[str, Dict[str, float]], function: int) -> List[int]: + """Traverse graph downwards and return a list of the nodes visited if all branches were taken that constitute a sum of at least 80% of the observed cases.""" + keep_nodes: List[int] = [] + queue: List[int] = [] + # get path entries points + for node in get_all_nodes_in_function(self.graph, function): + if len(get_predecessors(self.graph, node)) == 0: + queue.append(node) + + while len(queue) > 0: + current = queue.pop() + current_cu_id = data_at(self.graph, current).original_cu_id + keep_nodes.append(current) + + # get successors and their cu ids + successors = get_successors(self.graph, current) + if len(successors) < 2: + queue += [s for s in successors if s not in queue and s not in keep_nodes] + continue + successor_cus = [(s, data_at(self.graph, s).original_cu_id) for s in successors] + + # get likelihoods for transitions to successors + if current_cu_id not in branch_likelihood_dict: + warnings.warn("No branch counters available for path split at CU Node: " + current_cu_id + ". 
Fallback: Preserving all successors.") + # fallback: preserve all successors + queue += [s for s in successors if s not in queue and s not in keep_nodes] + continue + else: + successor_likelihood = [] + for succ, succ_cu_id in successor_cus: + if succ_cu_id not in branch_likelihood_dict[current_cu_id]: + successor_likelihood.append((succ, succ_cu_id, 0.0)) + else: + successor_likelihood.append((succ, succ_cu_id, branch_likelihood_dict[current_cu_id][succ_cu_id])) + + # select successors until total probability is > THRESHOLD + threshold = 0.8 + total_probability = 0 + for succ, succ_cu_id, succ_prob in sorted(successor_likelihood, reverse=True, key=lambda x: x[2]): + if total_probability < threshold: + queue.append(succ) + total_probability += succ_prob + else: + break + + return keep_nodes + def __identify_most_likely_path(self, node_likelihood_dict: Dict[int, float], function: int) -> List[int]: """Traverse graph upwards and return a list of the most likely nodes which constitute the most likely execution path.""" keep_nodes: List[int] = [] From 47f787950c4ba40df3e24fd9e43d7676731860fc Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 15:44:45 +0100 Subject: [PATCH 19/26] chore: cleanup --- discopop_library/discopop_optimizer/PETParser/PETParser.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index c5cd00545..0819bc282 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -260,8 +260,6 @@ def __prune_branches(self): for n in to_be_removed: self.graph.remove_node(n) - show_function(self.graph, data_at(self.graph, function), show_dataflow=False, show_mutex_edges=False) - def __identify_most_likely_paths_80_percent_cutoff(self, branch_likelihood_dict: Dict[str, Dict[str, float]], function: int) -> List[int]: """Traverse graph downwards 
and return a list of the nodes visited if all branches were taken that constitute a sum of at least 80% of the observed cases.""" keep_nodes: List[int] = [] From c0da1faf494f4f8397cf47ca206287195f27b6b5 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 15:57:08 +0100 Subject: [PATCH 20/26] feat(optimizer)[-p2]: formatted verbose output --- .../discopop_optimizer/PETParser/PETParser.py | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 0819bc282..0cb939cc4 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -190,6 +190,15 @@ def __prune_branches(self): for function in get_all_function_nodes(self.graph): print("pruning function: ", cast(FunctionRoot, data_at(self.graph, function)).name) + verbose_print_pruning_statistics = False + if self.experiment.arguments.verbose: + ct = 0 + for node in get_all_nodes_in_function(self.graph, function): + if len(get_successors(self.graph, node)) > 1: + ct += 1 + if ct > 0: + verbose_print_pruning_statistics = True + print("\tpath splits before pruning: ", ct) # calculate node likelihoods node_likelihood_dict: Dict[int, float] = dict() # initialize @@ -238,11 +247,6 @@ def __prune_branches(self): # add current_node to the queue for another try queue.append(current_node) - print("node likelihood:") - for key in sorted(node_likelihood_dict.keys()): - print(key, "->", node_likelihood_dict[key]) - print("DONE") - keep_nodes: List[int] = [] if self.experiment.arguments.pruning_level == 1: # calculate best branches using upwards search using branch and node likelihoods @@ -254,11 +258,20 @@ def __prune_branches(self): raise ValueError("Unknown pruning level: ", self.experiment.arguments.pruning_level) # prune the graph + function_nodes = 
get_all_nodes_in_function(self.graph, function) to_be_removed: List[int] = [ - n for n in get_all_nodes_in_function(self.graph, function) if n not in keep_nodes + n for n in function_nodes if n not in keep_nodes ] for n in to_be_removed: self.graph.remove_node(n) + + if self.experiment.arguments.verbose and verbose_print_pruning_statistics: + ct = 0 + for node in get_all_nodes_in_function(self.graph, function): + if len(get_successors(self.graph, node)) > 1: + ct += 1 + print("\tpath splits after pruning: ", ct) + def __identify_most_likely_paths_80_percent_cutoff(self, branch_likelihood_dict: Dict[str, Dict[str, float]], function: int) -> List[int]: """Traverse graph downwards and return a list of the nodes visited if all branches were taken that constitute a sum of at least 80% of the observed cases.""" From af80e6ca611cd32b0ab59adcf1d2b39df21443d4 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Tue, 23 Jan 2024 15:58:20 +0100 Subject: [PATCH 21/26] doc(optimizer): cleanup help string --- discopop_library/discopop_optimizer/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index fa0e6bd53..7007c0dec 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ b/discopop_library/discopop_optimizer/__main__.py @@ -32,7 +32,7 @@ def parse_args() -> OptimizerArguments: parser.add_argument("-o", "--optimize", action="store_true", help="Enable optimization.") parser.add_argument("-p", "--pruning-level", type=int, default=0, - help="Program path pruning aggressiveness. 0: no pruning. 1: prune to most likely path. 2: prune to paths that cover 80%% of observed executions.") + help="Program path pruning aggressiveness. 0: no pruning. 1: prune to most likely path. 
2: prune to paths that cover 80%% of observed decisions per path split.") parser.add_argument( "--doall-microbench-file", type=str, default="None", help="Do-All microbenchmark results" From 321e9d3490a70c53b97a62bea63da7c8a0283fd8 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 24 Jan 2024 08:46:40 +0100 Subject: [PATCH 22/26] chore(optimizer): formatting --- .../discopop_optimizer/PETParser/PETParser.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 0cb939cc4..810eda1e6 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -259,12 +259,10 @@ def __prune_branches(self): # prune the graph function_nodes = get_all_nodes_in_function(self.graph, function) - to_be_removed: List[int] = [ - n for n in function_nodes if n not in keep_nodes - ] + to_be_removed: List[int] = [n for n in function_nodes if n not in keep_nodes] for n in to_be_removed: self.graph.remove_node(n) - + if self.experiment.arguments.verbose and verbose_print_pruning_statistics: ct = 0 for node in get_all_nodes_in_function(self.graph, function): @@ -272,8 +270,9 @@ def __prune_branches(self): ct += 1 print("\tpath splits after pruning: ", ct) - - def __identify_most_likely_paths_80_percent_cutoff(self, branch_likelihood_dict: Dict[str, Dict[str, float]], function: int) -> List[int]: + def __identify_most_likely_paths_80_percent_cutoff( + self, branch_likelihood_dict: Dict[str, Dict[str, float]], function: int + ) -> List[int]: """Traverse graph downwards and return a list of the nodes visited if all branches were taken that constitute a sum of at least 80% of the observed cases.""" keep_nodes: List[int] = [] queue: List[int] = [] @@ -296,7 +295,11 @@ def __identify_most_likely_paths_80_percent_cutoff(self, branch_likelihood_dict: # get 
likelihoods for transitions to successors if current_cu_id not in branch_likelihood_dict: - warnings.warn("No branch counters available for path split at CU Node: " + current_cu_id + ". Fallback: Preserving all successors.") + warnings.warn( + "No branch counters available for path split at CU Node: " + + str(current_cu_id) + + ". Fallback: Preserving all successors." + ) # fallback: preserve all successors queue += [s for s in successors if s not in queue and s not in keep_nodes] continue @@ -306,11 +309,13 @@ def __identify_most_likely_paths_80_percent_cutoff(self, branch_likelihood_dict: if succ_cu_id not in branch_likelihood_dict[current_cu_id]: successor_likelihood.append((succ, succ_cu_id, 0.0)) else: - successor_likelihood.append((succ, succ_cu_id, branch_likelihood_dict[current_cu_id][succ_cu_id])) + successor_likelihood.append( + (succ, succ_cu_id, branch_likelihood_dict[current_cu_id][succ_cu_id]) + ) # select successors until total probability is > THRESHOLD threshold = 0.8 - total_probability = 0 + total_probability = 0.0 for succ, succ_cu_id, succ_prob in sorted(successor_likelihood, reverse=True, key=lambda x: x[2]): if total_probability < threshold: queue.append(succ) From c13792becc1dc1ca80dea0384f39f5a95d88f35b Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 24 Jan 2024 11:01:00 +0100 Subject: [PATCH 23/26] fix(CI): profiler test --- .github/workflows/tests/profiler.sh | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests/profiler.sh b/.github/workflows/tests/profiler.sh index ec3f769d3..998a28afb 100755 --- a/.github/workflows/tests/profiler.sh +++ b/.github/workflows/tests/profiler.sh @@ -20,11 +20,14 @@ TARGET_NAME=$1 PASS_NAME=$2 function test_discopopPass { - cp ${DISCOPOP_SRC}/scripts/dp-fmap . 
- ./dp-fmap - clang++ -g -c -O0 -S -emit-llvm -fno-discard-value-names "$1" -o out.ll || return 1 - opt-11 -S -load=${DISCOPOP_INSTALL}/libi/LLVMDiscoPoP.so --DiscoPoP out.ll -o out_dp.ll || return 1 - clang++ out_dp.ll -o out_prof -L${DISCOPOP_INSTALL}/rtlib -lDiscoPoP_RT -lpthread || return 1 + ${DISCOPOP_INSTALL}/scripts/CXX_wrapper.sh "$1" -o out_prof + + +# cp ${DISCOPOP_SRC}/scripts/dp-fmap . +# ./dp-fmap +# clang++ -g -c -O0 -S -emit-llvm -fno-discard-value-names "$1" -o out.ll || return 1 +# opt-11 -S -load=${DISCOPOP_INSTALL}/libi/LLVMDiscoPoP.so --DiscoPoP out.ll -o out_dp.ll || return 1 +# clang++ out_dp.ll -o out_prof -L${DISCOPOP_INSTALL}/rtlib -lDiscoPoP_RT -lpthread || return 1 ./out_prof || return 1 } From 0b40562beb0c7c3a60e4fe82f904d9fc488f6fd1 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 24 Jan 2024 11:45:45 +0100 Subject: [PATCH 24/26] chore: formatting --- discopop_library/discopop_optimizer/OptimizerArguments.py | 2 +- discopop_library/discopop_optimizer/__main__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/discopop_library/discopop_optimizer/OptimizerArguments.py b/discopop_library/discopop_optimizer/OptimizerArguments.py index 1c36e1899..30f546343 100644 --- a/discopop_library/discopop_optimizer/OptimizerArguments.py +++ b/discopop_library/discopop_optimizer/OptimizerArguments.py @@ -42,7 +42,7 @@ def __validate(self): # check pruning level values if self.pruning_level not in [0, 1, 2]: raise ValueError("Unsupported pruning level: ", self.pruning_level) - + # check optimization level if self.optimization_level not in [0, 1, 2, 3]: raise ValueError("Unknown optimization level requested: ", self.optimization_level) diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index deb199147..cf1bb4cb3 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ b/discopop_library/discopop_optimizer/__main__.py @@ -67,7 +67,7 @@ def parse_args() -> 
OptimizerArguments: plot=arguments.plot, system_configuration_path=arguments.system_configuration, check_called_function_for_nested_parallelism=arguments.check_called_function_for_nested_parallelism, - profiling=arguments.profiling, + profiling=arguments.profiling, pruning_level=arguments.pruning_level, optimization_level=arguments.o, optimization_level_2_parameters=arguments.opt_2_params, ) From af9a3c68c96b28e877f7024b547487b3fc662497 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 24 Jan 2024 11:49:10 +0100 Subject: [PATCH 25/26] fix(optimizer)[pruning]: switched level 1 and 2 due to aggressiveness --- .../discopop_optimizer/PETParser/PETParser.py | 8 ++++---- discopop_library/discopop_optimizer/__main__.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/discopop_library/discopop_optimizer/PETParser/PETParser.py b/discopop_library/discopop_optimizer/PETParser/PETParser.py index 810eda1e6..6d629e4c6 100644 --- a/discopop_library/discopop_optimizer/PETParser/PETParser.py +++ b/discopop_library/discopop_optimizer/PETParser/PETParser.py @@ -108,7 +108,7 @@ def parse(self) -> Tuple[nx.DiGraph, int]: # self.__new_parse_branched_sections() if self.experiment.arguments.verbose: - print("pruning graphs based on taken branches") + print("pruning graphs based on taken branches. 
Pruning level: ", self.experiment.arguments.pruning_level) self.__prune_branches() if self.experiment.arguments.verbose: print("\tDone.") @@ -249,11 +249,11 @@ def __prune_branches(self): keep_nodes: List[int] = [] if self.experiment.arguments.pruning_level == 1: - # calculate best branches using upwards search using branch and node likelihoods - keep_nodes = self.__identify_most_likely_path(node_likelihood_dict, function) - elif self.experiment.arguments.pruning_level == 2: # calculate branches which are executed in 80% of the observed cases keep_nodes = self.__identify_most_likely_paths_80_percent_cutoff(branch_likelihood_dict, function) + elif self.experiment.arguments.pruning_level == 2: + # calculate best branches using upwards search using branch and node likelihoods + keep_nodes = self.__identify_most_likely_path(node_likelihood_dict, function) else: raise ValueError("Unknown pruning level: ", self.experiment.arguments.pruning_level) diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index cf1bb4cb3..22aa7ffff 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ b/discopop_library/discopop_optimizer/__main__.py @@ -23,8 +23,8 @@ def parse_args() -> OptimizerArguments: # fmt: off parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output.") - parser.add_argument("-p", "--pruning-level", type=int, default=0, - help="Program path pruning aggressiveness. 0: no pruning. 1: prune to most likely path. 2: prune to paths that cover 80%% of observed decisions per path split.") + parser.add_argument("-p", type=int, default=0, + help="Program path pruning aggressiveness. 0: no pruning. 1: prune to paths that cover 80%% of observed decisions per path split. 2: prune to most likely path.") parser.add_argument("-o", type=int, default=0, help="Optimization level: 0 -> no optimization. 1 -> greedy. 2 -> evolutionary. 
3 -> exhaustive") parser.add_argument("-opt-2-params", type=str, default=None, nargs=2, metavar=("population_size", "generations"), help="Configure parameters of the evolutionary optimization (-o2). Default: 50 5") From 8eabcfc085b95063470d46dce91653bde5fad1b0 Mon Sep 17 00:00:00 2001 From: Lukas Rothenberger Date: Wed, 24 Jan 2024 11:53:10 +0100 Subject: [PATCH 26/26] fix(optimizer): incorrect argument name --- discopop_library/discopop_optimizer/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/discopop_library/discopop_optimizer/__main__.py b/discopop_library/discopop_optimizer/__main__.py index 22aa7ffff..5e3b537a2 100644 --- a/discopop_library/discopop_optimizer/__main__.py +++ b/discopop_library/discopop_optimizer/__main__.py @@ -68,7 +68,7 @@ def parse_args() -> OptimizerArguments: system_configuration_path=arguments.system_configuration, check_called_function_for_nested_parallelism=arguments.check_called_function_for_nested_parallelism, profiling=arguments.profiling, - pruning_level=arguments.pruning_level, + pruning_level=arguments.p, optimization_level=arguments.o, optimization_level_2_parameters=arguments.opt_2_params, )