Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experimental: Optimizer: exhaustive and evolutionary optimization #455

Merged
merged 19 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
81d8eca
feat(optimizer): always preserve the best option in evolution
lukasrothenberger Nov 29, 2023
bac0aa2
feat(optimizer): print best option to file
lukasrothenberger Nov 29, 2023
4b3e85f
feat(optimizer)[evolutionary]: validate configurations
lukasrothenberger Nov 30, 2023
2471d40
feat(optimizer): added device mapping to output
lukasrothenberger Nov 30, 2023
aa779b7
feat(optimizer): added mapping to verbose output
lukasrothenberger Nov 30, 2023
01f551f
fix(optimizer): improved robustness
lukasrothenberger Nov 30, 2023
c71cac4
feat(optimizer): select target devices for patterns by type instead ID
lukasrothenberger Dec 1, 2023
767a8a0
feat(optimizer): identify and disallow nested parallelism by default
lukasrothenberger Dec 1, 2023
89fa414
feat(optimizer): consider only valid configurations
lukasrothenberger Dec 1, 2023
0c7bad4
feat(optimizer)[exhaustive]: create function models on demand
lukasrothenberger Dec 5, 2023
63f9407
feat(optimizer)[evolutionary]: create function models on demand
lukasrothenberger Dec 5, 2023
e848c79
feat(optimizer): calculate function configurations independent of models
lukasrothenberger Dec 5, 2023
e97787d
feat(optimizer)[evo]: parallel population initialization
lukasrothenberger Dec 5, 2023
335765f
feat(optimizer)[evo]: parallel mutation and crossover
lukasrothenberger Dec 5, 2023
9f31cc4
chore(optimizer): cleaned output
lukasrothenberger Dec 5, 2023
e58657d
feat(optimizer): parallel computation of function paths
lukasrothenberger Dec 5, 2023
08df834
chore(cleanup)
lukasrothenberger Dec 5, 2023
949db46
fix(optimizer): added license tag
lukasrothenberger Dec 5, 2023
f962c23
fix(optimizer): import error
lukasrothenberger Dec 6, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ def add_data_transfer_costs(
# and no asynchronous transfers happen.
# todo: This should be extended in the future.
data_transfer_costs = get_transfer_costs(context, environment=environment)

# extend the cost_model
cost_model = cost_model.parallelizable_plus_combine(data_transfer_costs)

Expand Down
16 changes: 14 additions & 2 deletions discopop_library/discopop_optimizer/CostModels/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@


def get_performance_models_for_functions(
experiment: Experiment, graph: nx.DiGraph
experiment: Experiment, graph: nx.DiGraph, restrict_to_decisions: Optional[Set[int]] = None
) -> Dict[FunctionRoot, List[CostModel]]:
performance_models: Dict[FunctionRoot, List[CostModel]] = dict()
# get called FunctionRoots from cu ids
Expand All @@ -48,9 +48,21 @@ def get_performance_models_for_functions(
# start the collection at the first child of the function
for child_id in get_children(graph, node_id):
performance_models[node_data] = get_node_performance_models(
experiment, graph, child_id, set(), all_function_nodes
experiment, graph, child_id, set(), all_function_nodes, restrict_to_decisions=restrict_to_decisions
)

# At this point, decisions are restricted to the specified parallelization or the sequential version.
# Restrict them to the exact case specified in restrict_to_decisions
if restrict_to_decisions is not None:
to_be_removed: List[int] = []
for idx, cost_model in enumerate(performance_models[node_data]):
for decision in cost_model.path_decisions:
if decision not in restrict_to_decisions:
to_be_removed.append(idx)
break
for idx in sorted(to_be_removed, reverse=True):
del performance_models[node_data][idx]

# filter out NaN - Models
performance_models[node_data] = [
model for model in performance_models[node_data] if model.parallelizable_costs != sympy.nan
Expand Down
2 changes: 2 additions & 0 deletions discopop_library/discopop_optimizer/OptimizerArguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ class OptimizerArguments(object):
exhaustive: bool
doall_microbench_file: str
reduction_microbench_file: str
allow_nested_parallelism: bool
plot: bool

def __post_init__(self):
self.__validate()
Expand Down
3 changes: 2 additions & 1 deletion discopop_library/discopop_optimizer/PETParser/PETParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from discopop_library.discopop_optimizer.classes.nodes.Loop import Loop
from discopop_library.discopop_optimizer.classes.nodes.Workload import Workload
from discopop_library.discopop_optimizer.utilities.MOGUtilities import (
add_dataflow_edge,
data_at,
get_successors,
get_children,
Expand Down Expand Up @@ -429,4 +430,4 @@ def inlined_data_flow_calculation(current_node, current_last_writes):
for key in self.out_data_flow:
for entry in self.out_data_flow[key]:
if not self.graph.has_edge(key, entry):
add_temporary_edge(self.graph, key, entry)
add_dataflow_edge(self.graph, key, entry)
4 changes: 2 additions & 2 deletions discopop_library/discopop_optimizer/Variables/Experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class Experiment(object):

optimization_graph: nx.DiGraph
next_free_node_id: int
suggestion_to_node_id_dict: Dict[int, int]
suggestion_to_node_ids_dict: Dict[int, List[int]]

def __init__(
self, file_mapping: Dict[int, Path], system: System, detection_result: DetectionResult, profiler_dir: str
Expand All @@ -84,7 +84,7 @@ def __init__(
self.file_mapping = file_mapping
self.function_models = dict()
self.selected_paths_per_function = dict()
self.suggestion_to_node_id_dict = dict()
self.suggestion_to_node_ids_dict = dict()

# collect free symbols from system
for free_symbol, value_suggestion in system.get_free_symbols():
Expand Down
8 changes: 8 additions & 0 deletions discopop_library/discopop_optimizer/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,14 @@ def parse_args() -> OptimizerArguments:
help="Reduction microbenchmark results"
)
# EXPERIMENTAL FLAGS:
experimental_parser.add_argument("--allow-nested-parallelism", action="store_true",
help="Allow the creation of nested parallelism suggestions. "
+ "WARNING: Cost estimations may not be accurrate due to potentially"
+ "high overhead introduced by entering nested parallelism!")
experimental_parser.add_argument("-i", "--interactive", action="store_true",
help="Enable interactive execution.")
experimental_parser.add_argument("--plot", action="store_true",
help="Plot the internal graph.")
# fmt: on

arguments = parser.parse_args()
Expand All @@ -46,6 +52,8 @@ def parse_args() -> OptimizerArguments:
exhaustive=arguments.exhaustive,
doall_microbench_file=arguments.doall_microbench_file,
reduction_microbench_file=arguments.reduction_microbench_file,
allow_nested_parallelism=arguments.allow_nested_parallelism,
plot=arguments.plot,
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def calculate_and_perform_necessary_updates(
for data_write in unknown_writes:
# if device <-> device update is required, split it into two distinct updates
if device_id != 0 and reading_device_id != 0:
print("Device <-> Device update required!")
# print("Device <-> Device update required!")

# check if data is known to the host
if data_write.memory_region not in self.seen_writes_by_device[0]:
Expand All @@ -98,20 +98,20 @@ def calculate_and_perform_necessary_updates(
is_first_data_occurrence=is_first_data_occurrence,
)
)
else:
print(
"SKIPPED KNOWN WRITE: ",
str(
Update(
source_node_id=self.last_visited_node_id,
target_node_id=reading_node_id,
source_device_id=device_id,
target_device_id=0, # reading_device_id,
write_data_access=data_write,
is_first_data_occurrence=is_first_data_occurrence,
)
),
)
# else:
# print(
# "SKIPPED KNOWN WRITE: ",
# str(
# Update(
# source_node_id=self.last_visited_node_id,
# target_node_id=reading_node_id,
# source_device_id=device_id,
# target_device_id=0, # reading_device_id,
# write_data_access=data_write,
# is_first_data_occurrence=is_first_data_occurrence,
# )
# ),
# )

# register host -> target device update
required_updates.add(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# This file is part of the DiscoPoP software (http://www.discopop.tu-darmstadt.de)
#
# Copyright (c) 2020, Technische Universitaet Darmstadt, Germany
#
# This software may be modified and distributed under the terms of
# the 3-Clause BSD License. See the LICENSE file in the package base
# directory for details.
from sympy import Symbol # type: ignore

from discopop_library.discopop_optimizer.CostModels.CostModel import CostModel
from discopop_library.discopop_optimizer.classes.edges.GenericEdge import GenericEdge


class MutuallyExclusiveEdge(GenericEdge):
    """Marker edge expressing mutual exclusivity between path selections.

    Semantics: if the decision at the edge's source (A) is selected, the
    decision at its target (B) may not be selected. The edge carries no
    cost of its own; it only constrains the search space.
    """

    # NOTE: the stray `pass` of the original stub was removed — it was a
    # dead statement once get_cost_model() was added.

    def get_cost_model(self) -> CostModel:
        """Always raises: a mutual-exclusivity constraint has no cost.

        Raises:
            ValueError: unconditionally, to catch accidental use of this
                edge in cost aggregation.
        """
        raise ValueError("The cost of a MutuallyExclusiveEdge is not defined and may never be used!")
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,8 @@ def register_successor(self, other, root_node):
"""Registers a successor node for the given model.
Does not modify the stored model in self or other."""
raise NotImplementedError("Implementation needs to be provided by derived class: !", type(self))

def represents_sequential_version(self) -> bool:
    """Indicate whether this node models sequential execution.

    A node that carries no suggestion is the sequential baseline;
    a node with an attached suggestion characterizes a parallel option.
    """
    has_parallel_option = self.suggestion is not None
    return not has_parallel_option
11 changes: 9 additions & 2 deletions discopop_library/discopop_optimizer/classes/system/System.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ def __init__(self, arguments: OptimizerArguments):
# define Network
network = self.get_network()
network.add_connection(device_0, device_0, Integer(100000), Integer(0))
network.add_connection(device_0, device_1, Integer(100), Integer(1000000))
network.add_connection(device_1, device_0, Integer(100), Integer(1000000))
network.add_connection(device_0, device_1, Integer(10000), Integer(1000000))
network.add_connection(device_1, device_0, Integer(10000), Integer(1000000))
network.add_connection(device_1, device_1, Integer(100000), Integer(0))

network.add_connection(device_0, device_2, Integer(100), Integer(10000000))
Expand Down Expand Up @@ -118,6 +118,13 @@ def get_device(self, device_id: Optional[int]) -> Device:
return self.__devices[0]
return self.__devices[device_id]

def get_device_ids_by_type(self, device_type: type) -> List[int]:
    """Return ids of all registered devices whose concrete class is exactly
    `device_type`.

    Note: this is an exact-type match — subclasses of `device_type` are
    intentionally NOT included (assumed from the original `type(...) ==`
    comparison; use isinstance here only if subclass matching is desired).

    :param device_type: the device class to match against.
    :return: list of matching device ids, in dict iteration order.
    """
    # `is` is the correct identity comparison for exact-type checks
    # (comparing type objects with `==` is the E721 anti-pattern).
    return [
        device_id
        for device_id, device in self.__devices.items()
        if type(device) is device_type
    ]

def get_network(self) -> Network:
return self.__network

Expand Down
41 changes: 23 additions & 18 deletions discopop_library/discopop_optimizer/optimization/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,17 @@

import copy
from typing import Dict, List, Optional, Tuple, cast
import warnings

from sympy import Expr, Integer, Symbol
import sympy
from discopop_library.discopop_optimizer.CostModels.CostModel import CostModel
from discopop_library.discopop_optimizer.CostModels.utilities import get_node_performance_models
from discopop_library.discopop_optimizer.CostModels.DataTransfer.DataTransferCosts import add_data_transfer_costs
from discopop_library.discopop_optimizer.CostModels.utilities import (
get_node_performance_models,
get_performance_models_for_functions,
)
from discopop_library.discopop_optimizer.DataTransfers.DataTransfers import calculate_data_transfers

from discopop_library.discopop_optimizer.Variables.Experiment import Experiment
from discopop_library.discopop_optimizer.classes.context.ContextObject import ContextObject
Expand All @@ -23,7 +29,6 @@

def evaluate_configuration(
experiment: Experiment,
function_performance_models: Dict[FunctionRoot, List[Tuple[CostModel, ContextObject]]],
decisions: List[int],
arguments: OptimizerArguments,
) -> Tuple[Tuple[int, ...], Expr]:
Expand All @@ -42,24 +47,24 @@ def evaluate_configuration(
if main_function is None:
raise ValueError("No main function found!")

# identify function models which correspond to the given decisions
function_performance_models_without_context = get_performance_models_for_functions(
experiment, experiment.optimization_graph, restrict_to_decisions=set(decisions)
)

function_performance_models = calculate_data_transfers(
experiment.optimization_graph, function_performance_models_without_context
)
function_performance_models = add_data_transfer_costs(
experiment.optimization_graph,
function_performance_models,
experiment,
)

selected_function_models: Dict[FunctionRoot, Tuple[CostModel, ContextObject]] = dict()
for function in function_performance_models:
# get the correct model according to the selected decisions
selected_function_model: Optional[Tuple[CostModel, ContextObject]] = None
for tpl in function_performance_models[function]:
cost, ctx = tpl
# check if all decisions are specified
if set(cost.path_decisions).issubset(set(decisions)):
selected_function_model = tpl
selected_function_models[function] = selected_function_model
if selected_function_model is None:
raise ValueError(
"No valid configuration found for function: "
+ function.name
+ " and specified decisions: "
+ str(decisions)
)
if len(function_performance_models[function]) != 1:
warnings.warn("Selection for fucntion:" + function.name + " not unambiguous!")
selected_function_models[function] = function_performance_models[function][0]

# apply selected substitutions
# collect substitutions
Expand Down
Loading