From ea01d03d9fe1f96fc4e2dc6388538a43eb2592a8 Mon Sep 17 00:00:00 2001
From: Philip Mueller <philip.mueller@cscs.ch>
Date: Tue, 28 Jan 2025 07:49:56 +0100
Subject: [PATCH] Included the changes suggested by Philip Schaad.

This includes
- The renaming to `SingleUseData`.
- The correct scanning of InterstateEdges, before no nested edges were considered.
- Inclusion of the conditions, such as the conditions used by `ConditionalBlock`, in the scan, they are now handled in the same way as interstate edges.

There is also a new test for the last case.
---
 .../passes/analysis/analysis.py               |  64 +++---
 ...a_test.py => find_single_use_data_test.py} | 186 +++++++++++++-----
 2 files changed, 178 insertions(+), 72 deletions(-)
 rename tests/passes/{find_exclusive_data_test.py => find_single_use_data_test.py} (57%)

diff --git a/dace/transformation/passes/analysis/analysis.py b/dace/transformation/passes/analysis/analysis.py
index cd114c08a8..7798340884 100644
--- a/dace/transformation/passes/analysis/analysis.py
+++ b/dace/transformation/passes/analysis/analysis.py
@@ -335,15 +335,19 @@ def apply_pass(self, top_sdfg: SDFG, _) -> Dict[int, Dict[str, Set[SDFGState]]]:
 
 @properties.make_properties
 @transformation.explicit_cf_compatible
-class FindExclusiveData(ppl.Pass):
+class FindSingleUseData(ppl.Pass):
     """
     For each SDFG find all data descriptors that are referenced in exactly one location.
 
-    This means that for every data descriptor there exists exactly one AccessNode that
-    refers to that data. In addition to this the following rules applies as well:
-    - If the data is read by at least one interstate edge it will not be classified as exclusive.
-    - If there is no reference to a data descriptor, i.e. it exists inside `SDFG.arrays`
-        but there is no AccessNode, then it is _not_ classified as exclusive.
+    In addition to the requirement that there exists exactly one AccessNode that
+    refers to a data descriptor the following conditions have to be meet as well:
+    - The data is not read on an interstate edge.
+    - The data is not accessed in the branch condition, loop condition, etc. of
+        control flow regions.
+    - There must be at least one AccessNode that refers to the data. I.e. if it exists
+        inside `SDFG.arrays` but there is no AccessNode, then it is _not_ included.
+
+    It is also important to note that the degree of the AccessNodes are ignored.
     """
 
     CATEGORY: str = 'Analysis'
@@ -353,49 +357,53 @@ def modifies(self) -> ppl.Modifies:
 
     def should_reapply(self, modified: ppl.Modifies) -> bool:
         # If anything was modified, reapply
-        return modified & ppl.Modifies.AccessNodes & ppl.Modifies.States
+        return modified & ppl.Modifies.AccessNodes & ppl.Modifies.CFG
 
     def apply_pass(self, sdfg: SDFG, _) -> Dict[SDFG, Set[str]]:
         """
         :return: A dictionary mapping SDFGs to a `set` of strings containing the name
-            of the data descriptors that are exclusively used.
+            of the data descriptors that are only used once.
         """
-        # pschaad: Should we index on cfg or the SDFG itself.
+        # TODO(pschaad): Should we index on cfg or the SDFG itself.
         exclusive_data: Dict[SDFG, Set[str]] = {}
         for nsdfg in sdfg.all_sdfgs_recursive():
-            exclusive_data[nsdfg] = self._find_exclusive_data_in_sdfg(nsdfg)
+            exclusive_data[nsdfg] = self._find_single_use_data_in_sdfg(nsdfg)
         return exclusive_data
 
-    def _find_exclusive_data_in_sdfg(self, sdfg: SDFG) -> Set[str]:
-        """Scans an SDFG and computes the exclusive data for that SDFG.
+    def _find_single_use_data_in_sdfg(self, sdfg: SDFG) -> Set[str]:
+        """Scans an SDFG and computes the data that is only used once in the SDFG.
 
-        This function only scans `sdfg` and does not go into nested ones.
+        The rules used to classify data descriptors are outlined above. The function
+        will not scan nested SDFGs.
 
-        :return: The set of data descriptors that have exclusive access.
+        :return: The set of data descriptors that are used once in the SDFG.
         """
-        # Data descriptor that are classified, up to now, as exclusive.
-        #  We add and data that we do not know to it the first time we seen it
-        #  and might remove it if we found another reference.
-        exclusive_data: Set[str] = set()
+        # If we encounter a data descriptor for the first time we immediately
+        #  classify it as single use. We will undo this decision as soon as
+        #  learn that it is used somewhere else.
+        single_use_data: Set[str] = set()
         previously_seen: Set[str] = set()
 
         for state in sdfg.states():
             for dnode in state.data_nodes():
                 data_name: str = dnode.data
-                if data_name in exclusive_data:
-                    exclusive_data.discard(data_name)  # Classified too early; Undo
+                if data_name in single_use_data:
+                    single_use_data.discard(data_name)  # Classified too early -> Undo
                 elif data_name not in previously_seen:
-                    exclusive_data.add(data_name)  # Never seen; Assume it is exclusive.
+                    single_use_data.add(data_name)  # Never seen -> Assume single use
                 previously_seen.add(data_name)
 
-        # Compute the set of all data that is accessed, i.e. read, by the edges.
-        interstate_read_symbols: Set[str] = set()
-        for edge in sdfg.edges():
-            interstate_read_symbols.update(edge.data.free_symbols)
+        # By definition, data that is referenced by interstate edges is not single
+        #  use data, also remove it.
+        for edge in sdfg.all_interstate_edges():
+            single_use_data.difference_update(edge.data.free_symbols)
+
+        # By definition, data that is referenced by the conditions (branching condition,
+        #  loop condition, ...) is not single use data, also remove that.
+        for cfr in sdfg.all_control_flow_regions():
+            single_use_data.difference_update(cfr.used_symbols(all_symbols=True, with_contents=False))
 
-        # Enforces the first rule, "if data is accessed by an interstate edge it will
-        #  not be classified as exclusive".
-        return exclusive_data.difference(interstate_read_symbols)
+        return single_use_data
 
 
 @properties.make_properties
diff --git a/tests/passes/find_exclusive_data_test.py b/tests/passes/find_single_use_data_test.py
similarity index 57%
rename from tests/passes/find_exclusive_data_test.py
rename to tests/passes/find_single_use_data_test.py
index 385f90ca91..a3f60dd7a7 100644
--- a/tests/passes/find_exclusive_data_test.py
+++ b/tests/passes/find_single_use_data_test.py
@@ -1,15 +1,15 @@
 # Copyright 2019-2025 ETH Zurich and the DaCe authors. All rights reserved.
 from typing import Dict, Set, Tuple
 import dace
-from dace.transformation.passes.analysis import FindExclusiveData
+from dace.transformation.passes.analysis import FindSingleUseData
 
 def perform_scan(sdfg: dace.SDFG) -> Dict[dace.SDFG, Set[str]]:
-    scanner = FindExclusiveData()
+    scanner = FindSingleUseData()
     return scanner.apply_pass(sdfg, None)
 
 
-def _make_all_exclusive_data_but_one_unused_sdfg() -> dace.SDFG:
-    sdfg = dace.SDFG('all_exclusive_data_but_one_unused_sdfg')
+def _make_all_single_use_data_but_one_unused_sdfg() -> dace.SDFG:
+    sdfg = dace.SDFG('all_single_use_data_but_one_unused_sdfg')
     state1 = sdfg.add_state(is_start_block=True)
     state2 = sdfg.add_state_after(state1)
 
@@ -35,18 +35,19 @@ def _make_all_exclusive_data_but_one_unused_sdfg() -> dace.SDFG:
     return sdfg
 
 
-def test_all_exclusive_data_but_one_unused():
-    sdfg = _make_all_exclusive_data_but_one_unused_sdfg()
+def test_all_single_use_data_but_one_unused():
+    sdfg = _make_all_single_use_data_but_one_unused_sdfg()
     assert len(sdfg.arrays) == 5
 
-    # Because it is not used `e` is not considered to be exclusively used.
-    #  This is a matter of definition.
-    expected_exclusive_set = {aname for aname in sdfg.arrays.keys() if aname != 'e'}
+    # Because `e` is not used inside the SDFG, it is not included in the returned set,
+    #  all other descriptors are included because they appear once.
+    expected_single_use_set = {aname for aname in sdfg.arrays.keys() if aname != 'e'}
 
-    exclusive_set = perform_scan(sdfg)
-    assert len(exclusive_set[sdfg]) == 4
+    single_use_set = perform_scan(sdfg)
 
-    assert exclusive_set[sdfg] == expected_exclusive_set
+    assert len(single_use_set[sdfg]) == 4
+    assert len(single_use_set) == 1
+    assert single_use_set[sdfg] == expected_single_use_set
 
 
 def _make_multiple_access_same_state_sdfg() -> dace.SDFG:
@@ -79,12 +80,13 @@ def test_multiple_access_same_state():
     sdfg = _make_multiple_access_same_state_sdfg()
     assert len(sdfg.arrays) == 3
 
-    # `a` is not exclusive because there exists multiple access nodes in a single
-    #  state for `a`.
-    expected_exclusive_set = {aname for aname in sdfg.arrays.keys() if aname != 'a'}
-    exclusive_set = perform_scan(sdfg)
-    assert len(exclusive_set[sdfg]) == 2
-    assert expected_exclusive_set == exclusive_set[sdfg]
+    # `a` is not single use data because there are multiple access nodes for it
+    #  in a single state.
+    expected_single_use_set = {aname for aname in sdfg.arrays.keys() if aname != 'a'}
+    single_use_set = perform_scan(sdfg)
+    assert len(single_use_set) == 1
+    assert len(single_use_set[sdfg]) == 2
+    assert expected_single_use_set == single_use_set[sdfg]
 
 
 def _make_multiple_single_access_node_same_state_sdfg() -> dace.SDFG:
@@ -119,13 +121,14 @@ def test_multiple_single_access_node_same_state_sdfg() -> dace.SDFG:
     sdfg = _make_multiple_single_access_node_same_state_sdfg()
     assert len(sdfg.arrays) == 3
 
-    # Unlike `test_multiple_access_same_state()` here `a` is included in the exclusive
+    # Unlike `test_multiple_access_same_state()` here `a` is included in the single use
     #  set, because, there is only a single AccessNode, that is used multiple times,
     #  i.e. has an output degree larger than one.
-    expected_exclusive_set = sdfg.arrays.keys()
-    exclusive_set = perform_scan(sdfg)
-    assert len(exclusive_set[sdfg]) == 3
-    assert expected_exclusive_set == exclusive_set[sdfg]
+    expected_single_use_set = sdfg.arrays.keys()
+    single_use_set = perform_scan(sdfg)
+    assert len(single_use_set) == 1
+    assert len(single_use_set[sdfg]) == 3
+    assert expected_single_use_set == single_use_set[sdfg]
 
 
 def _make_multiple_access_different_states_sdfg() -> dace.SDFG:
@@ -161,11 +164,12 @@ def test_multiple_access_different_states():
     sdfg = _make_multiple_access_different_states_sdfg()
     assert len(sdfg.arrays) == 3
 
-    # `a` is not included in the exclusive set, because it is used in two different states.
-    exclusive_set = perform_scan(sdfg)
-    expected_exclusive_set = {aname for aname in sdfg.arrays.keys() if aname != 'a'}
-    assert len(exclusive_set[sdfg]) == 2
-    assert expected_exclusive_set == exclusive_set[sdfg]
+    # `a` is not included in the single use set, because it is used in two different states.
+    single_use_set = perform_scan(sdfg)
+    expected_single_use_set = {aname for aname in sdfg.arrays.keys() if aname != 'a'}
+    assert len(single_use_set) == 1
+    assert len(single_use_set[sdfg]) == 2
+    assert expected_single_use_set == single_use_set[sdfg]
 
 
 def _make_access_only_on_interstate_edge_sdfg() -> dace.SDFG:
@@ -201,12 +205,13 @@ def test_access_only_on_interstate_edge():
     sdfg = _make_access_only_on_interstate_edge_sdfg()
     assert len(sdfg.arrays) == 5
 
-    # `e` is only accessed on the interstate edge. So it is technically an exclusive
-    #  data. But by definition we handle this case as non exclusive.
-    expected_exclusive_set = {aname for aname in sdfg.arrays.keys() if aname != 'e'}
-    exclusive_set = perform_scan(sdfg)
-    assert len(exclusive_set[sdfg]) == 4
-    assert exclusive_set[sdfg] == expected_exclusive_set
+    # `e` is only accessed on the interstate edge. So it is technically an single use
+    #  data. But by definition we handle this case as non single_use.
+    expected_single_use_set = {aname for aname in sdfg.arrays.keys() if aname != 'e'}
+    single_use_set = perform_scan(sdfg)
+    assert len(single_use_set) == 1
+    assert len(single_use_set[sdfg]) == 4
+    assert single_use_set[sdfg] == expected_single_use_set
 
 
 def _make_additional_access_on_interstate_edge_sdfg() -> dace.SDFG:
@@ -248,11 +253,13 @@ def test_additional_access_on_interstate_edge():
     sdfg = _make_additional_access_on_interstate_edge_sdfg()
     assert len(sdfg.arrays) == 6
 
-    # As in `test_access_only_on_interstate_edge` `e` is not part of the exclusive set.
-    expected_exclusive_set = {aname for aname in sdfg.arrays.keys() if aname != 'e'}
-    exclusive_set = perform_scan(sdfg)
-    assert len(exclusive_set[sdfg]) == 5
-    assert exclusive_set[sdfg] == expected_exclusive_set
+    # There is one AccessNode for `a`, but as in `test_access_only_on_interstate_edge`
+    #  `e` is also used on the inter state edge, so it is not included.
+    expected_single_use_set = {aname for aname in sdfg.arrays.keys() if aname != 'e'}
+    single_use_set = perform_scan(sdfg)
+    assert len(single_use_set) == 1
+    assert len(single_use_set[sdfg]) == 5
+    assert single_use_set[sdfg] == expected_single_use_set
 
 
 def _make_access_nested_nsdfg() -> dace.SDFG:
@@ -319,18 +326,109 @@ def test_access_nested_sdfg():
     sdfg, nested_sdfg = _make_access_nested_sdfg()
     assert all(len(nsdfg.arrays) == 2 for nsdfg in [sdfg, nested_sdfg])
 
-    # In both SDFGs all data descriptors are exclusive.
-    expected_exclusive_set = {'a', 'b'}
-    exclusive_sets = perform_scan(sdfg)
+    # In the top and the nested SDFG `a` and `b` are both used once, so for
+    #  both they are included in the single use set.
+    #  Essentially tests if there is separation between the two.
+    expected_single_use_set = {'a', 'b'}
+    single_use_sets = perform_scan(sdfg)
 
-    assert all(exclusive_sets[nsdfg] == expected_exclusive_set for nsdfg in [sdfg, nested_sdfg])
+    assert len(single_use_sets) == 2
+    assert all(single_use_sets[nsdfg] == expected_single_use_set for nsdfg in [sdfg, nested_sdfg])
+
+
+def _make_conditional_block_sdfg() -> dace.SDFG:
+    sdfg = dace.SDFG("conditional_block_sdfg")
+
+    for name in ["a", "b", "c", "d", "cond", "cond2"]:
+        sdfg.add_scalar(
+                name,
+                dtype=dace.bool_ if name.startswith("cond") else dace.float64,
+                transient=False
+        )
+    sdfg.arrays["b"].transient = True
+    sdfg.arrays["cond2"].transient = True
+
+    entry_state = sdfg.add_state("entry", is_start_block=True)
+    entry_state.add_nedge(
+            entry_state.add_access("a"),
+            entry_state.add_access("b"),
+            sdfg.make_array_memlet("a")
+    )
+    cond_tasklet: dace.nodes.Tasklet = entry_state.add_tasklet(
+            "cond_processing",
+            inputs={"__in"},
+            code="__out = not __in",
+            outputs={"__out"},
+    )
+    entry_state.add_edge(
+            entry_state.add_access("cond"),
+            None,
+            cond_tasklet,
+            "__in",
+            dace.Memlet("cond[0]")
+    )
+    entry_state.add_edge(
+            cond_tasklet,
+            "__out",
+            entry_state.add_access("cond2"),
+            None,
+            dace.Memlet("cond2[0]")
+    )
+
+    if_region = dace.sdfg.state.ConditionalBlock("if")
+    sdfg.add_node(if_region)
+    sdfg.add_edge(
+            entry_state,
+            if_region,
+            dace.InterstateEdge()
+    )
+
+    then_body = dace.sdfg.state.ControlFlowRegion("then_body", sdfg=sdfg)
+    tstate = then_body.add_state("true_branch", is_start_block=True)
+    tstate.add_nedge(
+            tstate.add_access("b"),
+            tstate.add_access("c"),
+            sdfg.make_array_memlet("b")
+    )
+    if_region.add_branch(
+            dace.sdfg.state.CodeBlock("cond2"),
+            then_body
+    )
+
+    else_body = dace.sdfg.state.ControlFlowRegion("else_body", sdfg=sdfg)
+    fstate = else_body.add_state("false_branch", is_start_block=True)
+    fstate.add_nedge(
+            fstate.add_access("b"),
+            fstate.add_access("d"),
+            sdfg.make_array_memlet("d")
+    )
+    if_region.add_branch(
+            dace.sdfg.state.CodeBlock("not (cond2)"),
+            else_body
+    )
+    sdfg.validate()
+    return sdfg
+
+
+def test_conditional_block():
+    sdfg = _make_conditional_block_sdfg()
+
+    # `b` is not in no single use data, because there are three AccessNodes for it.
+    #  `cond2` is no single use data, although there is exactly one AccessNode for
+    #  it, it is used in the condition expression.
+    expected_single_use_set = {a for a in sdfg.arrays.keys() if a not in ["b", "cond2"]}
+    single_use_set = perform_scan(sdfg)
+
+    assert len(single_use_set) == 1
+    assert single_use_set[sdfg] == expected_single_use_set
 
 
 if __name__ == '__main__':
-    test_all_exclusive_data_but_one_unused()
+    test_all_single_use_data_but_one_unused()
     test_multiple_access_same_state()
     test_multiple_single_access_node_same_state_sdfg()
     test_multiple_access_different_states()
     test_access_only_on_interstate_edge()
     test_additional_access_on_interstate_edge()
     test_access_nested_sdfg()
+    test_conditional_block()