From cc9c3c3de372a770df5207b0971b16fc69079227 Mon Sep 17 00:00:00 2001
From: dgolubovic
Date: Fri, 26 Jul 2024 09:25:19 +0000
Subject: [PATCH] Remove softmax decompose

Stop decomposing softmax in decompose_post_autograd: the op is now left
intact instead of being expanded into a reduce_max/subtract/exp/
reduce_sum/reciprocal/multiply graph. Also remove the now-unused
enable_stable_softmax compiler config option, its
PYBUDA_DISABLE_STABLE_SOFTMAX environment override, and the
PYBUDA_ENABLE_STABLE_SOFTMAX workarounds set in tests. A reference
sketch of the removed computation is appended after the diff.
---
 pybuda/pybuda/config.py                       |  7 -------
 pybuda/pybuda/op/eval/pybuda/nn.py            | 22 +---------------------
 pybuda/test/falcon/pybudify.py                |  1 -
 .../falcon/tests/falcon_modules/falcon.py     |  1 -
 pybuda/test/llama/pybudify_caching.py         |  1 -
 .../high_prio/cnn/pytorch/test_swin.py        |  1 -
 .../model_demos/models/falcon/pybudify.py     |  3 ---
 pybuda/test/test_fusing.py                    |  2 --
 8 files changed, 1 insertion(+), 37 deletions(-)

diff --git a/pybuda/pybuda/config.py b/pybuda/pybuda/config.py
index 8ac9077f7..45aa6f32e 100644
--- a/pybuda/pybuda/config.py
+++ b/pybuda/pybuda/config.py
@@ -174,7 +174,6 @@ class CompilerConfig:
     enable_auto_transposing_placement: bool = ("PYBUDA_ENABLE_AUTO_TRANSPOSE" in os.environ) # compiler automatically detects ops to transpose on placement when the flag is set
     fracture_groups: List[Tuple[List[Tuple[str, int, int]], List[str], List[int]]] = field(default_factory=lambda: list()) # see insert_fracture_group
     conv_multi_op_fracture_factor_override: Dict[str, int] = field(default_factory=lambda: dict()) # override multi op fracture factor for conv
-    enable_stable_softmax: bool = True
     enable_single_buffer_fallback: bool = False
 
     backend_opt_level: int = 4 # backend optimization level
@@ -235,9 +234,6 @@ def apply_env_config_overrides(self):
         if "PYBUDA_PRESTRIDE_DISABLE" in os.environ:
             self.enable_conv_prestride = not bool(int(os.environ["PYBUDA_PRESTRIDE_DISABLE"]))
 
-        if "PYBUDA_DISABLE_STABLE_SOFTMAX" in os.environ:
-            self.enable_stable_softmax = not bool(int(os.environ["PYBUDA_DISABLE_STABLE_SOFTMAX"]))
-
         if "PYBUDA_CONVERT_PARAMS_TO_TVM" in os.environ:
             self.convert_framework_params_to_tvm = bool(int(os.environ["PYBUDA_CONVERT_PARAMS_TO_TVM"]))
 
@@ -390,7 +386,6 @@ def set_configuration_options(
         backend_runtime_args: Optional[str] = None,
         enable_auto_fusing: Optional[bool] = None,
         enable_conv_prestride: Optional[bool] = None,
-        enable_stable_softmax: Optional[bool] = None,
         amp_level: Optional[int] = None,
         harvested_rows: Optional[List[List[int]]] = None,
         store_backend_db_to_yaml: Optional[bool] = None,
@@ -541,8 +536,6 @@ def set_configuration_options(
         g_compiler_config.enable_auto_fusing = enable_auto_fusing
     if enable_conv_prestride is not None:
         g_compiler_config.enable_conv_prestride = enable_conv_prestride
-    if enable_stable_softmax is not None:
-        g_compiler_config.enable_stable_softmax = enable_stable_softmax
     if amp_level is not None:
         g_compiler_config.amp_level = amp_level
     if harvested_rows is not None:
diff --git a/pybuda/pybuda/op/eval/pybuda/nn.py b/pybuda/pybuda/op/eval/pybuda/nn.py
index 9d9187d51..c509d80da 100644
--- a/pybuda/pybuda/op/eval/pybuda/nn.py
+++ b/pybuda/pybuda/op/eval/pybuda/nn.py
@@ -459,28 +459,8 @@ def decompose_post_autograd(op_type, attr, dc, inputs):
 
         Result of the operation.
 
     """
-
-    if op_type == "softmax":
-
-        assert len(inputs) == 1, "Softmax should have one operand."
-        assert len(attr) == 2, "Softmax should have two attributes."
-        x = inputs[0]
-        dim = attr[0]
-        stable = attr[1]
-        if stable and dc.get_compiler_cfg().enable_stable_softmax:
-            res_max = dc.op("reduce_max", (x, ), (dim, ))
-            res_x_max = dc.op("subtract", (x, res_max), ())
-            res_exp = dc.op(Exp.create(), (res_x_max, ), ())
-        else:
-            res_exp = dc.op(Exp.create(), (x, ), ())
-
-
-        res_exp_sum = dc.op("reduce_sum", (res_exp, ), (dim, ))
-        res_exp_sum = dc.op("add", (res_exp_sum, dc.tensor(torch.zeros(res_exp_sum.shape.as_list()) + 1e-10)), ())
-        res_exp_sum_recip = dc.op(Reciprocal.create(), (res_exp_sum, ), ())
-        result = dc.op("multiply", (res_exp, res_exp_sum_recip), ())
-        dc.fuse(result)
+    if op_type == "softmax":
         return
 
     if op_type == "softmax_bw":
diff --git a/pybuda/test/falcon/pybudify.py b/pybuda/test/falcon/pybudify.py
index 9223ec811..dcb62d90f 100644
--- a/pybuda/test/falcon/pybudify.py
+++ b/pybuda/test/falcon/pybudify.py
@@ -27,7 +27,6 @@ def __init__(self, pt_module, device='silicon', arch='wormhole_b0', precision='f
 
         # pybuda workarounds
         os.environ["GOLDEN_WORMHOLE_B0"] = "1" # golden should always simulate a B0 as that's all we use now
-        os.environ["PYBUDA_ENABLE_STABLE_SOFTMAX"] = "1" # improved accuracy - pybuda team surprised we need it though
         os.environ["PYBUDA_CONVERT_PARAMS_TO_TVM"] = "0" # faster compile times... why would this ever be 1?
         os.environ["TT_BACKEND_TIMEOUT"] = "0" # default is too aggressive for large models?
 
diff --git a/pybuda/test/falcon/tests/falcon_modules/falcon.py b/pybuda/test/falcon/tests/falcon_modules/falcon.py
index 78918dd52..f33c51144 100644
--- a/pybuda/test/falcon/tests/falcon_modules/falcon.py
+++ b/pybuda/test/falcon/tests/falcon_modules/falcon.py
@@ -1139,7 +1139,6 @@ def __init__(self, args):
         # pybuda workarounds
         os.environ["GOLDEN_WORMHOLE_B0"] = "1"
         os.environ["WHA0_DISABLE_RELAY_BUFS"] = "1"
-        os.environ["PYBUDA_ENABLE_STABLE_SOFTMAX"] = "1"
         os.environ["PYBUDA_CONVERT_PARAMS_TO_TVM"] = "0"
         os.environ["TT_BACKEND_TIMEOUT"] = "0"
 
diff --git a/pybuda/test/llama/pybudify_caching.py b/pybuda/test/llama/pybudify_caching.py
index 05554c389..b046da988 100644
--- a/pybuda/test/llama/pybudify_caching.py
+++ b/pybuda/test/llama/pybudify_caching.py
@@ -27,7 +27,6 @@ def __init__(self, pt_module, device='silicon', arch='wormhole_b0', precision='f
         #os.environ["PYBUDA_DISABLE_FORK_JOIN_BUF"] = "1"
         # os.environ["PYBUDA_DRAM_PICK_CAPACITY"] = "1"
         os.environ["WHA0_DISABLE_RELAY_BUFS"] = "1"
-        os.environ["PYBUDA_ENABLE_STABLE_SOFTMAX"] = "1"
         os.environ["PYBUDA_FUSE_STOP_ON_RECIPROCAL"] = "1"
         os.environ["PYBUDA_PLACER_SNAKE"] = "1"
         os.environ["LOGGER_LEVEL"] = log_level
diff --git a/pybuda/test/model_demos/high_prio/cnn/pytorch/test_swin.py b/pybuda/test/model_demos/high_prio/cnn/pytorch/test_swin.py
index 0a0ce5d1d..9410d1b6d 100644
--- a/pybuda/test/model_demos/high_prio/cnn/pytorch/test_swin.py
+++ b/pybuda/test/model_demos/high_prio/cnn/pytorch/test_swin.py
@@ -25,7 +25,6 @@ def test_swin_v1_tiny_4_224_hf_pytorch(test_device):
     compiler_cfg = pybuda.config._get_global_compiler_config()
     compiler_cfg.retain_tvm_python_files = True
     compiler_cfg.enable_tvm_constant_prop = True
-    os.environ["PYBUDA_ENABLE_STABLE_SOFTMAX"] = "1"
     os.environ["TVM_BACKTRACE"]="1"
 
     # STEP 2: Create PyBuda module from PyTorch model
diff --git a/pybuda/test/model_demos/models/falcon/pybudify.py b/pybuda/test/model_demos/models/falcon/pybudify.py
index 8a783b4e7..9ed3c6323 100644
--- a/pybuda/test/model_demos/models/falcon/pybudify.py
+++ b/pybuda/test/model_demos/models/falcon/pybudify.py
@@ -58,9 +58,6 @@ def __init__(
         os.environ[
"GOLDEN_WORMHOLE_B0" ] = "1" # golden should always simulate a B0 as that's all we use now - os.environ[ - "PYBUDA_ENABLE_STABLE_SOFTMAX" - ] = "1" # improved accuracy - pybuda team surprised we need it though os.environ[ "PYBUDA_CONVERT_PARAMS_TO_TVM" ] = "0" # faster compile times... why would this ever be 1? diff --git a/pybuda/test/test_fusing.py b/pybuda/test/test_fusing.py index 498f7a1a4..37703b4ab 100644 --- a/pybuda/test/test_fusing.py +++ b/pybuda/test/test_fusing.py @@ -342,8 +342,6 @@ def forward(self, act1): @pytest.mark.parametrize("dim", ["r", "c"]) def test_softmax(test_device, test_kind, dim): - pybuda.set_configuration_options(enable_stable_softmax=False) - os.environ["PYBUDA_FUSE_REDUCE"] = "1" dim_index = -1 if dim == "c" else -2