From cc9c3c3de372a770df5207b0971b16fc69079227 Mon Sep 17 00:00:00 2001
From: dgolubovic
Date: Fri, 26 Jul 2024 09:25:19 +0000
Subject: [PATCH] Remove softmax decompose

Stop decomposing softmax in decompose_post_autograd: the op is now left
intact instead of being expanded into a reduce_max/subtract/exp/
reduce_sum/reciprocal/multiply graph. Also remove the now-unused
enable_stable_softmax compiler config option, its
PYBUDA_DISABLE_STABLE_SOFTMAX environment override, and the
PYBUDA_ENABLE_STABLE_SOFTMAX workarounds set in tests. A reference
sketch of the removed computation is appended after the diff.
---
 pybuda/pybuda/config.py                       |  7 -------
 pybuda/pybuda/op/eval/pybuda/nn.py            | 22 +---------------------
 pybuda/test/falcon/pybudify.py                |  1 -
 .../falcon/tests/falcon_modules/falcon.py     |  1 -
 pybuda/test/llama/pybudify_caching.py         |  1 -
 .../high_prio/cnn/pytorch/test_swin.py        |  1 -
 .../model_demos/models/falcon/pybudify.py     |  3 ---
 pybuda/test/test_fusing.py                    |  2 --
 8 files changed, 1 insertion(+), 37 deletions(-)

diff --git a/pybuda/pybuda/config.py b/pybuda/pybuda/config.py
index 8ac9077f7..45aa6f32e 100644
--- a/pybuda/pybuda/config.py
+++ b/pybuda/pybuda/config.py
@@ -174,7 +174,6 @@ class CompilerConfig:
     enable_auto_transposing_placement: bool = ("PYBUDA_ENABLE_AUTO_TRANSPOSE" in os.environ) # compiler automatically detects ops to transpose on placement when the flag is set
     fracture_groups: List[Tuple[List[Tuple[str, int, int]], List[str], List[int]]] = field(default_factory=lambda: list()) # see insert_fracture_group
     conv_multi_op_fracture_factor_override: Dict[str, int] = field(default_factory=lambda: dict()) # override multi op fracture factor for conv
-    enable_stable_softmax: bool = True
     enable_single_buffer_fallback: bool = False
 
     backend_opt_level: int = 4 # backend optimization level
@@ -235,9 +234,6 @@ def apply_env_config_overrides(self):
         if "PYBUDA_PRESTRIDE_DISABLE" in os.environ:
             self.enable_conv_prestride = not bool(int(os.environ["PYBUDA_PRESTRIDE_DISABLE"]))
 
-        if "PYBUDA_DISABLE_STABLE_SOFTMAX" in os.environ:
-            self.enable_stable_softmax = not bool(int(os.environ["PYBUDA_DISABLE_STABLE_SOFTMAX"]))
-
         if "PYBUDA_CONVERT_PARAMS_TO_TVM" in os.environ:
             self.convert_framework_params_to_tvm = bool(int(os.environ["PYBUDA_CONVERT_PARAMS_TO_TVM"]))
 
@@ -390,7 +386,6 @@ def set_configuration_options(
         backend_runtime_args: Optional[str] = None,
         enable_auto_fusing: Optional[bool] = None,
         enable_conv_prestride: Optional[bool] = None,
-        enable_stable_softmax: Optional[bool] = None,
         amp_level: Optional[int] = None,
         harvested_rows: Optional[List[List[int]]] = None,
         store_backend_db_to_yaml: Optional[bool] = None,
@@ -541,8 +536,6 @@ def set_configuration_options(
         g_compiler_config.enable_auto_fusing = enable_auto_fusing
     if enable_conv_prestride is not None:
         g_compiler_config.enable_conv_prestride = enable_conv_prestride
-    if enable_stable_softmax is not None:
-        g_compiler_config.enable_stable_softmax = enable_stable_softmax
     if amp_level is not None:
         g_compiler_config.amp_level = amp_level
     if harvested_rows is not None:
diff --git a/pybuda/pybuda/op/eval/pybuda/nn.py b/pybuda/pybuda/op/eval/pybuda/nn.py
index 9d9187d51..c509d80da 100644
--- a/pybuda/pybuda/op/eval/pybuda/nn.py
+++ b/pybuda/pybuda/op/eval/pybuda/nn.py
@@ -459,28 +459,8 @@ def decompose_post_autograd(op_type, attr, dc, inputs):
 
         Result of the operation.
 
     """
-
-    if op_type == "softmax":
-
-        assert len(inputs) == 1, "Softmax should have one operand."
-        assert len(attr) == 2, "Softmax should have two attributes."
-        x = inputs[0]
-        dim = attr[0]
-        stable = attr[1]
-        if stable and dc.get_compiler_cfg().enable_stable_softmax:
-            res_max = dc.op("reduce_max", (x, ), (dim, ))
-            res_x_max = dc.op("subtract", (x, res_max), ())
-            res_exp = dc.op(Exp.create(), (res_x_max, ), ())
-        else:
-            res_exp = dc.op(Exp.create(), (x, ), ())
-
-
-        res_exp_sum = dc.op("reduce_sum", (res_exp, ), (dim, ))
-        res_exp_sum = dc.op("add", (res_exp_sum, dc.tensor(torch.zeros(res_exp_sum.shape.as_list()) + 1e-10)), ())
-        res_exp_sum_recip = dc.op(Reciprocal.create(), (res_exp_sum, ), ())
-        result = dc.op("multiply", (res_exp, res_exp_sum_recip), ())
-        dc.fuse(result)
+    if op_type == "softmax":
         return
 
     if op_type == "softmax_bw":
diff --git a/pybuda/test/falcon/pybudify.py b/pybuda/test/falcon/pybudify.py
index 9223ec811..dcb62d90f 100644
--- a/pybuda/test/falcon/pybudify.py
+++ b/pybuda/test/falcon/pybudify.py
@@ -27,7 +27,6 @@ def __init__(self, pt_module, device='silicon', arch='wormhole_b0', precision='f
 
         # pybuda workarounds
         os.environ["GOLDEN_WORMHOLE_B0"] = "1" # golden should always simulate a B0 as that's all we use now
-        os.environ["PYBUDA_ENABLE_STABLE_SOFTMAX"] = "1" # improved accuracy - pybuda team surprised we need it though
         os.environ["PYBUDA_CONVERT_PARAMS_TO_TVM"] = "0" # faster compile times... why would this ever be 1?
         os.environ["TT_BACKEND_TIMEOUT"] = "0" # default is too aggressive for large models?
 
diff --git a/pybuda/test/falcon/tests/falcon_modules/falcon.py b/pybuda/test/falcon/tests/falcon_modules/falcon.py
index 78918dd52..f33c51144 100644
--- a/pybuda/test/falcon/tests/falcon_modules/falcon.py
+++ b/pybuda/test/falcon/tests/falcon_modules/falcon.py
@@ -1139,7 +1139,6 @@ def __init__(self, args):
         # pybuda workarounds
         os.environ["GOLDEN_WORMHOLE_B0"] = "1"
         os.environ["WHA0_DISABLE_RELAY_BUFS"] = "1"
-        os.environ["PYBUDA_ENABLE_STABLE_SOFTMAX"] = "1"
         os.environ["PYBUDA_CONVERT_PARAMS_TO_TVM"] = "0"
         os.environ["TT_BACKEND_TIMEOUT"] = "0"
 
diff --git a/pybuda/test/llama/pybudify_caching.py b/pybuda/test/llama/pybudify_caching.py
index 05554c389..b046da988 100644
--- a/pybuda/test/llama/pybudify_caching.py
+++ b/pybuda/test/llama/pybudify_caching.py
@@ -27,7 +27,6 @@ def __init__(self, pt_module, device='silicon', arch='wormhole_b0', precision='f
         #os.environ["PYBUDA_DISABLE_FORK_JOIN_BUF"] = "1"
         # os.environ["PYBUDA_DRAM_PICK_CAPACITY"] = "1"
         os.environ["WHA0_DISABLE_RELAY_BUFS"] = "1"
-        os.environ["PYBUDA_ENABLE_STABLE_SOFTMAX"] = "1"
         os.environ["PYBUDA_FUSE_STOP_ON_RECIPROCAL"] = "1"
         os.environ["PYBUDA_PLACER_SNAKE"] = "1"
         os.environ["LOGGER_LEVEL"] = log_level
diff --git a/pybuda/test/model_demos/high_prio/cnn/pytorch/test_swin.py b/pybuda/test/model_demos/high_prio/cnn/pytorch/test_swin.py
index 0a0ce5d1d..9410d1b6d 100644
--- a/pybuda/test/model_demos/high_prio/cnn/pytorch/test_swin.py
+++ b/pybuda/test/model_demos/high_prio/cnn/pytorch/test_swin.py
@@ -25,7 +25,6 @@ def test_swin_v1_tiny_4_224_hf_pytorch(test_device):
     compiler_cfg = pybuda.config._get_global_compiler_config()
     compiler_cfg.retain_tvm_python_files = True
     compiler_cfg.enable_tvm_constant_prop = True
-    os.environ["PYBUDA_ENABLE_STABLE_SOFTMAX"] = "1"
     os.environ["TVM_BACKTRACE"]="1"
 
     # STEP 2: Create PyBuda module from PyTorch model
diff --git a/pybuda/test/model_demos/models/falcon/pybudify.py b/pybuda/test/model_demos/models/falcon/pybudify.py
index 8a783b4e7..9ed3c6323 100644
--- a/pybuda/test/model_demos/models/falcon/pybudify.py
+++ b/pybuda/test/model_demos/models/falcon/pybudify.py
@@ -58,9 +58,6 @@ def __init__(
         os.environ[
"GOLDEN_WORMHOLE_B0" ] = "1" # golden should always simulate a B0 as that's all we use now - os.environ[ - "PYBUDA_ENABLE_STABLE_SOFTMAX" - ] = "1" # improved accuracy - pybuda team surprised we need it though os.environ[ "PYBUDA_CONVERT_PARAMS_TO_TVM" ] = "0" # faster compile times... why would this ever be 1? diff --git a/pybuda/test/test_fusing.py b/pybuda/test/test_fusing.py index 498f7a1a4..37703b4ab 100644 --- a/pybuda/test/test_fusing.py +++ b/pybuda/test/test_fusing.py @@ -342,8 +342,6 @@ def forward(self, act1): @pytest.mark.parametrize("dim", ["r", "c"]) def test_softmax(test_device, test_kind, dim): - pybuda.set_configuration_options(enable_stable_softmax=False) - os.environ["PYBUDA_FUSE_REDUCE"] = "1" dim_index = -1 if dim == "c" else -2