From f03bf935365f9a81203c53087fc76d2b3bb74c37 Mon Sep 17 00:00:00 2001
From: Yu Gao <145494740+yugaoTT@users.noreply.github.com>
Date: Tue, 3 Sep 2024 10:20:32 -0400
Subject: [PATCH] Use new resnet test infra for GS (#12072)

* #0: use new resnet test infra for GS

* #0: adjust 2cq e2e time

* #0: change to fn wrapper for gs and wh

* #0: update README

* #0: adjust wh compile time
---
 README.md                                  |   2 +-
 models/demos/ttnn_resnet/README.md         |  23 +-
 .../tests/test_perf_device_ttnn_resnet.py  |  36 ++-
 .../tests/test_perf_ttnn_resnet.py         | 129 ++++++++++-
 .../tests/test_ttnn_resnet50_performant.py | 215 +++++++++++++++---
 tests/scripts/run_performance.sh           |  16 +-
 6 files changed, 365 insertions(+), 56 deletions(-)

diff --git a/README.md b/README.md
index 7e993d1cc8f..f734e045213 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@

 | Model | Batch | End-to-end throughput [1] | Device throughput [2] | Target throughput |
 |---------------------------------------------------------- |---------------------|------------------------------|-----------------------------|-------------------------------------|
-| [ResNet-50](./models/demos/resnet) (fps) | 20 | 5,600 | 7,560 | 10,000 |
+| [ResNet-50](./models/demos/ttnn_resnet) (fps) | 20 | 5,100 | 6,600 | 10,000 |
 | [BERT-Large](./models/demos/bert) (sen/s) | 12 | 370 | 406 | 410 |
 | [Falcon7B-decode](./models/demos/ttnn_falcon7b) (t/s) | 32 | 135 | 135 | 140 |
 | [ViT](./models/demos/grayskull/vit) (fps) | 8 | 860 | 1570 | 2000 |
diff --git a/models/demos/ttnn_resnet/README.md b/models/demos/ttnn_resnet/README.md
index e8be0f3948d..4beed8bd285 100644
--- a/models/demos/ttnn_resnet/README.md
+++ b/models/demos/ttnn_resnet/README.md
@@ -28,14 +28,25 @@ To obtain a huggingface token visit: https://huggingface.co/docs/hub/security-to

 ### Single Device

-#### Device Performance
-+ To obtain device performance, run `WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml ./tt_metal/tools/profiler/profile_this.py -c "pytest models/demos/ttnn_resnet/tests/test_ttnn_resnet50_performant.py::test_run_resnet50_inference[16-act_dtype0-weight_dtype0-math_fidelity0-device_params0]"`
-+ This will generate a CSV report under `/generated/profiler/reports/ops/`. The report file name is logged in the run output.
+#### Grayskull Device Performance
++ To obtain device performance, run `pytest models/demos/ttnn_resnet/tests/test_perf_device_ttnn_resnet.py::test_perf_device_bare_metal_gs`
++ This will run the model 4 times and generate CSV reports under `/generated/profiler/reports/ops/`. The report file name is logged in the run output.
++ It will also show a summary of the device throughput in the run output.

-#### End-to-End Performance
-+ For end-to-end performance, run `WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/ttnn_resnet/tests/test_perf_ttnn_resnet.py::test_perf_trace_2cqs_bare_metal[16-0.004-25-device_params0]`.
+#### Grayskull End-to-End Performance
++ For end-to-end performance, run `pytest models/demos/ttnn_resnet/tests/test_perf_ttnn_resnet.py::test_perf_trace_2cqs_bare_metal_gs[20-0.004-5-device_params0]`.
++ This will generate a CSV with the timings and throughputs.
++ **Expected end-to-end perf**: For batch = 20, it is about `5,100 fps` currently. This may vary machine to machine.
+
+#### Wormhole_B0 Device Performance
++ To obtain device performance, run `WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/ttnn_resnet/tests/test_perf_device_ttnn_resnet.py::test_perf_device_bare_metal_wh`
++ This will run the model 4 times and generate CSV reports under `/generated/profiler/reports/ops/`. The report file name is logged in the run output.
++ It will also show a summary of the device throughput in the run output.
+
+#### Wormhole_B0 End-to-End Performance
++ For end-to-end performance, run `WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/ttnn_resnet/tests/test_perf_ttnn_resnet.py::test_perf_trace_2cqs_bare_metal_wh[16-0.004-25-device_params0]`.
 + This will generate a CSV with the timings and throughputs.
-+ **Expected end-to-end perf**: For batch = 16, it is about `4100 fps` currently. This may vary machine to machine.
++ **Expected end-to-end perf**: For batch = 16, it is about `4,100 fps` currently. This may vary machine to machine.

 ### T3000
 #### End-to-End Performance
diff --git a/models/demos/ttnn_resnet/tests/test_perf_device_ttnn_resnet.py b/models/demos/ttnn_resnet/tests/test_perf_device_ttnn_resnet.py
index 6ffa491f513..50ee10a95d3 100644
--- a/models/demos/ttnn_resnet/tests/test_perf_device_ttnn_resnet.py
+++ b/models/demos/ttnn_resnet/tests/test_perf_device_ttnn_resnet.py
@@ -4,7 +4,35 @@
 import pytest

 from models.perf.device_perf_utils import run_device_perf, check_device_perf, prep_device_perf_report
-from models.utility_functions import run_for_wormhole_b0
+from models.utility_functions import run_for_wormhole_b0, run_for_grayskull
+
+
+@run_for_grayskull()
+@pytest.mark.models_device_performance_bare_metal
+@pytest.mark.parametrize(
+    "batch_size, test, expected_perf",
+    [
+        [20, "20-act_dtype0-weight_dtype0-math_fidelity0-device_params0", 6600],
+    ],
+)
+def test_perf_device_bare_metal_gs(batch_size, test, expected_perf):
+    subdir = "resnet50"
+    num_iterations = 4
+    margin = 0.03
+    command = f"pytest models/demos/ttnn_resnet/tests/test_ttnn_resnet50_performant.py::test_run_resnet50_inference_gs[{test}]"
+    cols = ["DEVICE FW", "DEVICE KERNEL", "DEVICE BRISC KERNEL"]
+    inference_time_key = "AVG DEVICE KERNEL SAMPLES/S"
+    expected_perf_cols = {inference_time_key: expected_perf}
+
+    post_processed_results = run_device_perf(command, subdir, num_iterations, cols, batch_size, True)
+    expected_results = check_device_perf(post_processed_results, margin, expected_perf_cols)
+    prep_device_perf_report(
+        model_name=f"ttnn_resnet50_batch_size{batch_size}",
+        batch_size=batch_size,
+        post_processed_results=post_processed_results,
+        expected_results=expected_results,
+        comments=test,
+    )


 @run_for_wormhole_b0()
@@ -15,13 +43,11 @@
         [16, "16-act_dtype0-weight_dtype0-math_fidelity0-device_params0", 5020],
     ],
 )
-def test_perf_device_bare_metal(batch_size, test, expected_perf):
+def test_perf_device_bare_metal_wh(batch_size, test, expected_perf):
     subdir = "resnet50"
     num_iterations = 4
     margin = 0.03
-    command = (
-        f"pytest models/demos/ttnn_resnet/tests/test_ttnn_resnet50_performant.py::test_run_resnet50_inference[{test}]"
-    )
+    command = f"pytest models/demos/ttnn_resnet/tests/test_ttnn_resnet50_performant.py::test_run_resnet50_inference_wh[{test}]"
     cols = ["DEVICE FW", "DEVICE KERNEL", "DEVICE BRISC KERNEL"]
     inference_time_key = "AVG DEVICE KERNEL SAMPLES/S"
     expected_perf_cols = {inference_time_key: expected_perf}
diff --git a/models/demos/ttnn_resnet/tests/test_perf_ttnn_resnet.py b/models/demos/ttnn_resnet/tests/test_perf_ttnn_resnet.py
index 264bec76e72..047d1ad12e8 100644
--- a/models/demos/ttnn_resnet/tests/test_perf_ttnn_resnet.py
+++ b/models/demos/ttnn_resnet/tests/test_perf_ttnn_resnet.py
@@ -13,9 +13,12 @@
 )

 from models.utility_functions import (
+    is_grayskull,
+    is_wormhole_b0,
     profiler,
     disable_persistent_kernel_cache,
     run_for_wormhole_b0,
+    run_for_grayskull,
 )
 from models.demos.ttnn_resnet.tests.ttnn_resnet_test_infra import create_test_infra

@@ -368,14 +371,130 @@ def run_perf_resnet(
     logger.info(f"{model_name} compile time: {compile_time}")


+@run_for_grayskull()
+@pytest.mark.models_performance_bare_metal
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 32768}], indirect=True)
+@pytest.mark.parametrize(
+    "batch_size, expected_inference_time, expected_compile_time",
+    ((20, 0.0080, 20),),
+)
+def test_perf_bare_metal_gs(
+    device,
+    use_program_cache,
+    batch_size,
+    expected_inference_time,
+    expected_compile_time,
+    hf_cat_image_sample_input,
+    model_location_generator,
+):
+    run_perf_resnet(
+        batch_size,
+        expected_inference_time,
+        expected_compile_time,
+        hf_cat_image_sample_input,
+        device,
+        "resnet50",
+        model_location_generator,
+    )
+
+
+@run_for_grayskull()
+@pytest.mark.models_performance_bare_metal
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 32768, "trace_region_size": 1332224}], indirect=True)
+@pytest.mark.parametrize(
+    "batch_size, enable_async_mode, expected_inference_time, expected_compile_time",
+    (
+        (20, True, 0.0064, 10),
+        (20, False, 0.0064, 5),
+    ),
+    indirect=["enable_async_mode"],
+)
+def test_perf_trace_bare_metal_gs(
+    device,
+    use_program_cache,
+    batch_size,
+    expected_inference_time,
+    expected_compile_time,
+    hf_cat_image_sample_input,
+    enable_async_mode,
+    model_location_generator,
+):
+    mode = "async" if enable_async_mode else "sync"
+    run_perf_resnet(
+        batch_size,
+        expected_inference_time,
+        expected_compile_time,
+        hf_cat_image_sample_input,
+        device,
+        f"resnet50_trace_{mode}",
+        model_location_generator,
+    )
+
+
+@run_for_grayskull()
+@pytest.mark.models_performance_bare_metal
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 32768, "num_hw_cqs": 2}], indirect=True)
+@pytest.mark.parametrize(
+    "batch_size, expected_inference_time, expected_compile_time",
+    ((20, 0.0100, 19),),
+)
+def test_perf_2cqs_bare_metal_gs(
+    device,
+    use_program_cache,
+    batch_size,
+    expected_inference_time,
+    expected_compile_time,
+    hf_cat_image_sample_input,
+    model_location_generator,
+):
+    run_perf_resnet(
+        batch_size,
+        expected_inference_time,
+        expected_compile_time,
+        hf_cat_image_sample_input,
+        device,
+        "resnet50_2cqs",
+        model_location_generator,
+    )
+
+
+@run_for_grayskull()
+@pytest.mark.models_performance_bare_metal
+@pytest.mark.parametrize(
+    "device_params", [{"l1_small_size": 32768, "trace_region_size": 1332224, "num_hw_cqs": 2}], indirect=True
+)
+@pytest.mark.parametrize(
+    "batch_size, expected_inference_time, expected_compile_time",
+    ((20, 0.004, 5),),
+)
+def test_perf_trace_2cqs_bare_metal_gs(
+    device,
+    use_program_cache,
+    batch_size,
+    expected_inference_time,
+    expected_compile_time,
+    hf_cat_image_sample_input,
+    model_location_generator,
+):
+    run_perf_resnet(
+        batch_size,
+        expected_inference_time,
+        expected_compile_time,
+        hf_cat_image_sample_input,
+        device,
+        "resnet50_trace_2cqs",
+        model_location_generator,
+    )
+
+
 @run_for_wormhole_b0()
 @pytest.mark.models_performance_bare_metal
 @pytest.mark.parametrize("device_params", [{"l1_small_size": 24576}], indirect=True)
 @pytest.mark.parametrize(
     "batch_size, expected_inference_time, expected_compile_time",
-    ((16, 0.0070, 26),),
+    ((16, 0.0070, 28),),
 )
-def test_perf_bare_metal(
+def test_perf_bare_metal_wh(
     device,
     use_program_cache,
     batch_size,
@@ -406,7 +525,7 @@
     ),
     indirect=["enable_async_mode"],
 )
-def test_perf_trace_bare_metal(
+def test_perf_trace_bare_metal_wh(
     device,
     use_program_cache,
     batch_size,
@@ -435,7 +554,7 @@
     "batch_size, expected_inference_time, expected_compile_time",
     ((16, 0.0070, 26),),
 )
-def test_perf_2cqs_bare_metal(
+def test_perf_2cqs_bare_metal_wh(
     device,
     use_program_cache,
     batch_size,
@@ -464,7 +583,7 @@
     "batch_size, expected_inference_time, expected_compile_time",
     ((16, 0.004, 25),),
 )
-def test_perf_trace_2cqs_bare_metal(
+def test_perf_trace_2cqs_bare_metal_wh(
     device,
     use_program_cache,
     batch_size,
diff --git a/models/demos/ttnn_resnet/tests/test_ttnn_resnet50_performant.py b/models/demos/ttnn_resnet/tests/test_ttnn_resnet50_performant.py
index 3bba1fbe0fe..764bef2cd67 100644
--- a/models/demos/ttnn_resnet/tests/test_ttnn_resnet50_performant.py
+++ b/models/demos/ttnn_resnet/tests/test_ttnn_resnet50_performant.py
@@ -7,9 +7,8 @@
 import ttnn
 from models.utility_functions import (
     is_wormhole_b0,
-    divup,
-    skip_for_grayskull,
 )
+from models.utility_functions import run_for_wormhole_b0, run_for_grayskull
 from models.demos.ttnn_resnet.tests.ttnn_resnet_test_infra import create_test_infra

 try:
@@ -20,13 +19,7 @@
     use_signpost = False


-@skip_for_grayskull(reason_str="Untested for Grayskull")
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576}], indirect=True)
-@pytest.mark.parametrize(
-    "batch_size, act_dtype, weight_dtype, math_fidelity",
-    ((16, ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.MathFidelity.LoFi),),
-)
-def test_run_resnet50_inference(
+def run_resnet50_inference(
     device, use_program_cache, batch_size, act_dtype, weight_dtype, math_fidelity, model_location_generator
 ):
     if batch_size == 8:
@@ -61,14 +54,7 @@
     test_infra.validate()


-@skip_for_grayskull(reason_str="Untested for Grayskull")
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576, "trace_region_size": 800768}], indirect=True)
-@pytest.mark.parametrize(
-    "batch_size, act_dtype, weight_dtype, math_fidelity",
-    ((16, ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.MathFidelity.LoFi),),
-)
-@pytest.mark.parametrize("enable_async", [True, False])
-def test_run_resnet50_trace_inference(
+def run_resnet50_trace_inference(
     device,
     use_program_cache,
     batch_size,
@@ -127,13 +113,7 @@
     device.enable_async(False)


-@skip_for_grayskull(reason_str="Untested for Grayskull")
-@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576, "num_hw_cqs": 2}], indirect=True)
-@pytest.mark.parametrize(
-    "batch_size, act_dtype, weight_dtype, math_fidelity",
-    ((16, ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.MathFidelity.LoFi),),
-)
-def test_run_resnet50_2cqs_inference(
+def run_resnet50_2cqs_inference(
     device, use_program_cache, batch_size, act_dtype, weight_dtype, math_fidelity, model_location_generator
 ):
     if batch_size == 8:
@@ -196,16 +176,7 @@
     test_infra.validate(output)


-@skip_for_grayskull(reason_str="Untested for Grayskull")
-@pytest.mark.parametrize(
-    "device_params", [{"l1_small_size": 24576, "trace_region_size": 800768, "num_hw_cqs": 2}], indirect=True
-)
-@pytest.mark.parametrize(
-    "batch_size, act_dtype, weight_dtype, math_fidelity",
-    ((16, ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.MathFidelity.LoFi),),
-)
-@pytest.mark.parametrize("enable_async", [True, False])
-def test_run_resnet50_trace_2cqs_inference(
+def run_resnet50_trace_2cqs_inference(
     device,
     use_program_cache,
     batch_size,
@@ -305,3 +276,179 @@
     test_infra.validate(output)

     device.enable_async(False)
+
+
+@run_for_grayskull()
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 32768}], indirect=True)
+@pytest.mark.parametrize(
+    "batch_size, act_dtype, weight_dtype, math_fidelity",
+    ((20, ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.MathFidelity.LoFi),),
+)
+def test_run_resnet50_inference_gs(
+    device, use_program_cache, batch_size, act_dtype, weight_dtype, math_fidelity, model_location_generator
+):
+    run_resnet50_inference(
+        device, use_program_cache, batch_size, act_dtype, weight_dtype, math_fidelity, model_location_generator
+    )
+
+
+@run_for_grayskull()
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 32768, "trace_region_size": 1332224}], indirect=True)
+@pytest.mark.parametrize(
+    "batch_size, act_dtype, weight_dtype, math_fidelity",
+    ((20, ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.MathFidelity.LoFi),),
+)
+@pytest.mark.parametrize("enable_async", [True, False])
+def test_run_resnet50_trace_inference_gs(
+    device,
+    use_program_cache,
+    batch_size,
+    act_dtype,
+    weight_dtype,
+    math_fidelity,
+    enable_async,
+    model_location_generator,
+):
+    run_resnet50_trace_inference(
+        device,
+        use_program_cache,
+        batch_size,
+        act_dtype,
+        weight_dtype,
+        math_fidelity,
+        enable_async,
+        model_location_generator,
+    )
+
+
+@run_for_grayskull()
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 32768, "num_hw_cqs": 2}], indirect=True)
+@pytest.mark.parametrize(
+    "batch_size, act_dtype, weight_dtype, math_fidelity",
+    ((20, ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.MathFidelity.LoFi),),
+)
+def test_run_resnet50_2cqs_inference_gs(
+    device, use_program_cache, batch_size, act_dtype, weight_dtype, math_fidelity, model_location_generator
+):
+    run_resnet50_2cqs_inference(
+        device, use_program_cache, batch_size, act_dtype, weight_dtype, math_fidelity, model_location_generator
+    )
+
+
+@run_for_grayskull()
+@pytest.mark.parametrize(
+    "device_params", [{"l1_small_size": 32768, "trace_region_size": 1332224, "num_hw_cqs": 2}], indirect=True
+)
+@pytest.mark.parametrize(
+    "batch_size, act_dtype, weight_dtype, math_fidelity",
+    ((20, ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.MathFidelity.LoFi),),
+)
+@pytest.mark.parametrize("enable_async", [True, False])
+def test_run_resnet50_trace_2cqs_inference_gs(
+    device,
+    use_program_cache,
+    batch_size,
+    act_dtype,
+    weight_dtype,
+    math_fidelity,
+    enable_async,
+    model_location_generator,
+):
+    run_resnet50_trace_2cqs_inference(
+        device,
+        use_program_cache,
+        batch_size,
+        act_dtype,
+        weight_dtype,
+        math_fidelity,
+        enable_async,
+        model_location_generator,
+    )
+
+
+@run_for_wormhole_b0()
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576}], indirect=True)
+@pytest.mark.parametrize(
+    "batch_size, act_dtype, weight_dtype, math_fidelity",
+    ((16, ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.MathFidelity.LoFi),),
+)
+def test_run_resnet50_inference_wh(
+    device, use_program_cache, batch_size, act_dtype, weight_dtype, math_fidelity, model_location_generator
+):
+    run_resnet50_inference(
+        device, use_program_cache, batch_size, act_dtype, weight_dtype, math_fidelity, model_location_generator
+    )
+
+
+@run_for_wormhole_b0()
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576, "trace_region_size": 800768}], indirect=True)
+@pytest.mark.parametrize(
+    "batch_size, act_dtype, weight_dtype, math_fidelity",
+    ((16, ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.MathFidelity.LoFi),),
+)
+@pytest.mark.parametrize("enable_async", [True, False])
+def test_run_resnet50_trace_inference_wh(
+    device,
+    use_program_cache,
+    batch_size,
+    act_dtype,
+    weight_dtype,
+    math_fidelity,
+    enable_async,
+    model_location_generator,
+):
+    run_resnet50_trace_inference(
+        device,
+        use_program_cache,
+        batch_size,
+        act_dtype,
+        weight_dtype,
+        math_fidelity,
+        enable_async,
+        model_location_generator,
+    )
+
+
+@run_for_wormhole_b0()
+@pytest.mark.parametrize("device_params", [{"l1_small_size": 24576, "num_hw_cqs": 2}], indirect=True)
+@pytest.mark.parametrize(
+    "batch_size, act_dtype, weight_dtype, math_fidelity",
+    ((16, ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.MathFidelity.LoFi),),
+)
+def test_run_resnet50_2cqs_inference_wh(
+    device, use_program_cache, batch_size, act_dtype, weight_dtype, math_fidelity, model_location_generator
+):
+    run_resnet50_2cqs_inference(
+        device, use_program_cache, batch_size, act_dtype, weight_dtype, math_fidelity, model_location_generator
+    )
+
+
+@run_for_wormhole_b0()
+@pytest.mark.parametrize(
+    "device_params", [{"l1_small_size": 24576, "trace_region_size": 800768, "num_hw_cqs": 2}], indirect=True
+)
+@pytest.mark.parametrize(
+    "batch_size, act_dtype, weight_dtype, math_fidelity",
+    ((16, ttnn.bfloat8_b, ttnn.bfloat8_b, ttnn.MathFidelity.LoFi),),
+)
+@pytest.mark.parametrize("enable_async", [True, False])
+def test_run_resnet50_trace_2cqs_inference_wh(
+    device,
+    use_program_cache,
+    batch_size,
+    act_dtype,
+    weight_dtype,
+    math_fidelity,
+    enable_async,
+    model_location_generator,
+):
+    run_resnet50_trace_2cqs_inference(
+        device,
+        use_program_cache,
+        batch_size,
+        act_dtype,
+        weight_dtype,
+        math_fidelity,
+        enable_async,
+        model_location_generator,
+    )
diff --git a/tests/scripts/run_performance.sh b/tests/scripts/run_performance.sh
index b6734f6b94b..cd728307b14 100755
--- a/tests/scripts/run_performance.sh
+++ b/tests/scripts/run_performance.sh
@@ -11,14 +11,18 @@ run_perf_models_other() {
     local tt_arch=$1
     local test_marker=$2

-    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/ttnn_resnet/tests/test_perf_ttnn_resnet.py -m $test_marker
+    if [ "$tt_arch" == "grayskull" ]; then
+        env pytest models/demos/ttnn_resnet/tests/test_perf_ttnn_resnet.py -m $test_marker
+    fi
+
+    if [ "$tt_arch" == "wormhole_b0" ]; then
+        env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/ttnn_resnet/tests/test_perf_ttnn_resnet.py -m $test_marker
+    fi

     env pytest -n auto tests/ttnn/integration_tests/bert/test_performance.py -m $test_marker

     env pytest -n auto models/demos/ttnn_falcon7b/tests -m $test_marker

-    env pytest -n auto models/demos/resnet/tests/test_perf_resnet.py -m $test_marker
-
     env pytest -n auto tests/ttnn/integration_tests/whisper/test_performance.py -m $test_marker

     env pytest -n auto models/demos/metal_BERT_large_11/tests -m $test_marker
@@ -59,8 +63,6 @@ run_device_perf_models() {
     set -eo pipefail
     local test_marker=$1

-    env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/ttnn_resnet/tests -m $test_marker
-
     env pytest tests/device_perf_tests/stable_diffusion -m $test_marker --timeout=600

     if [ "$tt_arch" == "grayskull" ]; then
@@ -68,6 +70,8 @@
         #Model Device perf regression tests to make sure thy run on no-soft-reset BMs
         tests/scripts/run_profiler_regressions.sh PROFILER_NO_RESET

+        env pytest models/demos/ttnn_resnet/tests -m $test_marker
+
         env pytest models/demos/metal_BERT_large_11/tests -m $test_marker

         env pytest models/demos/ttnn_falcon7b/tests -m $test_marker --timeout=360
@@ -78,6 +82,8 @@
     fi

     if [ "$tt_arch" == "wormhole_b0" ]; then
+        env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/ttnn_resnet/tests -m $test_marker
+
         env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/wormhole/mamba/tests -m $test_marker

         env WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/metal_BERT_large_11/tests -m $test_marker
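
A quick local-reproduction sketch for the checks this patch adds. The commands are the ones given in the updated README and `tests/scripts/run_performance.sh`; the only assumptions are a configured tt-metal environment and a machine of the matching architecture, since the `run_for_grayskull()` / `run_for_wormhole_b0()` decorators used above select which tests actually run.

```bash
# Grayskull device-throughput gate: re-runs test_run_resnet50_inference_gs 4 times
# under the profiler and checks AVG DEVICE KERNEL SAMPLES/S against 6600 with a 3% margin.
pytest models/demos/ttnn_resnet/tests/test_perf_device_ttnn_resnet.py::test_perf_device_bare_metal_gs

# Grayskull end-to-end trace + 2 command-queue gate (batch 20, ~5,100 fps expected).
pytest "models/demos/ttnn_resnet/tests/test_perf_ttnn_resnet.py::test_perf_trace_2cqs_bare_metal_gs[20-0.004-5-device_params0]"

# Wormhole B0 equivalents additionally need the eth-dispatch YAML, e.g.:
WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml \
  pytest models/demos/ttnn_resnet/tests/test_perf_device_ttnn_resnet.py::test_perf_device_bare_metal_wh
```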