Skip to content

Commit

Permalink
[test_pfcwd_wb] increase delay time for detecting pfcwd storm (sonic…
Browse files Browse the repository at this point in the history
…-net#16445)

What is the motivation for this PR?
Script log Analyzer timestamp is 2025-01-07 13:26:55.978427 while the real detect timestamp in syslog is Jan 7 13:27:18.132683.
Need delay more time to grep the syslog

2025 Jan  7 13:26:55.996909 bjw2-can-7260-2 DEBUG extract_log combine_logs from file /var/log/syslog create time 2025-01-07 13:26:55.978427, size 4236007

syslog:23447:2025 Jan  7 13:27:18.132683 bjw2-can-7260-2 NOTICE swss#orchagent: :- report_pfc_storm: PFC Watchdog detected PFC storm on port Ethernet24, queue index 3, queue id 0x1500000000014d and port id 0x1000000000001

How did you do it?
Increase the delay time and add some debug log in script

How did you verify/test it?
Use elastic to run the case on 7060 and 2700, passed.
https://elastictest.org/scheduler/testplan/67808a9a572c09d3aa482375
https://elastictest.org/scheduler/testplan/67808b40745d47c6b42e53f4
https://elastictest.org/scheduler/testplan/67808bd3c66ca5ae0571bd57
  • Loading branch information
lipxu committed Jan 13, 2025
1 parent 2c0942d commit e9e715a
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 8 deletions.
33 changes: 33 additions & 0 deletions tests/common/helpers/pfcwd_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ def select_test_ports(test_ports):
random_port = list(test_ports.keys())[0]
selected_ports[random_port] = test_ports[random_port]

logger.info("select_test_ports: {}".format(selected_ports.keys()))
return selected_ports


Expand Down Expand Up @@ -625,3 +626,35 @@ def parser_show_pfcwd_stat(dut, select_port, select_queue):
pfcwd_stat.append(parsed_dict)

return pfcwd_stat


def pfcwd_show_status(duthost, output_string):
"""
Get pfcwd status
Args:
duthost: AnsibleHost instance for DUT
output_string: string to be printed
Returns:
pfcwd status
"""
logger.debug("pfcwd_show_status: {}".format(output_string))

cmd = "show pfc counters"
cmd_response = duthost.shell(cmd, module_ignore_errors=True)
logger.debug("execute cmd {} response: \n{}".format(cmd, cmd_response.get('stdout', None)))

cmd = "show pfcwd config"
cmd_response = duthost.shell(cmd, module_ignore_errors=True)
logger.debug("execute cmd {} response: \n{}".format(cmd, cmd_response.get('stdout', None)))

cmd = "show pfcwd stats"
cmd_response = duthost.shell(cmd, module_ignore_errors=True)
logger.debug("execute cmd {} response: \n{}".format(cmd, cmd_response.get('stdout', None)))

cmd = "grep \"{}\" /var/log/syslog".format("PFC Watchdog")
cmd_response = duthost.shell(cmd, module_ignore_errors=True)
logger.debug("execute cmd {} response: \n{}".format(cmd, cmd_response.get('stdout', None)))

return
39 changes: 31 additions & 8 deletions tests/pfcwd/test_pfcwd_warm_reboot.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from tests.common.utilities import InterruptableThread
from tests.common.utilities import join_all
from tests.ptf_runner import ptf_runner
from tests.common.helpers.pfcwd_helper import EXPECT_PFC_WD_DETECT_RE, EXPECT_PFC_WD_RESTORE_RE
from tests.common.helpers.pfcwd_helper import EXPECT_PFC_WD_DETECT_RE, EXPECT_PFC_WD_RESTORE_RE, pfcwd_show_status
from tests.common.helpers.pfcwd_helper import send_background_traffic
from tests.common.helpers.pfcwd_helper import has_neighbor_device
from tests.common.utilities import wait_until
Expand Down Expand Up @@ -347,7 +347,7 @@ def storm_detect_path(self, port, queue, first_detect_after_wb=False):
if not first_detect_after_wb:
if not self.pfc_wd['fake_storm']:
self.storm_handle[port][queue].start_storm()
time.sleep(15 * len(self.pfc_wd['queue_indices']))
time.sleep(60 * len(self.pfc_wd['queue_indices']))
else:
logger.info("Enable DEBUG fake storm on port {} queue {}".format(port, queue))
PfcCmd.set_storm_status(self.dut, self.oid_map[(port, queue)], "enabled")
Expand Down Expand Up @@ -415,6 +415,14 @@ def run_test(self, port, queue, detect=True, storm_start=True, first_detect_afte
first_detect_after_wb(bool): used to decide certain actions in the detect logic (default: False)
storm_defer(bool): use the storm defer logic or not (default: False)
"""

logger.info(
"pfcwd wr: run_test port: {}, queue: {}, detect: {}, storm_start: {}, "
"first_detect_after_wb: {}, storm_defer: {}".format(
port, queue, detect, storm_start, first_detect_after_wb, storm_defer
)
)

# for deferred storm, return to main loop for next action which is warm boot
if storm_defer:
if not self.pfc_wd['fake_storm']:
Expand Down Expand Up @@ -520,8 +528,10 @@ def pfcwd_wb_helper(self, fake_storm, testcase_actions, setup_pfc_test, enum_fan
self.oid_map = dict()
self.storm_threads = []

logger.debug("pfcwd wr: fake_storm: {} two_queues: {}".format(self.fake_storm, self.two_queues))

for t_idx, test_action in enumerate(testcase_actions):
logger.info("Index {} test_action {}".format(t_idx, test_action))
logger.info("pfcwd wr: Index {} test_action {}".format(t_idx, test_action))
if 'warm-reboot' in test_action:
reboot(self.dut, localhost, reboot_type="warm", wait_warmboot_finalizer=True)

Expand Down Expand Up @@ -552,11 +562,12 @@ def pfcwd_wb_helper(self, fake_storm, testcase_actions, setup_pfc_test, enum_fan
bitmask = (1 << ACTIONS[test_action])
for p_idx, port in enumerate(self.ports):
logger.info("")
logger.info("--- Testing on {} ---".format(port))
logger.info("pfcwd wr: --- Testing on port {} ---".format(port))
send_pfc_frame_interval = calculate_send_pfc_frame_interval(duthost, port) \
if self.fanout[self.ports[port]['peer_device']].os == 'onyx' else 0
self.setup_test_params(port, setup_info['vlan'], p_idx)
for q_idx, queue in enumerate(self.pfc_wd['queue_indices']):
logger.info("pfcwd wr: --- Testing on queue {} ---".format(queue))
if not t_idx or storm_deferred:
if not q_idx:
self.storm_handle[port] = dict()
Expand All @@ -574,10 +585,22 @@ def pfcwd_wb_helper(self, fake_storm, testcase_actions, setup_pfc_test, enum_fan
self.oid_map[(port, queue)] = PfcCmd.get_queue_oid(self.dut, port, queue)

self.traffic_inst = SendVerifyTraffic(self.ptf, dut_facts['router_mac'], self.pfc_wd, queue)
self.run_test(port, queue, detect=(bitmask & 1),
storm_start=not t_idx or storm_deferred or storm_restored,
first_detect_after_wb=(t_idx == 2 and not p_idx and not q_idx and not storm_deferred),
storm_defer=(bitmask & 4))
try:
pfcwd_show_status(
self.dut,
"pfcwd wr: run_test start t_idx: {}, test_action: {}, p_idx: {}-{}, q_idx: {}-{}, "
"bitmask: {}, storm_deferred: {}, storm_restored: {}".format(
t_idx, test_action, p_idx, port, q_idx, queue, bitmask, storm_deferred, storm_restored
)
)
self.run_test(port, queue, detect=(bitmask & 1),
storm_start=not t_idx or storm_deferred or storm_restored,
first_detect_after_wb=(t_idx == 2 and not p_idx and not q_idx and not storm_deferred), # noqa E501
storm_defer=(bitmask & 4))
pfcwd_show_status(self.dut, "pfcwd wr: run_test end")
except Exception as e:
pfcwd_show_status(self.dut, "pfcwd wr: run_test exception")
pytest.fail(str(e))

@pytest.fixture(params=['no_storm', 'storm', 'async_storm'])
def testcase_action(self, request):
Expand Down

0 comments on commit e9e715a

Please sign in to comment.