Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CPU hotplug feature case #421

Merged
merged 1 commit into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions BM/hotplug/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Release Notes

In the software solution, CPU hotplug and unplug refers to
CPU offline and online operations using the '/sys/devices/system/cpu' interface.

The Python script uses the Avocado Test Framework, which must be installed first.

## The command to install avocado from source code
```
git clone https://github.com/avocado-framework/avocado.git
cd avocado
pip install .
```

or

## Installing avocado via pip:
```
pip3 install --user avocado-framework
```

## The command to run the case
### Running with 'runtests.py'
```
cd ..
./runtests.py -f hotplug -t hotplug/tests
```

### Running with avocado framework
```
avocado run cpu_off_on_stress.py
```
132 changes: 132 additions & 0 deletions BM/hotplug/cpu_off_on_stress.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
# Copyright (c) 2024 Intel Corporation

"""
This script performs CPU offline/online stress test for the specified number of cycles.
Prerequisites:
Install the avocado framework and the required dependencies with below command:
git clone https://github.com/avocado-framework/avocado.git
cd avocado
pip install .
"""

import subprocess
import time
import os

from avocado.core.nrunner.runnable import Runnable

# Module metadata.
__author__ = "Wendy Wang"
__copyright__ = "GPL-2.0-only"
__license__ = "GPL version 2"

# Absolute directory that contains this script.
script_dir = os.path.dirname(os.path.abspath(__file__))

# Path of the shared common.sh helper, expressed relative to this script's
# directory (two levels up, then into "common").
common_sh_path = os.path.join(script_dir, '..', '..', 'common', 'common.sh')

class ShellCommandRunnable(Runnable):
    """Run one shell command line through /bin/bash and keep its output.

    Only ``command``, ``stdout`` and ``stderr`` are set on the instance;
    ``Runnable.__init__`` is not invoked by this class.
    """

    def __init__(self, command):
        """Remember the command line; outputs stay ``None`` until ``run()``."""
        self.command = command
        self.stdout = None
        self.stderr = None

    def run(self):
        """Execute the command and return its exit status.

        The captured text output is stored in ``self.stdout`` and
        ``self.stderr`` whether or not the command succeeded. A short
        status line is printed in both cases.
        """
        completed = subprocess.run(
            self.command,
            shell=True,
            check=False,
            capture_output=True,
            text=True,
            executable='/bin/bash',
        )
        self.stdout, self.stderr = completed.stdout, completed.stderr

        status = completed.returncode
        if status == 0:
            print(f"command '{self.command}' executed successfully.")
        else:
            print (f"Error occurred: {self.stderr}")
        return status

def _highest_online_cpu_id(cpu_list):
    """Return the largest CPU id named in a cpulist string such as '0-3,5-7'."""
    highest = -1
    for chunk in cpu_list.split(','):
        chunk = chunk.strip()
        if not chunk:
            continue
        # A chunk is either a single id ("4") or an inclusive range ("0-3");
        # in both cases the last '-'-separated field is the largest id.
        highest = max(highest, int(chunk.split('-')[-1]))
    if highest < 0:
        raise ValueError(f"no CPU ids found in '{cpu_list}'")
    return highest


def get_online_cpu_count():
    """Return the highest online CPU id plus one, as reported by lscpu.

    The value is read from the last field of lscpu's 'On-line CPU' line.
    For a contiguous list starting at 0 (e.g. '0-7') this equals the number
    of online CPUs. For a fragmented list (e.g. '0-3,5-7') it is the
    exclusive upper bound of the online ids, so iterating
    ``range(result)`` still visits every online CPU.

    Returns:
        int: highest online CPU id + 1, or 0 if the value could not be
        determined (the reason is printed).
    """
    try:
        # Run 'lscpu' and keep only the on-line CPU list (last field).
        lscpu_command = "lscpu | grep 'On-line CPU' | awk '{print $NF}'"
        runner = ShellCommandRunnable(lscpu_command)

        if runner.run() != 0:
            raise Exception ("Failed to get CPU count")

        # The pipeline's status is awk's, so a failing lscpu/grep shows up
        # as empty output rather than a non-zero return code.
        online_list = (runner.stdout or "").strip()
        if not online_list:
            raise Exception ("No output from lscpu command")

        print(f"cpu range: {online_list}")
        return _highest_online_cpu_id(online_list) + 1
    except Exception as e:
        print (f"Error getting cpu count:{e}")
        return 0

def check_dmesg_error():
    """Return the case dmesg text if it contains a failure keyword, else None.

    The log is obtained by sourcing common.sh and calling its
    ``extract_case_dmesg`` shell function (defined outside this file;
    presumably it prints the kernel log for the current case — confirm
    against common.sh).

    Returns:
        str | None: the full extracted log when any keyword matches,
        otherwise ``None``.
    """
    # Local import keeps this block self-contained.
    import shlex

    # Quote the path so a checkout directory containing spaces or shell
    # metacharacters does not break the 'source' command.
    runner = ShellCommandRunnable(
        f"source {shlex.quote(common_sh_path)} && extract_case_dmesg")
    if runner.run() != 0:
        # Without the log nothing can be verified; make that visible instead
        # of silently reporting a clean result.
        print("Warning: extract_case_dmesg failed, kernel dmesg could not be fully checked")
    dmesg_log = runner.stdout

    # Matching is case-sensitive. The kernel reports bugs as "BUG: ..." /
    # "kernel BUG at ...", so the upper-case form is listed explicitly.
    keywords = ("fail", "error", "Call Trace", "Bug", "BUG")
    if dmesg_log and any(keyword in dmesg_log for keyword in keywords):
        return dmesg_log
    return None

def _write_cpu_online_state(cpu_id, online):
    """Write 1 (online) or 0 (offline) to a CPU's sysfs 'online' file; raise on failure."""
    action = "online" if online else "offline"
    print(f"{action.capitalize()} CPU{cpu_id}")
    runner = ShellCommandRunnable(
        f"echo {1 if online else 0} > /sys/devices/system/cpu/cpu{cpu_id}/online")
    if runner.run() != 0:
        raise Exception(f"Failed to bring CPU{cpu_id} {action}")


def cpu_off_on_stress(cycle):
    """Perform CPU offline/online stress test for the specified number of cycles.

    In every cycle each CPU from 1 up to the value returned by
    ``get_online_cpu_count()`` (exclusive) is taken offline, the test sleeps
    one second, and the same CPUs are brought back online. CPU0 is never
    touched (NOTE(review): presumably because CPU0 is often not
    hot-pluggable — confirm). After the cycles the case dmesg is checked.

    Args:
        cycle: number of offline/online cycles to run.

    Raises:
        Exception: if the kernel log contains a failure keyword, or if any
            offline/online step failed. A hotplug failure is reported after
            the dmesg check so that the log is still inspected.
    """
    stress_error = None
    # CPUs this cycle has tried to offline and has not yet brought back.
    pending_restore = []

    try:
        cpu_num = get_online_cpu_count()
        if cpu_num == 0:
            raise Exception("On-line CPU is not available.")

        print (f"The max CPU number is: {cpu_num}")
        hotplug_cpus = range(1, cpu_num)

        # Start stress test cycle
        for i in range(1, cycle + 1):
            print(f"CPUs offline online stress cycle {i}")

            # Bring CPUs offline
            for cpu_id in hotplug_cpus:
                pending_restore.append(cpu_id)
                _write_cpu_online_state(cpu_id, online=False)

            time.sleep(1)

            # Bring CPUs online
            for cpu_id in hotplug_cpus:
                _write_cpu_online_state(cpu_id, online=True)
            pending_restore.clear()

    except Exception as e:
        print(f"Error during CPU stress testing:{e}")
        stress_error = e
        # Best effort: do not leave the machine with CPUs offline because
        # the test aborted half-way through a cycle.
        for cpu_id in pending_restore:
            try:
                _write_cpu_online_state(cpu_id, online=True)
            except Exception as restore_error:
                print(f"Could not restore CPU{cpu_id}: {restore_error}")

    # Check dmesg log
    dmesg_log = check_dmesg_error()
    if dmesg_log:
        print(f"Kernel dmesg shows failure after CPU offline/online stress: {dmesg_log}")
        raise Exception("Kernel dmesg show failure") from stress_error
    print("Kernel dmesg shows Okay after CPU offline/online stress.")

    # A clean kernel log must not hide a failed offline/online operation.
    if stress_error is not None:
        raise stress_error


# Script entry point: run the stress test for five offline/online cycles.
if __name__ == "__main__":
    cpu_off_on_stress(cycle=5)
3 changes: 3 additions & 0 deletions BM/hotplug/tests
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# This script performs CPU offline/online stress test for the specified number of cycles

python3 cpu_off_on_stress.py
Loading