From c5a9d8bf7eb3d01c4eb548a9970a37e0d62d0fbb Mon Sep 17 00:00:00 2001 From: Smit Gardhariya <113590758+smit-gardhariya@users.noreply.github.com> Date: Tue, 20 Aug 2024 13:31:25 +0530 Subject: [PATCH 01/17] Fix docker config for Cloud-Hypervisor tests (#3387) Create /etc/docker if not present Use JSON config instead of plain string and dumps it Signed-off-by: Smit Gardhariya --- .../cloud_hypervisor/ch_tests_tool.py | 35 ++++++++++++++++--- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/microsoft/testsuites/cloud_hypervisor/ch_tests_tool.py b/microsoft/testsuites/cloud_hypervisor/ch_tests_tool.py index 661eb84264..ecf7eda463 100644 --- a/microsoft/testsuites/cloud_hypervisor/ch_tests_tool.py +++ b/microsoft/testsuites/cloud_hypervisor/ch_tests_tool.py @@ -14,7 +14,18 @@ from lisa.messages import TestStatus, send_sub_test_result_message from lisa.operating_system import CBLMariner from lisa.testsuite import TestResult -from lisa.tools import Chmod, Chown, Dmesg, Docker, Echo, Git, Modprobe, Whoami +from lisa.tools import ( + Chmod, + Chown, + Dmesg, + Docker, + Echo, + Git, + Ls, + Mkdir, + Modprobe, + Whoami, +) from lisa.util import find_groups_in_lines @@ -260,13 +271,27 @@ def _install(self) -> bool: git.clone(self.upstream_repo, clone_path) if isinstance(self.node.os, CBLMariner): - daemon_json_file = PurePath("/etc/docker/daemon.json") + docker_config_dir = "/etc/docker/" + + docker_config: Dict[str, Any] = {} + docker_config["default-ulimits"] = {} + nofiles = {"Hard": 65535, "Name": "nofile", "Soft": 65535} + docker_config["default-ulimits"]["nofile"] = nofiles + + ls = self.node.tools[Ls] + if not ls.path_exists(path=docker_config_dir, sudo=True): + self.node.tools[Mkdir].create_directory( + path=docker_config_dir, + sudo=True, + ) + node_info = self.node.get_information() distro = node_info.get("distro_version", "") if distro == "Microsoft Azure Linux 3.0": - daemon_json = '{"userland-proxy": false,"default-ulimits":{"nofile":{"Hard":65535,"Name":"nofile","Soft":65535}}}' # noqa: E501 - else: - daemon_json = '{"default-ulimits":{"nofile":{"Hard":65535,"Name":"nofile","Soft":65535}}}' # noqa: E501 + docker_config["userland-proxy"] = False + + daemon_json = json.dumps(docker_config).replace('"', '\\"') + daemon_json_file = PurePath(f"{docker_config_dir}/daemon.json") self.node.tools[Echo].write_to_file( daemon_json, daemon_json_file, sudo=True ) From 48e711dafea3ca4ff5833910afedaf61907694c3 Mon Sep 17 00:00:00 2001 From: Baihua Lu Date: Tue, 20 Aug 2024 15:59:34 +0800 Subject: [PATCH 02/17] Add gallery images and versions delete actions for AITL clean up job --- microsoft/utils/setup_aitl.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/microsoft/utils/setup_aitl.py b/microsoft/utils/setup_aitl.py index 6f9697316e..4cdf5b394b 100644 --- a/microsoft/utils/setup_aitl.py +++ b/microsoft/utils/setup_aitl.py @@ -223,8 +223,10 @@ def _set_target_role_parameters( # for testing ARM64 VHD and gallery image "Microsoft.Compute/galleries/images/read", "Microsoft.Compute/galleries/images/write", + "Microsoft.Compute/galleries/images/delete", "Microsoft.Compute/galleries/images/versions/read", "Microsoft.Compute/galleries/images/versions/write", + "Microsoft.Compute/galleries/images/versions/delete", "Microsoft.Compute/galleries/read", "Microsoft.Compute/galleries/write", # for test VM extension running From 158857c9d8b2c1889adc470da8ad09aef8b8a1e0 Mon Sep 17 00:00:00 2001 From: Lili Deng Date: Tue, 20 Aug 2024 12:32:52 +0800 Subject: [PATCH 03/17] save_console_log: catch 
ChunkedEncodingError
---
 lisa/sut_orchestrator/azure/common.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/lisa/sut_orchestrator/azure/common.py b/lisa/sut_orchestrator/azure/common.py
index 684f716591..2db3a0a9a4 100644
--- a/lisa/sut_orchestrator/azure/common.py
+++ b/lisa/sut_orchestrator/azure/common.py
@@ -86,6 +86,7 @@
 from marshmallow import fields, validate
 from msrestazure.azure_cloud import AZURE_PUBLIC_CLOUD, Cloud  # type: ignore
 from PIL import Image, UnidentifiedImageError
+from requests.exceptions import ChunkedEncodingError
 from retry import retry

 from lisa import feature, schema, search_space
@@ -2025,12 +2026,22 @@ def save_console_log(
         )
         screenshot_raw_name.unlink()

-    log_response = requests.get(diagnostic_data.serial_console_log_blob_uri, timeout=60)
-    if log_response.status_code == 404:
-        log.debug(
-            "The serial console is not generated. "
-            "The reason may be the VM is not started."
+    try:
+        log_response = requests.get(
+            diagnostic_data.serial_console_log_blob_uri, timeout=60
         )
+        if log_response.status_code == 404:
+            log.debug(
+                "The serial console is not generated. "
+                "The reason may be the VM is not started."
+            )
+    except ChunkedEncodingError as ex:
+        log.debug(f"ChunkedEncodingError occurred: {ex}")
+        return b""
+    except Exception as ex:
+        log.debug(f"Failed to save console log: {ex}")
+        return b""
+
     return log_response.content

From 4a84849804e2ae18a1f97411a09f43ab855b44e6 Mon Sep 17 00:00:00 2001
From: "Gustavo Lima Chaves (from Dev Box)"
Date: Tue, 6 Aug 2024 15:03:53 -0700
Subject: [PATCH 04/17] Minimal shell: honor full chain of hooks, on either
 minimal entry point

There are two points at which LISA may decide to use a minimal shell
profile:

1. When first contacting a node: LISA issues a 'cmd' command and checks
   whether 'Windows' is present in the output (to rule out the Windows
   shell type). If it is not, LISA either selects the POSIX shell type,
   when 'Unknown syntax' is absent from the output, or decides early on
   the 'minimal' shell type and sets the shell object to follow that
   config from there on.

2. When running the first actual LISA suite SSH command on the same
   node.

We have seen cases where, for some reason, 'Unknown syntax' is not
emitted at early node initialization (1). To handle that, the exception
path of actual command execution checks internal node state that tracks
whether a minimal profile has been tried before; if it has not, and a
command execution exception occurs, the logic sets the node to the
minimal shell type and retries.

It turns out the 'minification' of the node config was only done
partially, in (2): we missed overriding its final SSH command tokenizer.

Another missed detail is that the final tokenizer logic for the minimal
shell case was too greedy in removing stray quotes. Keep injecting
quotes around tokens that contain spaces; that will not hurt any
legitimate command in a minimal context.
---
 lisa/util/shell.py | 48 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/lisa/util/shell.py b/lisa/util/shell.py
index 571918f84f..9e124a889c 100644
--- a/lisa/util/shell.py
+++ b/lisa/util/shell.py
@@ -41,11 +41,17 @@
 )


-def minimal_escape_sh(value: str) -> str:
-    return value.replace("'", "'\\''")
+def _minimal_escape_sh(value: str) -> str:
+    # Tokens iterated here will either have spaces in them, after
+    # tokenized by process_*_command()--process.py--or not.
For those + # with spaces, we want to inject quotes around them again, on the + # minimal shell case--but *only* for those. + if re.search(r"\s", value): + return f"'{value}'" + return value -def minimal_generate_run_command( # type: ignore +def _minimal_generate_run_command( # type: ignore self, command_args: str, store_pid: bool, @@ -53,7 +59,7 @@ def minimal_generate_run_command( # type: ignore update_env: Optional[Dict[str, str]] = None, new_process_group: bool = False, ) -> str: - return " ".join(map(minimal_escape_sh, command_args)) + return " ".join(map(_minimal_escape_sh, command_args)) def wait_tcp_port_ready( @@ -213,6 +219,20 @@ def _spawn_ssh_process(shell: spur.ssh.SshShell, **kwargs: Any) -> spur.ssh.SshP return shell.spawn(**kwargs) +def _minimize_shell(shell: spur.ssh.SshShell) -> None: + """ + Dynamically override that object's method. Here, we don't enclose every + shell token under single quotes anymore. That's an assumption from spur + that minimal shells will still be POSIX compliant--not true for some + cases for LISA users. + """ + func_type = type(spur.ssh.ShellTypes.minimal.generate_run_command) + shell._spur._shell_type.generate_run_command = func_type( + _minimal_generate_run_command, + shell._spur._shell_type, + ) + + class SshShell(InitializableMixin): def __init__(self, connection_info: schema.ConnectionInfo) -> None: super().__init__() @@ -267,7 +287,11 @@ def _initialize(self, *args: Any, **kwargs: Any) -> None: else: self.is_posix = True shell_type = spur.ssh.ShellTypes.sh - # it doesn't support bash. Use minimal shell type + # First chance in getting a clue about no POSIX shell + # support (still not Windows). Use minimal shell type if + # so. Bear in mind we can get silence here (no "Unknown + # syntax"), but there will be a second chance of setting a + # minimal shell further down the flow if stdout_content and "Unknown syntax" in stdout_content: shell_type = spur.ssh.ShellTypes.minimal @@ -299,15 +323,7 @@ def _initialize(self, *args: Any, **kwargs: Any) -> None: ) self._inner_shell = spurplus.SshShell(spur_ssh_shell=spur_ssh_shell, sftp=sftp) if shell_type == spur.ssh.ShellTypes.minimal: - # Dynamically override that object's method. Here, we don't enclose every - # shell token under single quotes anymore. That's an assumption from spur - # that minimal shells will still be POSIX compliant--not true for some - # cases for LISA users. - func_type = type(spur.ssh.ShellTypes.minimal.generate_run_command) - self._inner_shell._spur._shell_type.generate_run_command = func_type( - minimal_generate_run_command, - self._inner_shell._spur._shell_type, - ) + _minimize_shell(self._inner_shell) def close(self) -> None: if self._inner_shell: @@ -367,6 +383,9 @@ def spawn( "the paramiko/spur not support the shell of node." ) except spur.errors.CommandInitializationError as identifier: + # *Second* chance in getting a clue about no POSIX shell + # support (still not Windows). Set minimal shell type if + # so, again. # Some publishers images, such as azhpc-desktop, javlinltd and # vfunctiontechnologiesltd, there might have permission errors when # scripts under /etc/profile.d directory are executed at startup of @@ -380,6 +399,7 @@ def spawn( # Except CommandInitializationError then use minimal shell type. 
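            # have_tried_minimal_type guards this fallback so the minimal
            # shell is only attempted once per connection.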
if not have_tried_minimal_type: self._inner_shell._spur._shell_type = spur.ssh.ShellTypes.minimal + _minimize_shell(self._inner_shell) have_tried_minimal_type = True matched = _spawn_initialization_error_pattern.search( str(identifier) From 00442607b9a37fa22110a774a859fa2d09b8263c Mon Sep 17 00:00:00 2001 From: Lili Deng Date: Thu, 22 Aug 2024 10:00:17 +0800 Subject: [PATCH 05/17] file notifier: change the encoding to resolve UnicodeEncodeError error --- lisa/notifiers/file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lisa/notifiers/file.py b/lisa/notifiers/file.py index 76cdf5f2b6..7d9a19bf14 100644 --- a/lisa/notifiers/file.py +++ b/lisa/notifiers/file.py @@ -39,7 +39,7 @@ def finalize(self) -> None: def _received_message(self, message: messages.MessageBase) -> None: simplify_message(message) # write every time to refresh the content immediately. - with open(self._file_path, "a") as f: + with open(self._file_path, "a", encoding="utf-8") as f: f.write(f"{datetime.now(timezone.utc):%Y-%m-%d %H:%M:%S.%ff}: {message}\n") def _subscribed_message_type(self) -> List[Type[messages.MessageBase]]: From e8391c758b19fe515bb60114e26083e02bc9a321 Mon Sep 17 00:00:00 2001 From: Lili Deng Date: Thu, 22 Aug 2024 22:43:06 +0800 Subject: [PATCH 06/17] exclude case generic/680 for security reason --- microsoft/testsuites/xfstests/xfstesting.py | 2 ++ microsoft/testsuites/xfstests/xfstests.py | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/microsoft/testsuites/xfstests/xfstesting.py b/microsoft/testsuites/xfstests/xfstesting.py index d768855399..cb40aea2cd 100644 --- a/microsoft/testsuites/xfstests/xfstesting.py +++ b/microsoft/testsuites/xfstests/xfstesting.py @@ -125,6 +125,7 @@ class Xfstesting(TestSuite): # commit d0c7feaf8767 ("xfs: add agf freeblocks verify in xfs_agf_verify") # generic/738 case might cause hang more than 4 hours on old kernel # TODO: will figure out the detailed reason of every excluded case. + # exclude generic/680 for security reason. 
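    # (the string fragments below are concatenated into a single
    # space-separated exclusion list)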
    excluded_tests = (
        "generic/211 generic/430 generic/431 generic/434 generic/738 xfs/438 xfs/490"
        + " btrfs/007 btrfs/178 btrfs/244 btrfs/262"
@@ -132,6 +133,7 @@ class Xfstesting(TestSuite):
        + " xfs/144 xfs/148 xfs/175 xfs/191-input-validation xfs/289 xfs/293 xfs/424"
        + " xfs/432 xfs/500 xfs/508 xfs/512 xfs/514 xfs/515 xfs/516 xfs/518 xfs/521"
        + " xfs/528 xfs/544 ext4/054 ext4/056 ext4/058 ext4/059 xfs/081 xfs/520"
+        + " generic/680"
    )

    def before_case(self, log: Logger, **kwargs: Any) -> None:
diff --git a/microsoft/testsuites/xfstests/xfstests.py b/microsoft/testsuites/xfstests/xfstests.py
index 72db617aca..3588894073 100644
--- a/microsoft/testsuites/xfstests/xfstests.py
+++ b/microsoft/testsuites/xfstests/xfstests.py
@@ -19,7 +19,7 @@
     Ubuntu,
 )
 from lisa.testsuite import TestResult
-from lisa.tools import Cat, Chmod, Echo, Git, Make, Pgrep
+from lisa.tools import Cat, Chmod, Echo, Git, Make, Pgrep, Rm, Sed
 from lisa.util import LisaException, UnsupportedDistroException, find_patterns_in_lines


@@ -282,6 +282,14 @@ def _install(self) -> bool:
         git.clone(url=self.repo, cwd=tool_path, ref=self.branch)
         make = self.node.tools[Make]
         code_path = tool_path.joinpath("xfstests-dev")
+
+        self.node.tools[Rm].remove_file(str(code_path / "src" / "splice2pipe.c"))
+        self.node.tools[Sed].substitute(
+            regexp="splice2pipe",
+            replacement="",
+            file=str(code_path / "src" / "Makefile"),
+        )
+
         make.make_install(code_path)
         return True

From fec0c5de1315d60c4d83f7c500d6dccd48dcf2e6 Mon Sep 17 00:00:00 2001
From: Smit Gardhariya <113590758+smit-gardhariya@users.noreply.github.com>
Date: Mon, 26 Aug 2024 12:15:30 +0530
Subject: [PATCH 07/17] Fix KVM Unit test result parsing (#3391)

* Fix KVM Unit test result parsing

Use lisa.util methods to get the matching pattern using a named regex.

Signed-off-by: Smit Gardhariya

* Fix the chmod usage for the KVM unit test

Pass named parameters to the chmod and update_folder functions. The
argument order was wrong, which made the chmod command fail.

Signed-off-by: Smit Gardhariya

---------

Signed-off-by: Smit Gardhariya
---
 .../testsuites/kvm/kvm_unit_tests_tool.py     | 25 +++++++++++++------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/microsoft/testsuites/kvm/kvm_unit_tests_tool.py b/microsoft/testsuites/kvm/kvm_unit_tests_tool.py
index 7a0146e5ed..b31363d5ba 100644
--- a/microsoft/testsuites/kvm/kvm_unit_tests_tool.py
+++ b/microsoft/testsuites/kvm/kvm_unit_tests_tool.py
@@ -12,7 +12,7 @@
 from lisa.operating_system import Posix
 from lisa.testsuite import TestResult
 from lisa.tools import Chmod, Git, Ls, Make
-from lisa.util import LisaException
+from lisa.util import LisaException, find_group_in_lines


 @dataclass
@@ -101,15 +101,18 @@ def _parse_results(self, output: str) -> List[KvmUnitTestResult]:
         #
         # For now, we don't do anything with the additional info in the
         # parentheses.
-        line_regex = re.compile(r"^\S+(PASS|FAIL|SKIP)\S+ (\S+) .*$")
+        line_regex = re.compile(
+            r"(?P<status>PASS|SKIP|FAIL)\s+(?P<test_name>\S+)"
+            r"(?:\s+\((?P<info>[^)]+)\))?"
+ ) for line in lines: - match = re.search(line_regex, line) + match = find_group_in_lines(lines=line.strip(), pattern=line_regex) if not match: continue result = KvmUnitTestResult() - result.name = match.group(2) - status = match.group(1) + result.name = match.get("test_name", "") + status = match.get("status", "") if status == "PASS": result.status = TestStatus.PASSED elif status == "FAIL": @@ -136,8 +139,16 @@ def _save_logs(self, test_names: List[str], log_path: Path) -> None: def _save_all_logs(self, log_path: Path) -> None: logs_dir = self.repo_root / "logs" - self.node.tools[Chmod].chmod("a+x", str(logs_dir), sudo=True) - self.node.tools[Chmod].update_folder("a+r", str(logs_dir), sudo=True) + self.node.tools[Chmod].chmod( + permission="a+x", + path=str(logs_dir), + sudo=True, + ) + self.node.tools[Chmod].update_folder( + permission="a+r", + path=str(logs_dir), + sudo=True, + ) files = self.node.tools[Ls].list(str(logs_dir), sudo=True) for f in files: f_path = PurePath(f) From c145b86c00cdef0dc2304f86cce8597083ec42ab Mon Sep 17 00:00:00 2001 From: Smit Gardhariya <113590758+smit-gardhariya@users.noreply.github.com> Date: Mon, 26 Aug 2024 18:34:39 +0530 Subject: [PATCH 08/17] Fix libvirt community test (#3393) * Fix libvirt tck test Fix the format of config getting written in /etc/libvirt/qemu.conf Signed-off-by: Smit Gardhariya * Add dmesg/journalctl log for libvirt community test Add dmesg and journalctl log for libvirt community test under after_case of the testcase: verify_libvirt_tck Signed-off-by: Smit Gardhariya --------- Signed-off-by: Smit Gardhariya --- microsoft/testsuites/libvirt/libvirt_tck.py | 14 +++++++++++++- microsoft/testsuites/libvirt/libvirt_tck_tool.py | 4 ++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/microsoft/testsuites/libvirt/libvirt_tck.py b/microsoft/testsuites/libvirt/libvirt_tck.py index 2026db1a8e..a953670957 100644 --- a/microsoft/testsuites/libvirt/libvirt_tck.py +++ b/microsoft/testsuites/libvirt/libvirt_tck.py @@ -6,7 +6,7 @@ from lisa import Logger, Node, TestCaseMetadata, TestSuite, TestSuiteMetadata from lisa.operating_system import CBLMariner, Ubuntu from lisa.testsuite import TestResult -from lisa.tools import Lscpu +from lisa.tools import Dmesg, Journalctl, Lscpu from lisa.util import SkippedException from microsoft.testsuites.libvirt.libvirt_tck_tool import LibvirtTck @@ -34,6 +34,18 @@ def before_case(self, log: Logger, **kwargs: Any) -> None: if not virtualization_enabled: raise SkippedException("Virtualization is not enabled in hardware") + def after_case(self, log: Logger, **kwargs: Any) -> None: + node = kwargs["node"] + dmesg = node.tools[Dmesg] + dmesg.get_output(force_run=True) + + journalctl = node.tools[Journalctl] + libvirt_log = journalctl.logs_for_unit( + unit_name="libvirtd", + sudo=True, + ) + log.debug(f"Journalctl libvirt Logs: {libvirt_log}") + @TestCaseMetadata( description=""" Runs the Libvirt TCK (Technology Compatibility Kit) tests with the default diff --git a/microsoft/testsuites/libvirt/libvirt_tck_tool.py b/microsoft/testsuites/libvirt/libvirt_tck_tool.py index b4f23a8b58..42a5f546b3 100644 --- a/microsoft/testsuites/libvirt/libvirt_tck_tool.py +++ b/microsoft/testsuites/libvirt/libvirt_tck_tool.py @@ -130,13 +130,13 @@ def _install(self) -> bool: # tell libvirt to run qemu as root libvirt_qemu_conf = PurePath("/etc/libvirt/qemu.conf") self.node.tools[Echo].write_to_file( - 'user = "root"', + 'user = "root"'.replace('"', '\\"'), libvirt_qemu_conf, sudo=True, append=True, ) 
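        # The group setting below needs the same quote escaping as the user
        # setting above, so the double quotes survive into qemu.conf.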
self.node.tools[Echo].write_to_file( - 'group = "root"', + 'group = "root"'.replace('"', '\\"'), libvirt_qemu_conf, sudo=True, append=True, From 8616100a1c484a72750e83aeb91df2a20a516145 Mon Sep 17 00:00:00 2001 From: bhagyapathak Date: Mon, 26 Aug 2024 09:29:52 +0000 Subject: [PATCH 09/17] Fix test cases for Azure Linux 3.0 --- lisa/tools/netperf.py | 1 + lisa/tools/vdsotest.py | 1 + .../testsuites/core/azure_image_standard.py | 25 +++++++++++++------ microsoft/testsuites/xfstests/xfstests.py | 1 + 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/lisa/tools/netperf.py b/lisa/tools/netperf.py index 2e990e2542..d98bce857f 100644 --- a/lisa/tools/netperf.py +++ b/lisa/tools/netperf.py @@ -94,6 +94,7 @@ def _install_dep_packages(self) -> None: "binutils", "glibc-devel", "zlib-devel", + "perl-CPAN", "automake", "autoconf", ] diff --git a/lisa/tools/vdsotest.py b/lisa/tools/vdsotest.py index 2084769c08..f7f4f16715 100644 --- a/lisa/tools/vdsotest.py +++ b/lisa/tools/vdsotest.py @@ -56,6 +56,7 @@ def _install_from_src(self) -> bool: "binutils", "glibc-devel", "kernel-headers", + "perl-CPAN", ] ) else: diff --git a/microsoft/testsuites/core/azure_image_standard.py b/microsoft/testsuites/core/azure_image_standard.py index a4883a792c..b32b233447 100644 --- a/microsoft/testsuites/core/azure_image_standard.py +++ b/microsoft/testsuites/core/azure_image_standard.py @@ -737,14 +737,23 @@ def verify_repository_installed(self, node: Node) -> None: # noqa: C901 mariner_repositories = [ cast(RPMRepositoryInfo, repo) for repo in repositories ] - expected_repo_list = [ - "mariner-official-base", - "mariner-official-microsoft", - ] - if 1 == node.os.information.version.major: - expected_repo_list += ["mariner-official-update"] - elif 2 == node.os.information.version.major: - expected_repo_list += ["mariner-official-extras"] + + if 3 == node.os.information.version.major: + expected_repo_list = [ + "azurelinux-official-base", + "azurelinux-official-ms-non-oss", + "azurelinux-official-ms-oss", + ] + else: + expected_repo_list = [ + "mariner-official-base", + "mariner-official-microsoft", + ] + if 1 == node.os.information.version.major: + expected_repo_list += ["mariner-official-update"] + elif 2 == node.os.information.version.major: + expected_repo_list += ["mariner-official-extras"] + for id_ in expected_repo_list: is_repository_present = any( id_ in repository.id for repository in mariner_repositories diff --git a/microsoft/testsuites/xfstests/xfstests.py b/microsoft/testsuites/xfstests/xfstests.py index 3588894073..97d91d2d5b 100644 --- a/microsoft/testsuites/xfstests/xfstests.py +++ b/microsoft/testsuites/xfstests/xfstests.py @@ -113,6 +113,7 @@ class Xfstests(Tool): "kernel-headers", "util-linux-devel", "psmisc", + "perl-CPAN", ] # Passed all 35 tests __all_pass_pattern = re.compile( From 7ed20f8ce13d08d864df2f1ecc5b29f7adff5e07 Mon Sep 17 00:00:00 2001 From: Lili Deng Date: Mon, 26 Aug 2024 22:28:51 +0800 Subject: [PATCH 10/17] add test cases priority --- microsoft/testsuites/core/storage.py | 8 ++++++++ microsoft/testsuites/mshv/mshv_install.py | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/microsoft/testsuites/core/storage.py b/microsoft/testsuites/core/storage.py index 967fcbba4c..a3e94c43a6 100644 --- a/microsoft/testsuites/core/storage.py +++ b/microsoft/testsuites/core/storage.py @@ -370,6 +370,7 @@ def verify_os_partition_identifier(self, log: Logger, node: RemoteNode) -> None: 3. 
Serially add and remove the data disks and verify that the added disks are present in the vm. """, + priority=2, timeout=TIME_OUT, requirement=simple_requirement(disk=DiskStandardHDDLRS()), ) @@ -384,6 +385,7 @@ def verify_hot_add_disk_serial(self, log: Logger, node: Node) -> None: be added serially while the vm is running. The test steps are same as `hot_add_disk_serial`. """, + priority=2, timeout=TIME_OUT, requirement=simple_requirement(disk=DiskStandardSSDLRS()), ) @@ -398,6 +400,7 @@ def verify_hot_add_disk_serial_standard_ssd(self, log: Logger, node: Node) -> No be added serially while the vm is running. The test steps are same as `hot_add_disk_serial`. """, + priority=2, timeout=TIME_OUT, requirement=simple_requirement(disk=DiskPremiumSSDLRS()), ) @@ -418,6 +421,7 @@ def verify_hot_add_disk_serial_premium_ssd(self, log: Logger, node: Node) -> Non 5. Remove the disks from the vm in parallel. 6. Verify that the disks are removed from the OS. """, + priority=2, timeout=TIME_OUT, requirement=simple_requirement(disk=DiskStandardHDDLRS()), ) @@ -432,6 +436,7 @@ def verify_hot_add_disk_parallel(self, log: Logger, node: Node) -> None: be added serially while the vm is running. The test steps are same as `hot_add_disk_parallel`. """, + priority=2, timeout=TIME_OUT, requirement=simple_requirement(disk=DiskStandardSSDLRS()), ) @@ -456,6 +461,7 @@ def verify_hot_add_disk_parallel_standard_ssd( 7. Verify that 1 disk is removed from the OS. 8. Repeat steps 6 & 7 till all randomly attached disks are removed. """, + priority=2, timeout=TIME_OUT, requirement=simple_requirement(disk=DiskStandardSSDLRS()), ) @@ -484,6 +490,7 @@ def verify_hot_add_disk_serial_random_lun_standard_ssd( 7. Verify that 1 disk is removed from the OS. 8. Repeat steps 6 & 7 till all randomly attached disks are removed. """, + priority=2, timeout=TIME_OUT, requirement=simple_requirement(disk=DiskStandardSSDLRS()), ) @@ -504,6 +511,7 @@ def verify_hot_add_disk_serial_random_lun_premium_ssd( be added serially while the vm is running. The test steps are same as `hot_add_disk_parallel`. """, + priority=2, timeout=TIME_OUT, requirement=simple_requirement(disk=DiskPremiumSSDLRS()), ) diff --git a/microsoft/testsuites/mshv/mshv_install.py b/microsoft/testsuites/mshv/mshv_install.py index 03eabdf25a..5a0494b2ff 100644 --- a/microsoft/testsuites/mshv/mshv_install.py +++ b/microsoft/testsuites/mshv/mshv_install.py @@ -41,7 +41,8 @@ class MshvHostInstallSuite(TestSuite): The test expects the directory containing MSHV binaries to be passed in the mshv_binpath variable. 
- """ + """, + priority=2, ) def verify_mshv_install_succeeds( self, From 8e396c5539d352e1868bc30437e7dd6f810f8982 Mon Sep 17 00:00:00 2001 From: Baihua Lu Date: Tue, 27 Aug 2024 13:59:34 +0800 Subject: [PATCH 11/17] Add some permissions for AITL for VM resize, extension test cases --- microsoft/utils/setup_aitl.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/microsoft/utils/setup_aitl.py b/microsoft/utils/setup_aitl.py index 4cdf5b394b..f495221185 100644 --- a/microsoft/utils/setup_aitl.py +++ b/microsoft/utils/setup_aitl.py @@ -233,9 +233,18 @@ def _set_target_role_parameters( "Microsoft.Compute/virtualMachines/extensions/read", "Microsoft.Compute/virtualMachines/extensions/write", "Microsoft.Compute/virtualMachines/extensions/delete", + # for verify_vm_assess_patches + "Microsoft.Compute/virtualMachines/assessPatches/action", + # for VM resize test suite + "Microsoft.Compute/virtualMachines/vmSizes/read", # For disk_support_restore_point & verify_vmsnapshot_extension "Microsoft.Compute/restorePointCollections/write", + # For verify_vmsnapshot_extension + "Microsoft.Compute/restorePointCollections/restorePoints/read", + "Microsoft.Compute/restorePointCollections/restorePoints/write", "Microsoft.ManagedIdentity/userAssignedIdentities/write", + # For verify_azsecpack + "Microsoft.ManagedIdentity/userAssignedIdentities/assign/action", "Microsoft.Network/virtualNetworks/read", "Microsoft.Network/virtualNetworks/write", "Microsoft.Network/virtualNetworks/subnets/join/action", From f1bb82e03a37f888cc92e3a3af6ab528c996c1ea Mon Sep 17 00:00:00 2001 From: Lili Deng Date: Tue, 27 Aug 2024 10:27:53 +0800 Subject: [PATCH 12/17] Use default_factory to initialize ConnectionInfo --- lisa/sut_orchestrator/baremetal/context.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lisa/sut_orchestrator/baremetal/context.py b/lisa/sut_orchestrator/baremetal/context.py index fe68db647a..aef161c544 100644 --- a/lisa/sut_orchestrator/baremetal/context.py +++ b/lisa/sut_orchestrator/baremetal/context.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from lisa import schema from lisa.environment import Environment @@ -14,7 +14,7 @@ class EnvironmentContext: @dataclass class NodeContext: - connection: schema.ConnectionInfo = schema.ConnectionInfo(password="mock") + connection: schema.ConnectionInfo = field(default_factory=schema.ConnectionInfo) @dataclass From b389c3c4510b29cb13c64a9cf14093edd6bed0e7 Mon Sep 17 00:00:00 2001 From: Lili Deng Date: Tue, 27 Aug 2024 15:16:36 +0800 Subject: [PATCH 13/17] support detect vmware esxi --- lisa/executable.py | 17 +++++++++++++++-- lisa/operating_system.py | 12 ++++++++++++ lisa/tools/lscpu.py | 41 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 2 deletions(-) diff --git a/lisa/executable.py b/lisa/executable.py index 46185465bf..9a085f533c 100644 --- a/lisa/executable.py +++ b/lisa/executable.py @@ -26,7 +26,6 @@ if TYPE_CHECKING: from lisa.node import Node - T = TypeVar("T") @@ -168,6 +167,10 @@ def create(cls, node: Node, *args: Any, **kwargs: Any) -> Tool: freebsd_tool = cls._freebsd_tool() if freebsd_tool: tool_cls = freebsd_tool + elif "VMWareESXi" in node.os.name: + vmware_esxi_tool = cls._vmware_esxi_tool() + if vmware_esxi_tool: + tool_cls = vmware_esxi_tool return tool_cls(node, *args, **kwargs) @classmethod @@ -184,11 +187,21 @@ def _freebsd_tool(cls) -> Optional[Type[Tool]]: """ return None + @classmethod + def _vmware_esxi_tool(cls) -> Optional[Type[Tool]]: 
+ """ + return a vmware esxi version tool class, if it's needed + """ + return None + def command_exists(self, command: str) -> Tuple[bool, bool]: exists = False use_sudo = False if self.node.is_posix: - where_command = "command -v" + if "VMWareESXi" in self.node.os.name: + where_command = "which" + else: + where_command = "command -v" else: where_command = "where" where_command = f"{where_command} {command}" diff --git a/lisa/operating_system.py b/lisa/operating_system.py index b83d5b294b..5bb44da351 100644 --- a/lisa/operating_system.py +++ b/lisa/operating_system.py @@ -128,6 +128,9 @@ class OperatingSystem: __release_pattern = re.compile(r"^DISTRIB_ID='?([^ \n']+).*$", re.M) __suse_release_pattern = re.compile(r"^(SUSE).*$", re.M) __bmc_release_pattern = re.compile(r".*(wcscli).*$", re.M) + # VMware ESXi 8.0.2 build-23305546 + # VMware ESXi 8.0 Update 2 + __vmware_esxi_release_pattern = re.compile(r"^(VMware ESXi).*$", re.M) __posix_factory: Optional[Factory[Any]] = None @@ -250,6 +253,9 @@ def _get_detect_string(cls, node: Any) -> Iterable[str]: cmd_result = typed_node.execute(cmd="wcscli", no_error_log=True) yield get_matched_str(cmd_result.stdout, cls.__bmc_release_pattern) + cmd_result = typed_node.execute(cmd="vmware -lv", no_error_log=True) + yield get_matched_str(cmd_result.stdout, cls.__vmware_esxi_release_pattern) + # try best from distros'family through ID_LIKE yield get_matched_str( cmd_result_os_release.stdout, cls.__os_release_pattern_idlike @@ -681,6 +687,12 @@ def name_pattern(cls) -> Pattern[str]: return re.compile("^wcscli$") +class VMWareESXi(Posix): + @classmethod + def name_pattern(cls) -> Pattern[str]: + return re.compile("^VMware ESXi$") + + class MacOS(Posix): @classmethod def name_pattern(cls) -> Pattern[str]: diff --git a/lisa/tools/lscpu.py b/lisa/tools/lscpu.py index 88bc49463f..7f7bac8362 100644 --- a/lisa/tools/lscpu.py +++ b/lisa/tools/lscpu.py @@ -107,6 +107,10 @@ def command(self) -> str: def _windows_tool(cls) -> Optional[Type[Tool]]: return WindowsLscpu + @classmethod + def _vmware_esxi_tool(cls) -> Optional[Type[Tool]]: + return VMWareESXiLscpu + @classmethod def _freebsd_tool(cls) -> Optional[Type[Tool]]: return BSDLscpu @@ -438,3 +442,40 @@ def calculate_vcpu_count(self, force_run: bool = False) -> int: * self.get_cluster_count() * self.get_thread_per_core_count() ) + + +class VMWareESXiLscpu(Lscpu): + # CPU Threads: 208 + __cpu_threads = re.compile(r"CPU Threads:[ ]+([\d]+)?", re.M) + # CPU Packages: 2 + __cpu_packages = re.compile(r"CPU Packages:[ ]+([\d]+)?", re.M) + # CPU Cores: 104 + __cpu_cores = re.compile(r"CPU Cores:[ ]+([\d]+)?", re.M) + + @property + def command(self) -> str: + return "esxcli" + + def get_core_count(self, force_run: bool = False) -> int: + result = self.run("hardware cpu global get", force_run) + matched = self.__cpu_threads.findall(result.stdout) + assert_that( + len(matched), + f"cpu thread should have exact one line, but got {matched}", + ).is_equal_to(1) + self._core_count = int(matched[0]) + return self._core_count + + def calculate_vcpu_count(self, force_run: bool = False) -> int: + result = self.run("hardware cpu global get", force_run) + matched_cpu_packages = self.__cpu_packages.findall(result.stdout) + assert_that( + len(matched_cpu_packages), + f"cpu packages should have exact one line, but got {matched_cpu_packages}", + ).is_equal_to(1) + matched_cpu_cores = self.__cpu_cores.findall(result.stdout) + assert_that( + len(matched_cpu_cores), + f"cpu cores should have exact one line, but got {matched_cpu_cores}", + 
).is_equal_to(1) + return int(matched_cpu_packages[0]) * int(matched_cpu_cores[0]) From d97439f4afe688a4e410b3090ab0dac3b09c5da1 Mon Sep 17 00:00:00 2001 From: Smit Gardhariya <113590758+smit-gardhariya@users.noreply.github.com> Date: Mon, 2 Sep 2024 20:09:41 +0530 Subject: [PATCH 14/17] Set env var for bzImage for CH tests (#3404) Read the testcase variable for bzImage and set it accordingly under env var while running the testcase. This is needed to test the CH tests with shipped bzImage. Signed-off-by: Smit Gardhariya --- microsoft/testsuites/cloud_hypervisor/ch_tests.py | 3 +++ microsoft/testsuites/cloud_hypervisor/ch_tests_tool.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/microsoft/testsuites/cloud_hypervisor/ch_tests.py b/microsoft/testsuites/cloud_hypervisor/ch_tests.py index ee0f77745e..5000b33fb0 100644 --- a/microsoft/testsuites/cloud_hypervisor/ch_tests.py +++ b/microsoft/testsuites/cloud_hypervisor/ch_tests.py @@ -199,6 +199,7 @@ def _set_ms_clh_param(self, variables: Dict[str, Any]) -> None: use_ms_guest_kernel = variables.get("use_ms_guest_kernel", "NO") use_ms_hypervisor_fw = variables.get("use_ms_hypervisor_fw", "NO") use_ms_ovmf_fw = variables.get("use_ms_ovmf_fw", "NO") + use_ms_bz_image = variables.get("use_ms_bz_image", "NO") # Below three params are for running block_* clh perf test # with no disk caching and with direct mode. By Default, we @@ -224,6 +225,8 @@ def _set_ms_clh_param(self, variables: Dict[str, Any]) -> None: CloudHypervisorTests.use_ms_hypervisor_fw = use_ms_hypervisor_fw if use_ms_ovmf_fw == "YES": CloudHypervisorTests.use_ms_ovmf_fw = use_ms_ovmf_fw + if use_ms_bz_image == "YES": + CloudHypervisorTests.use_ms_bz_image = use_ms_bz_image if block_size_kb: CloudHypervisorTests.block_size_kb = block_size_kb diff --git a/microsoft/testsuites/cloud_hypervisor/ch_tests_tool.py b/microsoft/testsuites/cloud_hypervisor/ch_tests_tool.py index ecf7eda463..c370dfa8b4 100644 --- a/microsoft/testsuites/cloud_hypervisor/ch_tests_tool.py +++ b/microsoft/testsuites/cloud_hypervisor/ch_tests_tool.py @@ -58,6 +58,7 @@ class CloudHypervisorTests(Tool): use_ms_guest_kernel = "" use_ms_hypervisor_fw = "" use_ms_ovmf_fw = "" + use_ms_bz_image = "" # Block perf related env var use_datadisk = "" @@ -258,6 +259,8 @@ def _install(self) -> bool: self.env_vars["USE_MS_HV_FW"] = self.use_ms_hypervisor_fw if self.use_ms_ovmf_fw: self.env_vars["USE_MS_OVMF_FW"] = self.use_ms_ovmf_fw + if self.use_ms_bz_image: + self.env_vars["USE_MS_BZ_IMAGE"] = self.use_ms_bz_image if self.use_datadisk: self.env_vars["USE_DATADISK"] = self.use_datadisk From 965f6770af63e000a789019a3b6c51e5b6b54ae7 Mon Sep 17 00:00:00 2001 From: Smit Gardhariya <113590758+smit-gardhariya@users.noreply.github.com> Date: Tue, 3 Sep 2024 17:27:12 +0530 Subject: [PATCH 15/17] Add device passthrough support for libvirt/hyperv platforms (#3379) * Add domain/vendor-device id for lspci tool Restructure device properties extraction using LISA's inbuilt method Add support to get domain/vendor-id/device-id for PCI devices Add support to query the tool to provide list for all devices by vendor/device id Signed-off-by: Smit Gardhariya * Add stop/start/restart/pass-through for VM under hyperv tool Extend hyperv tool to stop/start/restart VM Extend hyperv tool to configure pre-requisite to enable device pass-through for VM Signed-off-by: Smit Gardhariya * Implement common device pool for SUT platforms Implement common device pool and schema for SUT platforms. 
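To illustrate the intended flow (a sketch only -- `pool` here stands for
a platform-specific subclass of BaseDevicePool such as HyperVDevicePool,
and the vendor/device IDs are placeholders):

    from lisa.sut_orchestrator.util.schema import HostDevicePoolType

    # Fill the pool once per deployment from the runbook's device_pools.
    pool.create_device_pool(
        pool_type=HostDevicePoolType.PCI_NIC,
        vendor_id="8086",  # placeholder
        device_id="a1a3",  # placeholder
    )
    # Borrow devices when a VM is deployed...
    devices = pool.request_devices(pool_type=HostDevicePoolType.PCI_NIC, count=1)
    # ...and hand them back when the VM is deleted.
    pool.release_devices(node_context)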
Signed-off-by: Smit Gardhariya * Add device passthrough support for hyperv platform Implement Device Pool as per runbook Schema change for runbook to get the device pool details Add powershell script to get all assignable devices Set the node context as per requirement platform: - type: hyperv hyperv: device_pools: - type: "pci_net" devices: - vendor_id: xxx device_id: xxx requirement: hyperv: device_passthrough: - pool_types: "pci_net" count: 1 Signed-off-by: Smit Gardhariya * Add device passthrough support for libvirt platform Implement device passthrough for libvirt platform Use device pool and extend the functionality for libvirt platform Schema change to configure device pool from runbook Set each node context based on requirement for device pass-through platform: - type: cloud-hypervisor cloud-hypervisor: device_pools: - type: "pci_net" devices: - vendor_id: xxx device_id: xxx requirement: cloud-hypervisor: device_passthrough: - pool_type: "pci_net" managed: "yes" count: 1 Signed-off-by: Smit Gardhariya --------- Signed-off-by: Smit Gardhariya --- lisa/sut_orchestrator/hyperv/context.py | 19 +- .../hyperv/get_assignable_devices.py | 359 ++++++++++++++++++ .../hyperv/hyperv_device_pool.py | 220 +++++++++++ lisa/sut_orchestrator/hyperv/platform_.py | 28 +- lisa/sut_orchestrator/hyperv/schema.py | 16 + lisa/sut_orchestrator/libvirt/ch_platform.py | 13 + lisa/sut_orchestrator/libvirt/context.py | 17 +- .../libvirt/libvirt_device_pool.py | 348 +++++++++++++++++ lisa/sut_orchestrator/libvirt/platform.py | 29 ++ lisa/sut_orchestrator/libvirt/schema.py | 26 ++ lisa/sut_orchestrator/util/__init__.py | 0 lisa/sut_orchestrator/util/device_pool.py | 65 ++++ lisa/sut_orchestrator/util/schema.py | 32 ++ lisa/tools/hyperv.py | 31 +- lisa/tools/lspci.py | 80 +++- 15 files changed, 1266 insertions(+), 17 deletions(-) create mode 100644 lisa/sut_orchestrator/hyperv/get_assignable_devices.py create mode 100644 lisa/sut_orchestrator/hyperv/hyperv_device_pool.py create mode 100644 lisa/sut_orchestrator/libvirt/libvirt_device_pool.py create mode 100644 lisa/sut_orchestrator/util/__init__.py create mode 100644 lisa/sut_orchestrator/util/device_pool.py create mode 100644 lisa/sut_orchestrator/util/schema.py diff --git a/lisa/sut_orchestrator/hyperv/context.py b/lisa/sut_orchestrator/hyperv/context.py index 6bea260b73..b38978abdc 100644 --- a/lisa/sut_orchestrator/hyperv/context.py +++ b/lisa/sut_orchestrator/hyperv/context.py @@ -1,14 +1,24 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. 
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import PurePath
-from typing import Optional
+from typing import List, Optional

 from lisa import Node, RemoteNode
+from lisa.sut_orchestrator.hyperv.schema import DeviceAddressSchema
+from lisa.sut_orchestrator.util.schema import HostDevicePoolType
 from lisa.util.process import Process


+@dataclass
+class DevicePassthroughContext:
+    pool_type: HostDevicePoolType = HostDevicePoolType.PCI_NIC
+    device_list: List[DeviceAddressSchema] = field(
+        default_factory=list,
+    )
+
+
 @dataclass
 class NodeContext:
     vm_name: str = ""
@@ -16,6 +26,11 @@ class NodeContext:
     working_path = PurePath()
     serial_log_process: Optional[Process] = None

+    # Device pass-through configuration
+    passthrough_devices: List[DevicePassthroughContext] = field(
+        default_factory=list,
+    )
+
     @property
     def console_log_path(self) -> PurePath:
         return self.working_path / f"{self.vm_name}-console.log"
diff --git a/lisa/sut_orchestrator/hyperv/get_assignable_devices.py b/lisa/sut_orchestrator/hyperv/get_assignable_devices.py
new file mode 100644
index 0000000000..98bc9224d5
--- /dev/null
+++ b/lisa/sut_orchestrator/hyperv/get_assignable_devices.py
@@ -0,0 +1,359 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+# Refer: https://learn.microsoft.com/en-us/windows-server/virtualization/hyper-v/deploy/deploying-graphics-devices-using-dda  # noqa E501
+import re
+from typing import Dict, List, Optional
+
+from lisa.node import Node
+from lisa.tools import PowerShell
+from lisa.util import LisaException, find_group_in_lines, find_groups_in_lines
+from lisa.util.logger import Logger
+
+from .schema import DeviceAddressSchema
+
+
+class HypervAssignableDevices:
+    PKEY_DEVICE_TYPE = "{3AB22E31-8264-4b4e-9AF5-A8D2D8E33E62} 1"
+    PKEY_BASE_CLASS = "{3AB22E31-8264-4b4e-9AF5-A8D2D8E33E62} 3"
+    PKEY_REQUIRES_RESERVED_MEMORY_REGION = "{3AB22E31-8264-4b4e-9AF5-A8D2D8E33E62} 34"  # noqa E501
+    PKEY_ACS_COMPATIBLE_UP_HIERARCHY = "{3AB22E31-8264-4b4e-9AF5-A8D2D8E33E62} 31"  # noqa E501
+    PROP_DEVICE_TYPE_PCI_EXPRESS_ENDPOINT = "2"
+    PROP_DEVICE_TYPE_PCI_EXPRESS_LEGACY_ENDPOINT = "3"
+    PROP_DEVICE_TYPE_PCI_EXPRESS_ROOT_COMPLEX_INTEGRATED_ENDPOINT = "4"
+    PROP_DEVICE_TYPE_PCI_EXPRESS_TREATED_AS_PCI = "5"
+    PROP_ACS_COMPATIBLE_UP_HIERARCHY_NOT_SUPPORTED = "0"
+    PROP_BASE_CLASS_DISPLAY_CTRL = "3"
+
+    def __init__(self, host_node: Node, log: Logger):
+        self.host_node = host_node
+        self.log = log
+        self.pwsh = self.host_node.tools[PowerShell]
+        self.pnp_allocated_resources: List[Dict[str, str]] = (
+            self.__load_pnp_allocated_resources()
+        )
+
+    def get_assignable_devices(
+        self,
+        vendor_id: str,
+        device_id: str,
+    ) -> List[DeviceAddressSchema]:
+        device_id_list = self.__get_devices_by_vendor_device_id(
+            vendor_id=vendor_id, device_id=device_id
+        )
+
+        devices: List[DeviceAddressSchema] = []
+        for rec in device_id_list:
+            device_id = rec["device_id"]
+            result = self.__get_dda_properties(device_id=device_id)
+            if result:
+                result.friendly_name = rec["friendly_name"]
+                devices.append(result)
+        return devices
+
+    def __get_devices_by_vendor_device_id(
+        self,
+        vendor_id: str,
+        device_id: str,
+    ) -> List[Dict[str, str]]:
+        """
+        Get the device ID list for a given vendor/device ID combination.
+        """
+        devices: List[Dict[str, str]] = []
+        device_regex = re.compile(
+            r"Description\s+:\s*(?P<desc>.+)\n.*DeviceID\s+:\s*(?P<device_id>.+)"
+        )
+
+        cmd = (
+            "Get-WmiObject Win32_PnPEntity -Filter "
+            f"\"DeviceID LIKE 'PCI\\\\VEN_{vendor_id}&DEV_{device_id}%'\""
+        )
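+        # Run the WMI query; the output blocks are split apart below and
+        # parsed with device_regex to extract each Description and DeviceID.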
stdout = self.pwsh.run_cmdlet( + cmdlet=cmd, + force_run=True, + sudo=True, + ) + + devices_str = stdout.strip().split("\r\n\r\n") + filtered_devices = [i.strip() for i in devices_str if i.strip() != ""] + for device_properties in filtered_devices: + res = find_group_in_lines( + lines=device_properties, + pattern=device_regex, + single_line=False, + ) + if not res: + raise LisaException("Can not extract DeviceId/Description") + + devices.append({ + "device_id": res["device_id"].strip(), + "friendly_name": res["desc"].strip(), + }) + return devices + + def __get_pnp_device_property(self, device_id: str, property_name: str) -> str: + """ + Retrieve a PnP device property by instance ID and property key. + """ + cmd = ( + "(Get-PnpDeviceProperty -InstanceId " + f"'{device_id}' '{property_name}').Data" + ) + + output = self.pwsh.run_cmdlet( + cmdlet=cmd, + sudo=True, + force_run=True, + ) + return output.strip() + + def __load_pnp_allocated_resources(self) -> List[Dict[str, str]]: + # Command output result (just 2 device properties) + # ======================================================== + # __GENUS : 2 + # __CLASS : Win32_PNPAllocatedResource + # __SUPERCLASS : CIM_AllocatedResource + # __DYNASTY : CIM_Dependency + # __RELPATH : Win32_PNPAllocatedResource.Antecedent="\\\\WIN-2IDCNC2D5V + # C\\root\\cimv2:Win32_DeviceMemoryAddress.StartingAddress=\ + # "2463203328\"",Dependent="\\\\WIN-2IDCNC2D5VC\\root\\cimv2: + # Win32_PnPEntity.DeviceID=\"PCI\\\\VEN_8086&DEV_A1A3& + # SUBSYS_07161028&REV_09\\\\3&11583659&0&FC\"" + # __PROPERTY_COUNT : 2 + # __DERIVATION : {CIM_AllocatedResource, CIM_Dependency} + # __SERVER : WIN-2IDCNC2D5VC + # __NAMESPACE : root\cimv2 + # __PATH : \\WIN-2IDCNC2D5VC\root\cimv2:Win32_PNPAllocatedResource. + # Antecedent="\\\\WIN-2IDCNC2D5VC\\root\\cimv2:Win32_ + # DeviceMemoryAddress.StartingAddress=\"2463203328\"", + # Dependent="\\\\WIN-2IDCNC2D5VC\\root\\cimv2:Win32_PnP + # Entity.DeviceID=\"PCI\\\\VEN_8086&DEV_A1A3&SUBSYS_07161028& + # REV_09\\\\3&11583659&0&FC\"" + # Antecedent : \\WIN-2IDCNC2D5VC\root\cimv2:Win32_DeviceMemoryAddress. + # StartingAddress="2463203328" + # Dependent : \\WIN-2IDCNC2D5VC\root\cimv2:Win32_PnPEntity.DeviceID= + # "PCI\\VEN_8086&DEV_A1A3&SUBSYS_07161028&REV_09\\3 + # &11583659&0&FC" + # PSComputerName : WIN-2IDCNC2D5VC + + # __GENUS : 2 + # __CLASS : Win32_PNPAllocatedResource + # __SUPERCLASS : CIM_AllocatedResource + # __DYNASTY : CIM_Dependency + # __RELPATH : Win32_PNPAllocatedResource.Antecedent="\\\\WIN-2IDCNC2D5VC + # \\root\\cimv2:Win32_PortResource.StartingAddress=\"8192\"", + # Dependent="\\\\WIN-2IDCNC2D5VC\\root\\cimv2:Win32_PnPEntity + # .DeviceID=\"PCI\\\\VEN_8086&DEV_A1A3&SUBSYS_07161028&REV_09 + # \\\\3&11583659&0&FC\"" + # __PROPERTY_COUNT : 2 + # __DERIVATION : {CIM_AllocatedResource, CIM_Dependency} + # __SERVER : WIN-2IDCNC2D5VC + # __NAMESPACE : root\cimv2 + # __PATH : \\WIN-2IDCNC2D5VC\root\cimv2:Win32_PNPAllocatedResource. + # Antecedent="\\\\WIN-2IDCNC2D5VC\\root\\cimv2:Win32_PortR + # esource.StartingAddress=\"8192\"",Dependent="\\\\WIN-2ID + # CNC2D5VC\\root\\cimv2:Win32_PnPEntity.DeviceID=\"PCI\\\\ + # VEN_8086&DEV_A1A3&SUBSYS_07161028&REV_09\\\\3&11 + # 583659&0&FC\"" + # Antecedent : \\WIN-2IDCNC2D5VC\root\cimv2:Win32_PortResource. 
+        # StartingAddress="8192"
+        # Dependent : \\WIN-2IDCNC2D5VC\root\cimv2:Win32_PnPEntity.DeviceID=
+        # "PCI\\VEN_8086&DEV_A1A3&SUBSYS_07161028&REV_09\\3&
+        # 11583659&0&FC"
+        # PSComputerName : WIN-2IDCNC2D5VC
+
+        stdout = self.pwsh.run_cmdlet(
+            cmdlet="gwmi -query 'select * from Win32_PnPAllocatedResource'",
+            sudo=True,
+            force_run=True,
+        )
+        pnp_allocated_resources = stdout.strip().split("\r\n\r\n")
+        result: List[Dict[str, str]] = []
+        # Regular expression to match the key-value pairs
+        pattern = re.compile(r'(?P<key>\S+)\s*:\s*(?P<value>.*?)(?=\n\S|\Z)', re.DOTALL)
+
+        for rec in pnp_allocated_resources:
+            extract_val = {}
+            matches = find_groups_in_lines(
+                lines=rec.strip(),
+                pattern=pattern,
+                single_line=False,
+            )
+            if matches:
+                for element in matches:
+                    key = element["key"]
+                    val = element["value"]
+                    val = val.replace(" ", "")
+                    val = val.replace("\r\n", "")
+                    extract_val[key] = val
+                result.append(extract_val)
+        return result
+
+    def __get_mmio_end_address(self, start_addr: str) -> Optional[str]:
+        # MemoryType    Name                    Status
+        # ----------    ----                    ------
+        # WindowDecode  0xE1800000-0xE1BFFFFF   OK
+        #               0xE2000000-0xE2000FFF   OK
+        # WindowDecode  0xD4000000-0xD43FFFFF   OK
+        #               0xD4800000-0xD4800FFF   OK
+        #               0xFED1C000-0xFED3FFFF   OK
+
+        device_mem_addr = self.pwsh.run_cmdlet(
+            cmdlet="gwmi -query 'select * from Win32_DeviceMemoryAddress'",
+            sudo=True,
+            force_run=True,
+        )
+        end_addr_rec = None
+        for rec in device_mem_addr.splitlines():
+            rec = rec.strip()
+            if rec.find(start_addr) >= 0:
+                addr = rec.split("-")
+                start_addr_rec = addr[0].split()[-1]
+                end_addr_rec = addr[1].split()[0].strip()
+
+                err = "MMIO Starting address not matching"
+                assert start_addr == start_addr_rec, err
+                break
+        return end_addr_rec
+
+    def __get_dda_properties(self, device_id: str) -> Optional[DeviceAddressSchema]:
+        """
+        Determine whether a PCI device is assignable using Discrete Device
+        Assignment (DDA). If so, get DDA properties like the location path,
+        device ID, and friendly name.
+        """
+        self.log.debug(f"PCI InstanceId: {device_id}")
+
+        rmrr = self.__get_pnp_device_property(
+            device_id=device_id,
+            property_name=self.PKEY_REQUIRES_RESERVED_MEMORY_REGION,
+        )
+        rmrr = rmrr.strip()
+        if rmrr != "False":
+            self.log.debug(
+                "BIOS requires that this device remain attached to BIOS-owned "
+                "memory. Not assignable."
+            )
+            return None
+
+        acs_up = self.__get_pnp_device_property(
+            device_id=device_id,
+            property_name=self.PKEY_ACS_COMPATIBLE_UP_HIERARCHY,
+        )
+        acs_up = acs_up.strip()
+        if acs_up == self.PROP_ACS_COMPATIBLE_UP_HIERARCHY_NOT_SUPPORTED:
+            self.log.debug(
+                "Traffic from this device may be redirected to other devices in "
+                "the system. Not assignable."
+            )
+            return None
+
+        dev_type = self.__get_pnp_device_property(
+            device_id=device_id,
+            property_name=self.PKEY_DEVICE_TYPE
+        )
+        dev_type = dev_type.strip()
+        if dev_type == self.PROP_DEVICE_TYPE_PCI_EXPRESS_ENDPOINT:
+            self.log.debug("Express Endpoint -- more secure.")
+        else:
+            if dev_type == (
+                self.PROP_DEVICE_TYPE_PCI_EXPRESS_ROOT_COMPLEX_INTEGRATED_ENDPOINT
+            ):
+                self.log.debug("Embedded Endpoint -- less secure.")
+            elif dev_type == self.PROP_DEVICE_TYPE_PCI_EXPRESS_LEGACY_ENDPOINT:
+                dev_base_class = self.__get_pnp_device_property(
+                    device_id=device_id,
+                    property_name=self.PKEY_BASE_CLASS,
+                )
+                dev_base_class = dev_base_class.strip()
+                if dev_base_class == self.PROP_BASE_CLASS_DISPLAY_CTRL:
+                    self.log.debug("Legacy Express Endpoint -- graphics controller.")
+                else:
+                    self.log.debug("Legacy, non-VGA PCI device. 
Not assignable.") + return None + else: + if dev_type == self.PROP_DEVICE_TYPE_PCI_EXPRESS_TREATED_AS_PCI: + self.log.debug( + "BIOS kept control of PCI Express for this device. " + "Not assignable." + ) + else: + self.log.debug( + "Old-style PCI device, switch port, etc. " + "Not assignable." + ) + return None + + # Get the device location path + location_path = self.__get_pnp_device_property( + device_id=device_id, + property_name="DEVPKEY_Device_LocationPaths", + ) + location_path = location_path.strip().splitlines()[0] + self.log.debug(f"Device locationpath: {location_path}") + assert location_path.find("PCI") == 0, "Location path is wrong" + + cmd = ( + "(Get-PnpDevice -PresentOnly -InstanceId " + f"'{device_id}').ConfigManagerErrorCode" + ) + conf_mng_err_code = self.pwsh.run_cmdlet( + cmdlet=cmd, + force_run=True, + sudo=True, + ) + conf_mng_err_code = conf_mng_err_code.strip() + self.log.debug(f"ConfigManagerErrorCode: {conf_mng_err_code}") + if conf_mng_err_code == "CM_PROB_DISABLED": + self.log.debug( + "Device is Disabled, unable to check resource requirements, " + "it may be assignable." + ) + self.log.debug("Enable the device and rerun this script to confirm.") + return None + + irq_assignements = [ + i for i in self.pnp_allocated_resources + if i["Dependent"].find(device_id.replace("\\", "\\\\")) >= 0 + ] + if irq_assignements: + msi_assignments = [ + i for i in self.pnp_allocated_resources + if i["Antecedent"].find("IRQNumber=42949") >= 0 + ] + if not msi_assignments: + self.log.debug( + "All of the interrupts are line-based, no assignment can work." + ) + return None + else: + self.log.debug("Its interrupts are message-based, assignment can work.") + else: + self.log.debug("It has no interrupts at all -- assignment can work.") + + mmio_assignments = [ + i for i in self.pnp_allocated_resources + if i["Dependent"].find(device_id.replace("\\", "\\\\")) >= 0 + and i["__RELPATH"].find("Win32_DeviceMemoryAddres") >= 0 + ] + mmio_total = 0 + if mmio_assignments: + for rec in mmio_assignments: + antecedent_val = rec["Antecedent"] + addresses = antecedent_val.split('"') + assert len(addresses) >= 2, "Antecedent: Can't get MMIO Start Address" + start_address = hex(int(addresses[1].strip())).upper() + start_address_hex = start_address.replace("X", "x") + end_address = self.__get_mmio_end_address(start_address_hex) + assert end_address, "Can not get MMIO End Address" + + mmio = int(end_address, 16) - int(start_address, 16) + mmio_total += mmio + if mmio_total: + mmio_total = round(mmio_total / (1024 * 1024)) + self.log.debug(f"Device '{device_id}', Total MMIO = {mmio_total}MB ") + else: + self.log.debug("It has no MMIO space") + + device = DeviceAddressSchema() + device.location_path = location_path + device.instance_id = device_id + return device diff --git a/lisa/sut_orchestrator/hyperv/hyperv_device_pool.py b/lisa/sut_orchestrator/hyperv/hyperv_device_pool.py new file mode 100644 index 0000000000..c90718dd63 --- /dev/null +++ b/lisa/sut_orchestrator/hyperv/hyperv_device_pool.py @@ -0,0 +1,220 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
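+#
+# This module tracks host devices that are eligible for passthrough,
+# hands them out to Hyper-V guest VMs, and reclaims them when a VM is
+# deleted.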
+from typing import Dict, List, Optional + +from lisa.node import RemoteNode +from lisa.sut_orchestrator.hyperv.get_assignable_devices import HypervAssignableDevices +from lisa.sut_orchestrator.hyperv.schema import ( + DeviceAddressSchema, + HypervNodeSchema, + HypervPlatformSchema, +) +from lisa.sut_orchestrator.util.device_pool import BaseDevicePool +from lisa.sut_orchestrator.util.schema import HostDevicePoolSchema, HostDevicePoolType +from lisa.tools import HyperV, PowerShell +from lisa.util import ResourceAwaitableException +from lisa.util.logger import Logger + +from .context import DevicePassthroughContext, NodeContext + + +class HyperVDevicePool(BaseDevicePool): + def __init__( + self, + node: RemoteNode, + runbook: HypervPlatformSchema, + log: Logger, + ) -> None: + # Device Passthrough configs + # Mapping of Host Device Passthrough + self.available_host_devices: Dict[ + HostDevicePoolType, List[DeviceAddressSchema] + ] = {} + self.supported_pool_type = [ + HostDevicePoolType.PCI_NIC, + HostDevicePoolType.PCI_GPU, + ] + self._server = node + self._hyperv_runbook = runbook + self.log = log + + def create_device_pool( + self, + pool_type: HostDevicePoolType, + vendor_id: str, + device_id: str, + ) -> None: + hv_dev = HypervAssignableDevices( + host_node=self._server, + log=self.log, + ) + devices = hv_dev.get_assignable_devices( + vendor_id=vendor_id, + device_id=device_id, + ) + primary_nic_id_list = self.get_primary_nic_id() + pool = self.available_host_devices.get(pool_type, []) + for dev in devices: + if dev.instance_id not in primary_nic_id_list: + pool.append(dev) + self.available_host_devices[pool_type] = pool + + def request_devices( + self, + pool_type: HostDevicePoolType, + count: int, + ) -> List[DeviceAddressSchema]: + pool = self.available_host_devices[pool_type] + if len(pool) < count: + raise ResourceAwaitableException( + f"Not enough devices are available under pool: {pool_type}. " + f"Required count is {count}" + ) + devices = pool[:count] + + # Update the pool + pool = pool[count:] + self.available_host_devices[pool_type] = pool + + return devices + + def release_devices( + self, + node_context: NodeContext, + ) -> None: + vm_name = node_context.vm_name + devices_ctx = node_context.passthrough_devices + confing_commands = [] + for ctx in devices_ctx: + for device in ctx.device_list: + confing_commands.append( + f"Remove-VMAssignableDevice " + f"-LocationPath '{device.location_path}' -VMName '{vm_name}'" + ) + confing_commands.append( + f"Mount-VMHostAssignableDevice -LocationPath " + f"'{device.location_path}'" + ) + confing_commands.append( + f"Enable-PnpDevice -InstanceId '{device.instance_id}' " + "-Confirm:$false" + ) + + powershell = self._server.tools[PowerShell] + for cmd in confing_commands: + powershell.run_cmdlet( + cmdlet=cmd, + force_run=True, + ) + + def get_primary_nic_id(self) -> List[str]: + powershell = self._server.tools[PowerShell] + ip: str = self._server.public_address + + # Get the NIC name via IP. 
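+        # (public_address is the address LISA used to connect to this host,
+        # so this identifies the management NIC that must never be assigned)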
+ # We will get vEthernet switch interface name, not actual NIC for baremetal + cmd = ( + "(Get-NetAdapter | Get-NetIPAddress | Where-Object " + f"{{ $_.IPAddress -eq '{ip}' }}).InterfaceAlias" + ) + interface_name = powershell.run_cmdlet( + cmdlet=cmd, + force_run=True, + ) + + # Get the MAC for above interface + cmd = ( + "(Get-NetAdapter | Where-Object " + f"{{ $_.Name -eq '{interface_name}' }}).MacAddress" + ) + mac_address = powershell.run_cmdlet( + cmdlet=cmd, + force_run=True, + ) + + # Get all interfaces for above MAC Address + cmd = ( + "(Get-NetAdapter | Where-Object " + f"{{ $_.MacAddress -eq '{mac_address}' }}).Name" + ) + inf_names_str = powershell.run_cmdlet( + cmdlet=cmd, + force_run=True, + ) + inf_names: List[str] = inf_names_str.strip().splitlines() + + # Get device id for all above interface names we got + pnp_device_id_list: List[str] = [] + for name in inf_names: + cmd = ( + "(Get-NetAdapter | Where-Object " + f"{{ $_.Name -eq '{name}' }}).PnPDeviceID" + ) + interface_device_id = powershell.run_cmdlet( + cmdlet=cmd, + force_run=True, + ) + interface_device_id = interface_device_id.strip() + pnp_device_id_list.append(interface_device_id) + + return pnp_device_id_list + + def configure_device_passthrough_pool( + self, + device_configs: Optional[List[HostDevicePoolSchema]], + ) -> None: + super().configure_device_passthrough_pool( + device_configs=device_configs, + ) + + def _assign_devices_to_vm( + self, + vm_name: str, + devices: List[DeviceAddressSchema], + ) -> None: + # Assign the devices to the VM + confing_commands = [] + for device in devices: + confing_commands.append( + f"Disable-PnpDevice -InstanceId '{device.instance_id}' -Confirm:$false" + ) + confing_commands.append( + f"Dismount-VMHostAssignableDevice -Force " + f"-LocationPath '{device.location_path}'" + ) + confing_commands.append( + f"Add-VMAssignableDevice -LocationPath '{device.location_path}' " + f"-VMName '{vm_name}'" + ) + + powershell = self._server.tools[PowerShell] + for cmd in confing_commands: + powershell.run_cmdlet( + cmdlet=cmd, + force_run=True, + ) + + def _set_device_passthrough_node_context( + self, + node_context: NodeContext, + node_runbook: HypervNodeSchema, + hv: HyperV, + vm_name: str, + ) -> None: + if not node_runbook.device_passthrough: + return + hv.enable_device_passthrough(name=vm_name) + + for config in node_runbook.device_passthrough: + devices = self.request_devices( + pool_type=config.pool_type, + count=config.count, + ) + self._assign_devices_to_vm( + vm_name=vm_name, + devices=devices, + ) + device_context = DevicePassthroughContext() + device_context.pool_type = config.pool_type + device_context.device_list = devices + node_context.passthrough_devices.append(device_context) diff --git a/lisa/sut_orchestrator/hyperv/platform_.py b/lisa/sut_orchestrator/hyperv/platform_.py index 11d4dc00b8..fd319ce750 100644 --- a/lisa/sut_orchestrator/hyperv/platform_.py +++ b/lisa/sut_orchestrator/hyperv/platform_.py @@ -1,6 +1,5 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. - from functools import partial from pathlib import PurePath from typing import Any, List, Optional, Type, cast @@ -17,6 +16,7 @@ from .. 
import HYPERV from .context import NodeContext, get_node_context +from .hyperv_device_pool import HyperVDevicePool from .schema import HypervNodeSchema, HypervPlatformSchema from .serial_console import SerialConsole, SerialConsoleLogger from .source import Source @@ -45,6 +45,12 @@ def _initialize(self, *args: Any, **kwargs: Any) -> None: self._source_factory = Factory[Source](Source) self._source_files: Optional[List[PurePath]] = None + self.device_pool = HyperVDevicePool( + node=self._server, + runbook=self._hyperv_runbook, + log=self._log, + ) + def _get_hyperv_runbook(self) -> HypervPlatformSchema: hyperv_runbook = self.runbook.get_extended_runbook(HypervPlatformSchema) assert hyperv_runbook, "platform runbook cannot be empty" @@ -97,6 +103,12 @@ def _prepare_environment(self, environment: Environment, log: Logger) -> bool: return False environment.runbook.nodes_requirement = nodes_requirement + + # If Device_passthrough is set in runbook, + # Configure device passthrough params / Refresh the pool + self.device_pool.configure_device_passthrough_pool( + self._hyperv_runbook.device_pools, + ) return True def _get_host_capabilities(self, log: Logger) -> _HostCapabilities: @@ -285,6 +297,15 @@ def _deploy_environment(self, environment: Environment, log: Logger) -> None: }, extra_args=extra_args, ) + # perform device passthrough for the VM + self.device_pool._set_device_passthrough_node_context( + node_context=node_context, + node_runbook=node_runbook, + hv=hv, + vm_name=vm_name, + ) + # Start the VM + hv.start_vm(name=vm_name, extra_args=extra_args) ip_addr = hv.get_ip_address(vm_name) username = self.runbook.admin_username @@ -312,6 +333,11 @@ def _delete_node(node_ctx: NodeContext, wait_delete: bool) -> None: hv = self._server.tools[HyperV] vm_name = node_ctx.vm_name + # Reassign passthrough devices to host before VM is deleted + # This will be hot-unplug of device + if len(node_ctx.passthrough_devices) > 0: + self.device_pool.release_devices(node_ctx) + if wait_delete: hv.delete_vm(vm_name) else: diff --git a/lisa/sut_orchestrator/hyperv/schema.py b/lisa/sut_orchestrator/hyperv/schema.py index 72de6de953..58d52ed825 100644 --- a/lisa/sut_orchestrator/hyperv/schema.py +++ b/lisa/sut_orchestrator/hyperv/schema.py @@ -7,6 +7,10 @@ from dataclasses_json import dataclass_json from lisa import schema +from lisa.sut_orchestrator.util.schema import ( + DevicePassthroughSchema, + HostDevicePoolSchema, +) from lisa.util import field_metadata @@ -54,6 +58,7 @@ class HypervPlatformSchema: servers: List[HypervServer] = field(default_factory=list) extra_args: List[ExtraArgs] = field(default_factory=list) wait_delete: bool = False + device_pools: Optional[List[HostDevicePoolSchema]] = None @dataclass_json @@ -68,3 +73,14 @@ class HypervNodeSchema: hyperv_generation: int = 2 vhd: Optional[VhdSchema] = None osdisk_size_in_gb: int = 30 + # Configuration options for device-passthrough. 
+    device_passthrough: Optional[List[DevicePassthroughSchema]] = None
+
+
+@dataclass_json()
+@dataclass
+class DeviceAddressSchema:
+    # Host device details for which we want to perform device-passthrough
+    instance_id: str = ""
+    location_path: str = ""
+    friendly_name: str = ""
diff --git a/lisa/sut_orchestrator/libvirt/ch_platform.py b/lisa/sut_orchestrator/libvirt/ch_platform.py
index 1526ee6b7a..de2c7e7a0d 100644
--- a/lisa/sut_orchestrator/libvirt/ch_platform.py
+++ b/lisa/sut_orchestrator/libvirt/ch_platform.py
@@ -118,6 +118,11 @@ def _create_node_domain_xml(
         os_kernel.text = node_context.firmware_path
 
         devices = ET.SubElement(domain, "devices")
+        if len(node_context.passthrough_devices) > 0:
+            devices = self.device_pool._add_device_passthrough_xml(
+                devices,
+                node_context,
+            )
 
         console = ET.SubElement(devices, "console")
         console.attrib["type"] = "pty"
@@ -171,6 +176,14 @@ def _create_domain_and_attach_logger(
             node_context.domain, node_context.console_log_file_path
         )
 
+        if len(node_context.passthrough_devices) > 0:
+            # Once the libvirt domain is created, check that the driver
+            # attached to the device on the host is vfio-pci, to make sure
+            # PCI device passthrough actually took effect
+            self.device_pool._verify_device_passthrough_post_boot(
+                node_context=node_context,
+            )
+
     # Create the OS disk.
     def _create_node_os_disk(
         self, environment: Environment, log: Logger, node: Node
diff --git a/lisa/sut_orchestrator/libvirt/context.py b/lisa/sut_orchestrator/libvirt/context.py
index 469f508fb4..6e6793ae52 100644
--- a/lisa/sut_orchestrator/libvirt/context.py
+++ b/lisa/sut_orchestrator/libvirt/context.py
@@ -5,9 +5,10 @@
 
 from lisa.environment import Environment
 from lisa.node import Node
+from lisa.sut_orchestrator.util.schema import HostDevicePoolType
 
 from .console_logger import QemuConsoleLogger
-from .schema import DiskImageFormat
+from .schema import DeviceAddressSchema, DiskImageFormat
 
 
 @dataclass
@@ -33,6 +34,15 @@ class InitSystem:
     IGNITION: str = "ignition"
 
 
+@dataclass
+class DevicePassthroughContext:
+    pool_type: HostDevicePoolType = HostDevicePoolType.PCI_NIC
+    device_list: List[DeviceAddressSchema] = field(
+        default_factory=list,
+    )
+    managed: str = ""
+
+
 @dataclass
 class NodeContext:
     vm_name: str = ""
@@ -57,6 +67,11 @@ class NodeContext:
     console_logger: Optional[QemuConsoleLogger] = None
     domain: Optional[libvirt.virDomain] = None
 
+    # Device passthrough configuration
+    passthrough_devices: List[DevicePassthroughContext] = field(
+        default_factory=list,
+    )
+
 
 def get_environment_context(environment: Environment) -> EnvironmentContext:
     return environment.get_context(EnvironmentContext)
diff --git a/lisa/sut_orchestrator/libvirt/libvirt_device_pool.py b/lisa/sut_orchestrator/libvirt/libvirt_device_pool.py
new file mode 100644
index 0000000000..7e4c871c71
--- /dev/null
+++ b/lisa/sut_orchestrator/libvirt/libvirt_device_pool.py
@@ -0,0 +1,348 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import re
+import xml.etree.ElementTree as ET  # noqa: N817
+from itertools import combinations
+from typing import Any, Dict, List, Optional, cast
+
+from lisa.node import Node, RemoteNode
+from lisa.sut_orchestrator.util.device_pool import BaseDevicePool
+from lisa.sut_orchestrator.util.schema import HostDevicePoolSchema, HostDevicePoolType
+from lisa.tools import Ls, Lspci, Modprobe
+from lisa.util import LisaException, ResourceAwaitableException, find_group_in_lines
+
+from .context import DevicePassthroughContext, NodeContext
+from .schema import (
+    BaseLibvirtNodeSchema,
+    BaseLibvirtPlatformSchema,
+    DeviceAddressSchema,
+)
+
+
+class LibvirtDevicePool(BaseDevicePool):
+    def __init__(
+        self,
+        host_node: Node,
+        runbook: BaseLibvirtPlatformSchema,
+    ) -> None:
+        # Mapping of host device passthrough pools
+        self.available_host_devices: Dict[
+            HostDevicePoolType, Dict[str, List[DeviceAddressSchema]]
+        ] = {}
+
+        self.supported_pool_type = [
+            HostDevicePoolType.PCI_NIC,
+            HostDevicePoolType.PCI_GPU,
+        ]
+        self.host_node = host_node
+        self.platform_runbook = runbook
+
+    def configure_device_passthrough_pool(
+        self,
+        device_configs: Optional[List[HostDevicePoolSchema]],
+    ) -> None:
+        if not device_configs:
+            return
+
+        # Check if the host supports device passthrough
+        self._check_passthrough_support(self.host_node)
+
+        super().configure_device_passthrough_pool(
+            device_configs=device_configs,
+        )
+
+        modprobe = self.host_node.tools[Modprobe]
+        allow_unsafe_interrupt = modprobe.load(
+            modules="vfio_iommu_type1",
+            parameters="allow_unsafe_interrupts=1",
+        )
+        if not allow_unsafe_interrupt:
+            raise LisaException("Allowing unsafe interrupts failed")
+
+    def request_devices(
+        self,
+        pool_type: HostDevicePoolType,
+        count: int,
+    ) -> List[DeviceAddressSchema]:
+        pool = self.available_host_devices.get(pool_type, {})
+        keys = list(pool.keys())
+        results = []
+        for r in range(1, len(keys) + 1):
+            for combo in combinations(keys, r):
+                if sum(len(pool.get(key, [])) for key in combo) == count:
+                    results.append(combo)
+        if not results:
+            for r in range(1, len(keys) + 1):
+                for combo in combinations(keys, r):
+                    if sum(len(pool.get(key, [])) for key in combo) >= count:
+                        results.append(combo)
+                        break
+                if results:
+                    break
+
+        if not results:
+            raise ResourceAwaitableException(
+                f"Pool {pool_type} is running out of devices: "
+                "no IOMMU group combination has a sufficient device count. "
+                f"Pool: {pool}"
+            )
+
+        devices: List[DeviceAddressSchema] = []
+        selected_pools = results[0]
+        for iommu_grp in selected_pools:
+            devices += pool.pop(iommu_grp)
+        self.available_host_devices[pool_type] = pool
+        return devices
+
+    def release_devices(
+        self,
+        node_context: NodeContext,
+    ) -> None:
+        device_context = node_context.passthrough_devices
+        for context in device_context:
+            pool_type = context.pool_type
+            devices_list = context.device_list
+            pool = self.available_host_devices.get(pool_type, {})
+            for device in devices_list:
+                iommu_grp = self._get_device_iommu_group(device)
+                pool_devices = pool.get(iommu_grp, [])
+                pool_devices.append(device)
+                pool[iommu_grp] = pool_devices
+            self.available_host_devices[pool_type] = pool
+
+    def get_primary_nic_id(self) -> List[str]:
+        # This is for bare metal. For Azure, we would have to get the private IP
+        host_ip = cast(RemoteNode, self.host_node).connection_info.get("address")
+        assert host_ip, "Host IP is empty"
+        cmd = "ip -o -4 addr show"
+        err = f"Can not get interface for IP: {host_ip}"
+        result = self.host_node.execute(
+            cmd=cmd,
+            shell=True,
+            sudo=True,
+            expected_exit_code=0,
+            expected_exit_code_failure_message=err,
+        )
+        # Output for above command
+        # ===============================
+        # root [ /home/cloud ]# ip -o -4 addr show
+        # 1: lo inet 127.0.0.1/8
+        # scope host lo\ valid_lft forever preferred_lft forever
+        # 3: eth1 inet 10.195.88.216/23 metric 1024 brd 10.195.89.255
+        # scope global dynamic eth1\ valid_lft 7210sec preferred_lft 7210sec
+        # 6: eth4 inet 10.10.40.135/22 metric 1024 brd 10.10.43.255
+        # scope global dynamic eth4\ valid_lft 27011sec preferred_lft 27011sec
+
+        interface_name = ""
+        for line in result.stdout.strip().splitlines():
+            if line.find(host_ip) >= 0:
+                interface_name = line.split()[1].strip()
+
+        assert interface_name, "Can not find interface name"
+        result = self.host_node.execute(
+            cmd=f"find /sys/devices/ -name '*{interface_name}*'",
+            sudo=True,
+            shell=True,
+        )
+        stdout = result.stdout.strip()
+        assert len(stdout.splitlines()) == 1
+        pci_address_pattern = re.compile(
+            r"/(?P<root>[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F])/"
+            r"(?P<id>[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F])/"
+        )
+        match = find_group_in_lines(
+            lines=stdout,
+            pattern=pci_address_pattern,
+            single_line=False,
+        )
+        if match:
+            pci_address = match.get("id", "")
+            assert pci_address, "Can not get primary NIC IOMMU Group"
+            device = DeviceAddressSchema()
+            domain, bus, slot, fn = self._parse_pci_address_str(addr=pci_address)
+            device.domain = domain
+            device.bus = bus
+            device.slot = slot
+            device.function = fn
+
+            iommu_grp = self._get_device_iommu_group(device)
+            return [iommu_grp]
+        else:
+            raise LisaException(
+                f"Can't find PCI address for: {interface_name}, "
+                f"stdout of command: {stdout}"
+            )
+
+    def create_device_pool(
+        self,
+        pool_type: HostDevicePoolType,
+        vendor_id: str,
+        device_id: str,
+    ) -> None:
+        self.available_host_devices[pool_type] = {}
+        lspci = self.host_node.tools[Lspci]
+        device_list = lspci.get_devices_by_vendor_device_id(
+            vendor_id=vendor_id,
+            device_id=device_id,
+        )
+        primary_nic_iommu = self.get_primary_nic_id()
+        for item in device_list:
+            device = DeviceAddressSchema()
+            domain, bus, slot, fn = self._parse_pci_address_str(addr=item.slot)
+            device.domain = domain
+            device.bus = bus
+            device.slot = slot
+            device.function = fn
+            iommu_group = self._get_device_iommu_group(device)
+            is_vfio_pci = self._is_driver_vfio_pci(device)
+
+            if not is_vfio_pci and iommu_group not in primary_nic_iommu:
+                pool = self.available_host_devices.get(pool_type, {})
+                devices = pool.get(iommu_group, [])
+                devices.append(device)
+                pool[iommu_group] = devices
+                self.available_host_devices[pool_type] = pool
+
+    def _add_device_passthrough_xml(
+        self,
+        devices: ET.Element,
+        node_context: NodeContext,
+    ) -> ET.Element:
+        for context in node_context.passthrough_devices:
+            for config in context.device_list:
+                hostdev = ET.SubElement(devices, "hostdev")
+                hostdev.attrib["mode"] = "subsystem"
+
+                assert context.managed
+                hostdev.attrib["managed"] = context.managed
+
+                assert context.pool_type
+                if "pci" in context.pool_type.value:
+                    hostdev.attrib["type"] = "pci"
+
+                source = ET.SubElement(hostdev, "source")
+                src_addrs = ET.SubElement(source, "address")
+
+                assert config.domain
+                src_addrs.attrib["domain"] = f"0x{config.domain}"
+
+                assert config.bus
+                src_addrs.attrib["bus"] = f"0x{config.bus}"
+
+                assert config.slot
+                src_addrs.attrib["slot"] = f"0x{config.slot}"
+
+                assert config.function
+                src_addrs.attrib["function"] = f"0x{config.function}"
+
+                driver = ET.SubElement(hostdev, "driver")
+                driver.attrib["name"] = "vfio"
+
+        return devices
+
+    def _get_pci_address_str(
+        self,
+        device_addr: DeviceAddressSchema,
+        with_domain: bool = True,
+    ) -> str:
+        bus = device_addr.bus
+        slot = device_addr.slot
+        fn = device_addr.function
+        domain = device_addr.domain
+        addr = f"{bus}:{slot}.{fn}"
+        if with_domain:
+            addr = f"{domain}:{addr}"
+        return addr
+
+    def _parse_pci_address_str(
+        self,
+        addr: str,
+        with_domain: bool = True,
+    ) -> Any:
+        addr_split = addr.strip().split(":")
+        idx = 1 if with_domain else 0
+
+        bus = addr_split[idx]
+        slot = addr_split[idx + 1].split(".")[0]
+        fn = addr_split[idx + 1].split(".")[1]
+
+        if with_domain:
+            domain = addr_split[0]
+            return domain, bus, slot, fn
+        else:
+            return bus, slot, fn
+
+    def _verify_device_passthrough_post_boot(
+        self,
+        node_context: NodeContext,
+    ) -> None:
+        device_context = node_context.passthrough_devices
+        for context in device_context:
+            devices = context.device_list
+            for device in devices:
+                err = f"Kernel driver is not vfio-pci for device: {device}"
+                pool_type = context.pool_type.value
+                if context.managed == "yes" and "pci" in pool_type:
+                    is_vfio_pci = self._is_driver_vfio_pci(device)
+                    assert is_vfio_pci, err
+
+    def _check_passthrough_support(self, host_node: Node) -> None:
+        ls = host_node.tools[Ls]
+        path = "/dev/vfio/vfio"
+        err = "Host does not support IOMMU"
+        if not ls.path_exists(path=path, sudo=True):
+            raise LisaException(f"{err} : {path} does not exist")
+
+        path = "/sys/kernel/iommu_groups/"
+        if len(ls.list(path=path, sudo=True)) == 0:
+            raise LisaException(f"{err} : {path} does not have any entry")
+
+    def _is_driver_vfio_pci(
+        self,
+        device_addr: DeviceAddressSchema,
+    ) -> bool:
+        lspci = self.host_node.tools[Lspci]
+        device_addr_str = self._get_pci_address_str(device_addr)
+        kernel_module = lspci.get_used_module(device_addr_str)
+        return kernel_module == "vfio-pci"
+
+    def _set_device_passthrough_node_context(
+        self,
+        node_context: NodeContext,
+        node_runbook: BaseLibvirtNodeSchema,
+    ) -> None:
+        if not node_runbook.device_passthrough:
+            return
+        for config in node_runbook.device_passthrough:
+            device_context = DevicePassthroughContext()
+            device_context.managed = config.managed
+            device_context.pool_type = config.pool_type
+            devices = self.request_devices(config.pool_type, config.count)
+            device_context.device_list = devices
+            node_context.passthrough_devices.append(device_context)
+
+    def _get_device_iommu_group(self, device: DeviceAddressSchema) -> str:
+        iommu_pattern = re.compile(r"/sys/kernel/iommu_groups/(?P<id>\d+)/devices/.*")
+        device_id = self._get_pci_address_str(device)
+        command = "find /sys/kernel/iommu_groups/ -type l"
+        err = "Command failed to list IOMMU Groups"
+        result = self.host_node.execute(
+            cmd=command,
+            shell=True,
+            sudo=True,
+            expected_exit_code=0,
+            expected_exit_code_failure_message=err,
+        )
+
+        iommu_grp = ""
+        for line in result.stdout.strip().splitlines():
+            if line.find(device_id) >= 0:
+                iommu_grp_res = find_group_in_lines(
+                    lines=line,
+                    pattern=iommu_pattern,
+                )
+                iommu_grp = iommu_grp_res.get("id", "")
+                break
+        assert iommu_grp, f"Can not get IOMMU group for device: {device}"
+        return f"iommu_grp_{iommu_grp}"
diff --git a/lisa/sut_orchestrator/libvirt/platform.py b/lisa/sut_orchestrator/libvirt/platform.py
index 9e866b2fab..887176c359 100644
--- a/lisa/sut_orchestrator/libvirt/platform.py
+++ b/lisa/sut_orchestrator/libvirt/platform.py
@@ -26,6 +26,7 @@
 from lisa.node import Node, RemoteNode, local_node_connect
 from lisa.operating_system import CBLMariner
 from lisa.platform_ import Platform
+from lisa.sut_orchestrator.libvirt.libvirt_device_pool import LibvirtDevicePool
 from lisa.tools import (
     Chmod,
     Chown,
@@ -179,6 +180,13 @@ def _initialize(self, *args: Any, **kwargs: Any) -> None:
         self.__init_libvirt_conn_string()
         self.libvirt_conn = libvirt.open(self.libvirt_conn_str)
 
+        self.device_pool = LibvirtDevicePool(self.host_node, self.platform_runbook)
+        # If device passthrough is set in the runbook,
+        # configure the device passthrough params
+        self.device_pool.configure_device_passthrough_pool(
+            self.platform_runbook.device_pools,
+        )
+
     def _prepare_environment(self, environment: Environment, log: Logger) -> bool:
         # Ensure environment log directory is created before connecting to any nodes.
         _ = environment.log_path
@@ -503,6 +511,11 @@ def _configure_node(
 
             node_context.data_disks.append(data_disk)
 
+        self.device_pool._set_device_passthrough_node_context(
+            node_context,
+            node_runbook,
+        )
+
     def restart_domain_and_attach_logger(self, node: Node) -> None:
         node_context = get_node_context(node)
         domain = node_context.domain
@@ -537,6 +550,13 @@ def _create_domain_and_attach_logger(
         # Start the VM.
         node_context.domain.resume()
 
+        # Once the libvirt domain is created, check that the driver
+        # attached to the device on the host is vfio-pci, to make sure
+        # PCI device passthrough actually took effect
+        self.device_pool._verify_device_passthrough_post_boot(
+            node_context=node_context,
+        )
+
     # Create all the VMs.
     def _create_nodes(
         self,
@@ -654,6 +674,10 @@ def _delete_node(self, node: Node, log: Logger) -> None:
 
         watchdog.cancel()
 
+        # Add passthrough devices back to the pool of
+        # available devices once the domain is deleted
+        self.device_pool.release_devices(node_context)
+
     def _get_domain_undefine_flags(self) -> int:
         return int(
             libvirt.VIR_DOMAIN_UNDEFINE_MANAGED_SAVE
@@ -942,6 +966,11 @@ def _create_node_domain_xml(
         on_crash.text = "destroy"
 
         devices = ET.SubElement(domain, "devices")
+        if len(node_context.passthrough_devices) > 0:
+            devices = self.device_pool._add_device_passthrough_xml(
+                devices,
+                node_context,
+            )
 
         serial = ET.SubElement(devices, "serial")
         serial.attrib["type"] = "pty"
diff --git a/lisa/sut_orchestrator/libvirt/schema.py b/lisa/sut_orchestrator/libvirt/schema.py
index 87e7e1d3c9..2c85bbebd6 100644
--- a/lisa/sut_orchestrator/libvirt/schema.py
+++ b/lisa/sut_orchestrator/libvirt/schema.py
@@ -4,6 +4,10 @@
 
 from dataclasses_json import dataclass_json
 
+from lisa.sut_orchestrator.util.schema import (
+    DevicePassthroughSchema,
+    HostDevicePoolSchema,
+)
 from lisa.util import LisaException
 
 FIRMWARE_TYPE_BIOS = "bios"
@@ -34,6 +38,17 @@ def is_remote(self) -> bool:
         return self.address is not None
 
 
+@dataclass_json()
+@dataclass
+class DeviceAddressSchema:
+    # Host device details for which we want to perform device-passthrough;
+    # we can get them using the lspci command
+    domain: str = ""
+    bus: str = ""
+    slot: str = ""
+    function: str = ""
+
+
 # QEMU orchestrator's global configuration options.
@dataclass_json() @dataclass @@ -51,6 +66,14 @@ class BaseLibvirtPlatformSchema: capture_libvirt_debug_logs: bool = False + device_pools: Optional[List[HostDevicePoolSchema]] = None + + +@dataclass_json() +@dataclass +class LibvirtDevicePassthroughSchema(DevicePassthroughSchema): + managed: str = "" + # Possible disk image formats class DiskImageFormat(Enum): @@ -85,6 +108,9 @@ class BaseLibvirtNodeSchema: # Whether to enable secure boot. enable_secure_boot: bool = False + # Configuration options for device-passthrough. + device_passthrough: Optional[List[LibvirtDevicePassthroughSchema]] = None + # QEMU orchestrator's per-node configuration options. # This ensures backward compatibility with existing runbooks that specify the diff --git a/lisa/sut_orchestrator/util/__init__.py b/lisa/sut_orchestrator/util/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lisa/sut_orchestrator/util/device_pool.py b/lisa/sut_orchestrator/util/device_pool.py new file mode 100644 index 0000000000..7f91d3e787 --- /dev/null +++ b/lisa/sut_orchestrator/util/device_pool.py @@ -0,0 +1,65 @@ +from typing import Any, List, Optional + +from lisa.sut_orchestrator.util.schema import HostDevicePoolSchema, HostDevicePoolType +from lisa.util import LisaException + + +class BaseDevicePool: + def __init__(self) -> None: + self.supported_pool_type: List[Any] = [] + + def create_device_pool( + self, + pool_type: HostDevicePoolType, + vendor_id: str, + device_id: str, + ) -> None: + raise NotImplementedError() + + def get_primary_nic_id(self) -> List[str]: + raise NotImplementedError() + + def request_devices( + self, + pool_type: HostDevicePoolType, + count: int, + ) -> Any: + raise NotImplementedError() + + def release_devices( + self, + node_context: Any, + ) -> None: + raise NotImplementedError() + + def configure_device_passthrough_pool( + self, + device_configs: Optional[List[HostDevicePoolSchema]], + ) -> None: + if device_configs: + pool_types_from_runbook = [config.type for config in device_configs] + for pool_type in pool_types_from_runbook: + if pool_type not in self.supported_pool_type: + raise LisaException( + f"Pool type '{pool_type}' is not supported by platform" + ) + for config in device_configs: + vendor_device_list = config.devices + if len(vendor_device_list) > 1: + raise LisaException( + "Device Pool does not support more than one " + "vendor/device id list for given pool type" + ) + + vendor_device_id = vendor_device_list[0] + assert vendor_device_id.vendor_id.strip() + vendor_id = vendor_device_id.vendor_id.strip() + + assert vendor_device_id.device_id.strip() + device_id = vendor_device_id.device_id.strip() + + self.create_device_pool( + pool_type=config.type, + vendor_id=vendor_id, + device_id=device_id, + ) diff --git a/lisa/sut_orchestrator/util/schema.py b/lisa/sut_orchestrator/util/schema.py new file mode 100644 index 0000000000..369d5eb9fd --- /dev/null +++ b/lisa/sut_orchestrator/util/schema.py @@ -0,0 +1,32 @@ +from dataclasses import dataclass, field +from enum import Enum +from typing import List + +from dataclasses_json import dataclass_json + + +class HostDevicePoolType(Enum): + PCI_NIC = "pci_net" + PCI_GPU = "pci_gpu" + + +@dataclass_json() +@dataclass +class DeviceIdentifier: + vendor_id: str = "" + device_id: str = "" + + +# Configuration options for device-passthrough for the VM. 
+@dataclass_json()
+@dataclass
+class HostDevicePoolSchema:
+    type: HostDevicePoolType = HostDevicePoolType.PCI_NIC
+    devices: List[DeviceIdentifier] = field(default_factory=list)
+
+
+@dataclass_json()
+@dataclass
+class DevicePassthroughSchema:
+    pool_type: HostDevicePoolType = HostDevicePoolType.PCI_NIC
+    count: int = 0
diff --git a/lisa/tools/hyperv.py b/lisa/tools/hyperv.py
index c84039d30d..a4c575cfb6 100644
--- a/lisa/tools/hyperv.py
+++ b/lisa/tools/hyperv.py
@@ -49,9 +49,10 @@ def delete_vm_async(self, name: str) -> Optional[Process]:
             return None
 
         # stop and delete vm
+        self.stop_vm(name=name)
         powershell = self.node.tools[PowerShell]
         return powershell.run_cmdlet_async(
-            f"Stop-VM -Name {name} -Force; Remove-VM -Name {name} -Force",
+            f"Remove-VM -Name {name} -Force",
             force_run=True,
         )
 
@@ -148,6 +149,11 @@ def create_vm(
             force_run=True,
         )
 
+    def start_vm(
+        self,
+        name: str,
+        extra_args: Optional[Dict[str, str]] = None,
+    ) -> None:
         # start vm
         self._run_hyperv_cmdlet(
             "Start-VM", f"-Name {name}", extra_args=extra_args, force_run=True
@@ -170,6 +176,29 @@ def create_vm(
         if not is_ready:
             raise LisaException(f"VM {name} did not start")
 
+    def stop_vm(self, name: str) -> None:
+        # stop vm
+        self._run_hyperv_cmdlet("Stop-VM", f"-Name {name} -Force", force_run=True)
+
+    def restart_vm(
+        self,
+        name: str,
+    ) -> None:
+        # restart vm
+        self._run_hyperv_cmdlet("Restart-VM", f"-Name {name} -Force", force_run=True)
+
+    def enable_device_passthrough(self, name: str, mmio_mb: int = 5120) -> None:
+        self._run_hyperv_cmdlet(
+            "Set-VM",
+            f"-Name {name} -AutomaticStopAction TurnOff",
+            force_run=True,
+        )
+        self._run_hyperv_cmdlet(
+            "Set-VM",
+            f"-HighMemoryMappedIoSpace {mmio_mb}Mb -VMName {name}",
+            force_run=True,
+        )
+
     def get_default_external_switch(self) -> Optional[VMSwitch]:
         switch_json = self.node.tools[PowerShell].run_cmdlet(
             'Get-VMSwitch | Where-Object {$_.SwitchType -eq "External"} '
diff --git a/lisa/tools/lspci.py b/lisa/tools/lspci.py
index f0170562f8..c023b763f4 100644
--- a/lisa/tools/lspci.py
+++ b/lisa/tools/lspci.py
@@ -8,7 +8,14 @@
 from lisa.executable import Tool
 from lisa.operating_system import Posix
 from lisa.tools import Echo
-from lisa.util import LisaException, constants, find_patterns_in_lines, get_matched_str
+from lisa.util import (
+    LisaException,
+    constants,
+    find_group_in_lines,
+    find_groups_in_lines,
+    find_patterns_in_lines,
+    get_matched_str,
+)
 
 # Example output of lspci command -
 # lspci -m
@@ -36,6 +43,13 @@
     re.MULTILINE,
 )
 
+# With -mnn option, result would be with vendor/device id
+# d8:00.0 "Ethernet controller [0200]" "Mellanox Technologies [15b3]"
+# "MT27520 Family [ConnectX-3 Pro] [1007]" "Mellanox Technologies [15b3]"
+# "Mellanox Technologies ConnectX-3 Pro Stand-up dual-port 40GbE MCX314A-BCCT [0006]"
+PATTERN_DEVICE_ID = re.compile(r"\[(?P<id>[^\]]{4})\]")
+
+
 DEVICE_TYPE_DICT: Dict[str, List[str]] = {
     constants.DEVICE_TYPE_SRIOV: ["Ethernet controller"],
     constants.DEVICE_TYPE_NVME: ["Non-Volatile memory controller"],
@@ -59,19 +73,48 @@ def __init__(self, pci_device_raw: str) -> None:
 
     def __str__(self) -> str:
         return (
-            f"PCI device: {self.slot} "
-            f"class {self.device_class} "
-            f"vendor {self.vendor} "
-            f"info: {self.device_info} "
+            f"PCI device: {self.slot}, "
+            f"class: {self.device_class}, "
+            f"vendor: {self.vendor}, "
+            f"info: {self.device_info}, "
+            f"vendor_id: {self.vendor_id}, "
+            f"device_id: {self.device_id}"
         )
 
     def parse(self, raw_str: str) -> None:
-        matched_pci_device_info = PATTERN_PCI_DEVICE.match(raw_str)
-        if matched_pci_device_info:
-            self.slot = matched_pci_device_info.group("slot")
-            self.device_class = matched_pci_device_info.group("device_class")
-            self.vendor = matched_pci_device_info.group("vendor")
-            self.device_info = matched_pci_device_info.group("device")
+        matched_pci_device_info_list = find_groups_in_lines(
+            lines=raw_str,
+            pattern=PATTERN_PCI_DEVICE,
+        )
+        if matched_pci_device_info_list:
+            matched_pci_device_info = matched_pci_device_info_list[0]
+            self.slot = matched_pci_device_info.get("slot", "").strip()
+            assert self.slot, f"Can not find slot info for: {raw_str}"
+
+            device_class = matched_pci_device_info.get("device_class", "")
+            assert device_class, f"Can not find device class for: {raw_str}"
+            self.device_class = PATTERN_DEVICE_ID.sub("", device_class).strip()
+
+            vendor = matched_pci_device_info.get("vendor", "")
+            assert vendor, f"Can not find vendor info for: {raw_str}"
+            vendor_id_raw = find_group_in_lines(
+                lines=vendor,
+                pattern=PATTERN_DEVICE_ID,
+                single_line=False,
+            )
+            self.vendor_id = vendor_id_raw.get("id", "")
+            assert self.vendor_id, f"cannot find vendor id from {raw_str}"
+            self.vendor = PATTERN_DEVICE_ID.sub("", vendor).strip()
+
+            self.device_info = matched_pci_device_info.get("device", "")
+            assert self.device_info, f"Can not find device info for: {raw_str}"
+            device_id_raw = find_group_in_lines(
+                lines=self.device_info,
+                pattern=PATTERN_DEVICE_ID,
+                single_line=False,
+            )
+            self.device_id = device_id_raw.get("id", "")
+            assert self.device_id, f"cannot find device id from {raw_str}"
         else:
             raise LisaException("cannot find any matched pci devices")
 
@@ -126,7 +169,7 @@ def get_devices(self, force_run: bool = False) -> List[PciDevice]:
             # Ensure pci device ids and name mappings are updated.
             self.node.execute("update-pciids", sudo=True, shell=True)
             result = self.run(
-                "-m",
+                "-Dmnn",
                 force_run=force_run,
                 shell=True,
                 expected_exit_code=0,
@@ -184,6 +227,19 @@ def get_gpu_devices(self, force_run: bool = False) -> List[PciDevice]:
         ]
         return gpu_device_list
 
+    def get_devices_by_vendor_device_id(
+        self,
+        vendor_id: str,
+        device_id: str,
+        force_run: bool = False,
+    ) -> List[PciDevice]:
+        full_list = self.get_devices(force_run=force_run)
+        devices_list = []
+        for device in full_list:
+            if device.device_id == device_id and device.vendor_id == vendor_id:
+                devices_list.append(device)
+        return devices_list
+
 
 class LspciBSD(Lspci):
     _DEVICE_DRIVER_MAPPING: Dict[str, Pattern[str]] = {

From 8538c62f288479496caeac0508b667b6f6700bdf Mon Sep 17 00:00:00 2001
From: Aditya Nagesh
Date: Wed, 4 Sep 2024 12:13:43 +0530
Subject: [PATCH 16/17] Hibernation: Log uptime and resume offset (#3403)

* Hibernation: Log uptime and resume offset

Uptime seems to have a drift of 1 second due to Hibernation, which is a bug.
To prevent all tests from failing, just log uptime and resume offset details
for debugging purposes.
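
As a rough usage sketch (the `node` and `log` objects here are assumed test
fixtures; the helper names match the ones added in this patch):

    from lisa.tools import HibernationSetup

    tool = node.tools[HibernationSetup]
    hibfile_offset = tool.get_hibernate_resume_offset_from_hibfile()
    cmdline_offset = tool.get_hibernate_resume_offset_from_cmd()
    log.info(
        f"hibfile resume offset: {hibfile_offset}, "
        f"cmdline resume offset: {cmdline_offset}"
    )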
--- lisa/tools/hibernation_setup.py | 30 +++++++++++++++++++++++++++- microsoft/testsuites/power/common.py | 23 +++++++++++++++++---- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/lisa/tools/hibernation_setup.py b/lisa/tools/hibernation_setup.py index fcb5cc3e62..f1ddd6bebd 100644 --- a/lisa/tools/hibernation_setup.py +++ b/lisa/tools/hibernation_setup.py @@ -8,7 +8,7 @@ from lisa.base_tools import Cat, Systemctl from lisa.executable import Tool from lisa.operating_system import CBLMariner -from lisa.util import find_patterns_in_lines +from lisa.util import find_patterns_in_lines, get_matched_str from .git import Git from .ls import Ls @@ -28,6 +28,22 @@ class HibernationSetup(Tool): # [ 159.898806] hv_utils: Sent hibernation uevent _uevent_pattern = re.compile(r"^(.*Sent hibernation uevent.*)$", re.MULTILINE) + """ + The below shows an example output of `filefrag -v /hibfile.sys` + We are interested in the physical offset of the hibfile. + + Filesystem type is: ef53 + File size of /hibfile is 1048576 (256 blocks of 4096 bytes) + ext: logical_offset: physical_offset: length: expected: flags: + 0: 0.. 255: 123456.. 123711: 256: last,unwritten,eof + /hibfile: 1 extent found + """ + _hibsys_resume_offset_pattern = re.compile( + r"^\s*\d+:\s+\d+\.\.\s+\d+:\s+(\d+)\.\.", re.MULTILINE + ) + + _cmdline_resume_offset_pattern = re.compile(r"resume_offset=(\d+)") + @property def command(self) -> str: return "hibernation-setup-tool" @@ -62,6 +78,18 @@ def check_uevent(self) -> int: def hibernate(self) -> None: self.node.tools[Systemctl].hibernate() + def get_hibernate_resume_offset_from_hibfile(self) -> str: + filefrag_hibfile = self.node.execute( + "filefrag -v /hibfile.sys", sudo=True + ).stdout + offset = get_matched_str(filefrag_hibfile, self._hibsys_resume_offset_pattern) + return offset + + def get_hibernate_resume_offset_from_cmd(self) -> str: + cmdline = self.node.tools[Cat].read("/proc/cmdline") + offset = get_matched_str(cmdline, self._cmdline_resume_offset_pattern) + return offset + def _install(self) -> bool: if isinstance(self.node.os, CBLMariner): self.node.os.install_packages(["glibc-devel", "kernel-headers", "binutils"]) diff --git a/microsoft/testsuites/power/common.py b/microsoft/testsuites/power/common.py index 4b3ff56b21..242cc42dfe 100644 --- a/microsoft/testsuites/power/common.py +++ b/microsoft/testsuites/power/common.py @@ -7,6 +7,7 @@ from assertpy import assert_that from lisa import Environment, Logger, Node, RemoteNode, features +from lisa.base_tools.cat import Cat from lisa.features import StartStop from lisa.features.startstop import VMStatus from lisa.operating_system import Redhat, Suse, Ubuntu @@ -59,6 +60,7 @@ def verify_hibernation( _expand_os_partition(node, log) hibernation_setup_tool = node.tools[HibernationSetup] startstop = node.features[StartStop] + cat = node.tools[Cat] node_nic = node.nics lower_nics_before_hibernation = node_nic.get_lower_nics() @@ -71,7 +73,9 @@ def verify_hibernation( # only set up hibernation setup tool for the first time hibernation_setup_tool.start() uptime = node.tools[Uptime] + uptime_before_hibernation = uptime.since_time() + hibfile_offset = hibernation_setup_tool.get_hibernate_resume_offset_from_hibfile() try: startstop.stop(state=features.StopState.Hibernate) @@ -93,15 +97,25 @@ def verify_hibernation( raise LisaException("VM is not in deallocated status after hibernation") startstop.start() + dmesg = node.tools[Dmesg] dmesg.check_kernel_errors(force_run=True, throw_error=throw_error) + offset_from_cmd = 
hibernation_setup_tool.get_hibernate_resume_offset_from_cmd() uptime_after_hibernation = uptime.since_time() - assert_that(uptime_after_hibernation).described_as( - "Hibernation should not change uptime." - ).is_equal_to(uptime_before_hibernation) + offset_from_sys_power = cat.read("/sys/power/resume_offset") + + log.info( + "Uptime before Hibernation: " + f"{uptime_before_hibernation}, Uptime after Hibernation: " + f"{uptime_after_hibernation}" + ) + log.info( + f"Hibfile resume offset: {hibfile_offset}, " + f"Resume offset from cmdline: {offset_from_cmd}" + ) - log.info("Hibernation resume is successful. Uptime is not changed.") + log.info(f"Resume offset from /sys/power/resume_offset: {offset_from_sys_power}") entry_after_hibernation = hibernation_setup_tool.check_entry() exit_after_hibernation = hibernation_setup_tool.check_exit() @@ -125,6 +139,7 @@ def verify_hibernation( node_nic.initialize() lower_nics_after_hibernation = node_nic.get_lower_nics() upper_nics_after_hibernation = node_nic.get_nic_names() + assert_that(len(lower_nics_after_hibernation)).described_as( "sriov nics count changes after hibernation." ).is_equal_to(len(lower_nics_before_hibernation)) From 225fb52d56573fda5a75b070bab5e3cf4e64b3fa Mon Sep 17 00:00:00 2001 From: Smit Gardhariya <113590758+smit-gardhariya@users.noreply.github.com> Date: Wed, 4 Sep 2024 14:02:10 +0530 Subject: [PATCH 17/17] Replace initrd and add kernel config as optional parameter to transformer 'dom0_binaries' (#3405) Copy kernel config under /boot if passed to the transformer Copy initrd if passed from runbook else create softlink for existing initrd image with new kernel version in filename Signed-off-by: Smit Gardhariya --- lisa/transformers/dom0_kernel_installer.py | 51 ++++++++++++++++------ 1 file changed, 37 insertions(+), 14 deletions(-) diff --git a/lisa/transformers/dom0_kernel_installer.py b/lisa/transformers/dom0_kernel_installer.py index f840a61759..19a63c0b48 100644 --- a/lisa/transformers/dom0_kernel_installer.py +++ b/lisa/transformers/dom0_kernel_installer.py @@ -9,7 +9,7 @@ from lisa import schema from lisa.node import Node -from lisa.tools import Cp, Echo, Ls, Sed, Tar, Uname +from lisa.tools import Cp, Echo, Ln, Ls, Sed, Tar, Uname from lisa.util import field_metadata from .kernel_installer import BaseInstaller, BaseInstallerSchema @@ -35,6 +35,14 @@ class BinaryInstallerSchema(BaseInstallerSchema): ), ) + # kernel config local absolute path + kernel_config_path: str = field( + default="", + metadata=field_metadata( + required=True, + ), + ) + # initrd binary local absolute path initrd_image_path: str = field( default="", @@ -67,7 +75,7 @@ def install(self) -> str: kernel_image_path: str = runbook.kernel_image_path initrd_image_path: str = runbook.initrd_image_path kernel_modules_path: str = runbook.kernel_modules_path - is_initrd: bool = False + kernel_config_path: str = runbook.kernel_config_path uname = node.tools[Uname] current_kernel = uname.get_linux_information().kernel_version_raw @@ -106,7 +114,6 @@ def install(self) -> str: if initrd_image_path: err = f"Can not find initrd image path: {initrd_image_path}" assert os.path.exists(initrd_image_path), err - is_initrd = True node.shell.copy( PurePath(initrd_image_path), node.get_pure_path(f"/var/tmp/initrd.img-{new_kernel}"), @@ -116,10 +123,29 @@ def install(self) -> str: node.get_pure_path(f"/var/tmp/initrd.img-{new_kernel}"), node.get_pure_path(f"/boot/initrd.img-{new_kernel}"), ) + else: + ln = node.tools[Ln] + ln.create_link( + 
target=f"/boot/initrd.img-{current_kernel}", + link=f"/boot/initrd.img-{new_kernel}", + ) + + if kernel_config_path: + # Copy kernel config + err = f"Can not find kernel config path: {kernel_config_path}" + assert os.path.exists(kernel_config_path), err + node.shell.copy( + PurePath(kernel_config_path), + node.get_pure_path(f"/var/tmp/config-{new_kernel}"), + ) + _copy_kernel_binary( + node, + node.get_pure_path(f"/var/tmp/config-{new_kernel}"), + node.get_pure_path(f"/boot/config-{new_kernel}"), + ) _update_mariner_config( node, - is_initrd, current_kernel, new_kernel, ) @@ -161,7 +187,6 @@ def install(self) -> str: _update_mariner_config( node, - True, current_kernel, new_kernel, ) @@ -184,7 +209,6 @@ def _copy_kernel_binary( def _update_mariner_config( node: Node, - is_initrd: bool, current_kernel: str, new_kernel: str, ) -> None: @@ -199,11 +223,10 @@ def _update_mariner_config( sudo=True, ) - if is_initrd: - # Modify the /boot/mariner-mshv.cfg to point new initrd binary - sed.substitute( - regexp=f"mariner_initrd_mshv=initrd.img-{current_kernel}", - replacement=f"mariner_initrd_mshv=initrd.img-{new_kernel}", - file=mariner_config, - sudo=True, - ) + # Modify the /boot/mariner-mshv.cfg to point new initrd binary + sed.substitute( + regexp=f"mariner_initrd_mshv=initrd.img-{current_kernel}", + replacement=f"mariner_initrd_mshv=initrd.img-{new_kernel}", + file=mariner_config, + sudo=True, + )