From 18b829691d88cceb81a34747b27e73555874992d Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Wed, 8 Jan 2025 11:38:57 +0100 Subject: [PATCH 1/3] test: run stuff in parallel Use filelock.FileLock to workaround the issue that session fixtures do not work with xdist. This should probably be redone in some less hard to read way :( --- .github/workflows/tests.yml | 4 +- test/conftest.py | 7 + test/containerbuild.py | 117 +++++---- test/requirements.txt | 2 + test/test_build.py | 478 ++++++++++++++++++------------------ 5 files changed, 319 insertions(+), 289 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a253e66a..e06cb9ff 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -130,7 +130,9 @@ jobs: # podman needs (parts of) the environment but will break when # XDG_RUNTIME_DIR is set. # TODO: figure out what exactly podman needs - sudo -E XDG_RUNTIME_DIR= pytest-3 --basetemp=/mnt/var/tmp/bib-tests + # TODO2: figure out how much wecan run in parallel before running + # out of diskspace + sudo -E XDG_RUNTIME_DIR= pytest-3 --basetemp=/mnt/var/tmp/bib-tests -n 4 --dist worksteal --maxschedchunk=1 - name: Diskspace (after) if: ${{ always() }} run: | diff --git a/test/conftest.py b/test/conftest.py index 4db68ad6..6f4d83fe 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -20,3 +20,10 @@ def pytest_make_parametrize_id(config, val): # pylint: disable=W0613 if isinstance(val, TestCase): return f"{val}" return None + + +@pytest.fixture(name="shared_tmpdir", scope='session') +def shared_tmpdir_fixture(tmp_path_factory): + tmp_path = tmp_path_factory.getbasetemp().parent / "shared" + tmp_path.mkdir(exist_ok=True) + yield tmp_path diff --git a/test/containerbuild.py b/test/containerbuild.py index 44154c75..2103f640 100644 --- a/test/containerbuild.py +++ b/test/containerbuild.py @@ -1,4 +1,5 @@ import os +import pathlib import platform import random import string @@ -7,6 +8,7 @@ from contextlib import contextmanager import pytest +from filelock import FileLock @contextmanager @@ -33,67 +35,74 @@ def make_container(container_path, arch=None): @pytest.fixture(name="build_container", scope="session") -def build_container_fixture(): +def build_container_fixture(shared_tmpdir): """Build a container from the Containerfile and returns the name""" if tag_from_env := os.getenv("BIB_TEST_BUILD_CONTAINER_TAG"): return tag_from_env container_tag = "bootc-image-builder-test" - subprocess.check_call([ - "podman", "build", - "-f", "Containerfile", - "-t", container_tag, - ]) + with FileLock(shared_tmpdir / pathlib.Path(container_tag + ".lock")): + if not os.path.exists(shared_tmpdir / container_tag): + subprocess.check_call([ + "podman", "build", + "-f", "Containerfile", + "-t", container_tag, + ]) + (shared_tmpdir / container_tag).write_text("done") return container_tag @pytest.fixture(name="build_fake_container", scope="session") -def build_fake_container_fixture(tmpdir_factory, build_container): +def build_fake_container_fixture(shared_tmpdir, build_container): """Build a container with a fake osbuild and returns the name""" - tmp_path = tmpdir_factory.mktemp("build-fake-container") - - # see https://github.com/osbuild/osbuild/blob/main/osbuild/testutil/__init__.py#L91 - tracing_podman_path = tmp_path / "tracing-podman" - tracing_podman_path.write_text(textwrap.dedent("""\ - #!/bin/sh -e - - TRACE_PATH=/output/"$(basename $0)".log - for arg in "$@"; do - echo "$arg" >> "$TRACE_PATH" - done - # extra separator to differenciate between calls - echo >> "$TRACE_PATH" - exec "$0".real "$@" - """), encoding="utf8") - - fake_osbuild_path = tmp_path / "fake-osbuild" - fake_osbuild_path.write_text(textwrap.dedent("""\ - #!/bin/bash -e - - # injest generated manifest from the images library, if we do not - # do this images may fail with "broken" pipe errors - cat - >/dev/null - - mkdir -p /output/qcow2 - echo "fake-disk.qcow2" > /output/qcow2/disk.qcow2 - - """), encoding="utf8") - - cntf_path = tmp_path / "Containerfile" - - cntf_path.write_text(textwrap.dedent(f"""\n - FROM {build_container} - COPY fake-osbuild /usr/bin/osbuild - RUN chmod 755 /usr/bin/osbuild - COPY --from={build_container} /usr/bin/podman /usr/bin/podman.real - COPY tracing-podman /usr/bin/podman - RUN chmod 755 /usr/bin/podman - """), encoding="utf8") - - container_tag = "bootc-image-builder-test-faked-osbuild" - subprocess.check_call([ - "podman", "build", - "-t", container_tag, - tmp_path, - ]) - return container_tag + tmp_path = shared_tmpdir / "build-fake-container" + + with FileLock(tmp_path + ".lock"): + if tmp_path.exists(): + return container_tag + tmp_path.mkdir() + # see https://github.com/osbuild/osbuild/blob/main/osbuild/testutil/__init__.py#L91 + tracing_podman_path = tmp_path / "tracing-podman" + tracing_podman_path.write_text(textwrap.dedent("""\ + #!/bin/sh -e + + TRACE_PATH=/output/"$(basename $0)".log + for arg in "$@"; do + echo "$arg" >> "$TRACE_PATH" + done + # extra separator to differenciate between calls + echo >> "$TRACE_PATH" + exec "$0".real "$@" + """), encoding="utf8") + + fake_osbuild_path = tmp_path / "fake-osbuild" + fake_osbuild_path.write_text(textwrap.dedent("""\ + #!/bin/bash -e + + # injest generated manifest from the images library, if we do not + # do this images may fail with "broken" pipe errors + cat - >/dev/null + + mkdir -p /output/qcow2 + echo "fake-disk.qcow2" > /output/qcow2/disk.qcow2 + + """), encoding="utf8") + + cntf_path = tmp_path / "Containerfile" + + cntf_path.write_text(textwrap.dedent(f"""\n + FROM {build_container} + COPY fake-osbuild /usr/bin/osbuild + RUN chmod 755 /usr/bin/osbuild + COPY --from={build_container} /usr/bin/podman /usr/bin/podman.real + COPY tracing-podman /usr/bin/podman + RUN chmod 755 /usr/bin/podman + """), encoding="utf8") + + container_tag = "bootc-image-builder-test-faked-osbuild" + subprocess.check_call([ + "podman", "build", + "-t", container_tag, + tmp_path, + ]) + return container_tag diff --git a/test/requirements.txt b/test/requirements.txt index 9be09ce7..3bfd65c8 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -4,3 +4,5 @@ paramiko==2.12.0 boto3==1.33.13 qmp==1.1.0 pylint==3.2.5 +pytest-xdist==3.6.1 +filelock==3.16.1 diff --git a/test/test_build.py b/test/test_build.py index c8ab9151..efcd2d6a 100644 --- a/test/test_build.py +++ b/test/test_build.py @@ -10,13 +10,14 @@ import tempfile import uuid from contextlib import contextmanager, ExitStack -from typing import NamedTuple from dataclasses import dataclass +from typing import NamedTuple import pytest # local test utils import testutil from containerbuild import build_container_fixture # pylint: disable=unused-import +from filelock import FileLock from testcases import CLOUD_BOOT_IMAGE_TYPES, DISK_IMAGE_TYPES, gen_testcases from vm import AWS, QEMU @@ -67,12 +68,6 @@ class RegistryConf: lookaside_conf: str -@pytest.fixture(name="shared_tmpdir", scope='session') -def shared_tmpdir_fixture(tmpdir_factory): - tmp_path = pathlib.Path(tmpdir_factory.mktemp("shared")) - yield tmp_path - - @pytest.fixture(name="gpg_conf", scope='session') def gpg_conf_fixture(shared_tmpdir): key_params_tmpl = """ @@ -90,19 +85,25 @@ def gpg_conf_fixture(shared_tmpdir): pub_key_file = f"{shared_tmpdir}/GPG-KEY-bib-tests" key_params = key_params_tmpl.format(key_length=key_length, email=email) - os.makedirs(home_dir, mode=0o700, exist_ok=False) - subprocess.run( - ["gpg", "--gen-key", "--batch"], - check=True, env={"GNUPGHOME": home_dir}, - input=key_params, - text=True) - subprocess.run( - ["gpg", "--output", pub_key_file, - "--armor", "--export", email], - check=True, env={"GNUPGHOME": home_dir}) + # XXX: fugly + with FileLock(home_dir + ".lock"): + if os.path.exists(home_dir): + yield GPGConf(email=email, home_dir=home_dir, + key_length=key_length, pub_key_file=pub_key_file, key_params=key_params) + else: + os.makedirs(home_dir, mode=0o700, exist_ok=False) + subprocess.run( + ["gpg", "--gen-key", "--batch"], + check=True, env={"GNUPGHOME": home_dir}, + input=key_params, + text=True) + subprocess.run( + ["gpg", "--output", pub_key_file, + "--armor", "--export", email], + check=True, env={"GNUPGHOME": home_dir}) - yield GPGConf(email=email, home_dir=home_dir, - key_length=key_length, pub_key_file=pub_key_file, key_params=key_params) + yield GPGConf(email=email, home_dir=home_dir, + key_length=key_length, pub_key_file=pub_key_file, key_params=key_params) @pytest.fixture(name="registry_conf", scope='session') @@ -276,226 +277,235 @@ def build_images(shared_tmpdir, build_container, request, force_aws_upload, gpg_ if tc.sign: container_ref = get_signed_container_ref(registry_conf.local_registry, tc.container_ref) - # params can be long and the qmp socket (that has a limit of 100ish - # AF_UNIX) is derived from the path - # hash the container_ref+target_arch, but exclude the image_type so that the output path is shared between calls to - # different image type combinations - output_path = shared_tmpdir / format(abs(hash(container_ref + str(tc.disk_config) + str(tc.target_arch))), "x") - output_path.mkdir(exist_ok=True) - - # make sure that the test store exists, because podman refuses to start if the source directory for a volume - # doesn't exist - pathlib.Path("/var/tmp/osbuild-test-store").mkdir(exist_ok=True, parents=True) - - journal_log_path = output_path / "journal.log" - bib_output_path = output_path / "bib-output.log" - - ssh_keyfile_private_path = output_path / "ssh-keyfile" - ssh_keyfile_public_path = ssh_keyfile_private_path.with_suffix(".pub") - - artifact = { - "qcow2": pathlib.Path(output_path) / "qcow2/disk.qcow2", - "ami": pathlib.Path(output_path) / "image/disk.raw", - "raw": pathlib.Path(output_path) / "image/disk.raw", - "vmdk": pathlib.Path(output_path) / "vmdk/disk.vmdk", - "vhd": pathlib.Path(output_path) / "vpc/disk.vhd", - "gce": pathlib.Path(output_path) / "gce/image.tar.gz", - "anaconda-iso": pathlib.Path(output_path) / "bootiso/install.iso", - } - assert len(artifact) == len(set(tc.image for tc in gen_testcases("all"))), \ - "please keep artifact mapping and supported images in sync" - - # this helper checks the cache - results = [] - for image_type in image_types: - # TODO: properly cache amis here. The issue right now is that - # ami and raw are the same image on disk which means that if a test - # like "boots_in_aws" requests an ami it will get the raw file on - # disk. However that is not sufficient because part of the ami test - # is the upload to AWS and the generated metadata. The fix could be - # to make the boot-in-aws a new image type like "ami-aws" where we - # cache the metadata instead of the disk image. Alternatively we - # could stop testing ami locally at all and just skip any ami tests - # if there are no AWS credentials. - if image_type in CLOUD_BOOT_IMAGE_TYPES: - continue - generated_img = artifact[image_type] - print(f"Checking for cached image {image_type} -> {generated_img}") - if generated_img.exists(): - print(f"NOTE: reusing cached image {generated_img}") - journal_output = journal_log_path.read_text(encoding="utf8") - bib_output = bib_output_path.read_text(encoding="utf8") + # Wait until the image with this ID is generated, the first woker + img_id = format(abs(hash(container_ref + str(tc.disk_config) + str(tc.target_arch))), "x") + fn = shared_tmpdir / f"{img_id}.lock" + print(f"Using lockfile {fn}") + # XXX: the diff for this is huge now, this is really just the previous + # code moved 4 spaces to the right, the code is already capable of dealing + # with existing output dirs, so the filelock just ensures the part below + # is serialized + with FileLock(fn): + # params can be long and the qmp socket (that has a limit of 100ish + # AF_UNIX) is derived from the path + # hash the container_ref+target_arch, but exclude the image_type so that the output path is shared between calls to + # different image type combinations + output_path = shared_tmpdir / img_id + output_path.mkdir(exist_ok=True) + + # make sure that the test store exists, because podman refuses to start if the source directory for a volume + # doesn't exist + pathlib.Path("/var/tmp/osbuild-test-store").mkdir(exist_ok=True, parents=True) + + journal_log_path = output_path / "journal.log" + bib_output_path = output_path / "bib-output.log" + + ssh_keyfile_private_path = output_path / "ssh-keyfile" + ssh_keyfile_public_path = ssh_keyfile_private_path.with_suffix(".pub") + + artifact = { + "qcow2": pathlib.Path(output_path) / "qcow2/disk.qcow2", + "ami": pathlib.Path(output_path) / "image/disk.raw", + "raw": pathlib.Path(output_path) / "image/disk.raw", + "vmdk": pathlib.Path(output_path) / "vmdk/disk.vmdk", + "vhd": pathlib.Path(output_path) / "vpc/disk.vhd", + "gce": pathlib.Path(output_path) / "gce/image.tar.gz", + "anaconda-iso": pathlib.Path(output_path) / "bootiso/install.iso", + } + assert len(artifact) == len(set(tc.image for tc in gen_testcases("all"))), \ + "please keep artifact mapping and supported images in sync" + + # this helper checks the cache + results = [] + for image_type in image_types: + # TODO: properly cache amis here. The issue right now is that + # ami and raw are the same image on disk which means that if a test + # like "boots_in_aws" requests an ami it will get the raw file on + # disk. However that is not sufficient because part of the ami test + # is the upload to AWS and the generated metadata. The fix could be + # to make the boot-in-aws a new image type like "ami-aws" where we + # cache the metadata instead of the disk image. Alternatively we + # could stop testing ami locally at all and just skip any ami tests + # if there are no AWS credentials. + if image_type in CLOUD_BOOT_IMAGE_TYPES: + continue + generated_img = artifact[image_type] + print(f"Checking for cached image {image_type} -> {generated_img}") + if generated_img.exists(): + print(f"NOTE: reusing cached image {generated_img}") + journal_output = journal_log_path.read_text(encoding="utf8") + bib_output = bib_output_path.read_text(encoding="utf8") + results.append(ImageBuildResult( + image_type, generated_img, tc.target_arch, + container_ref, tc.rootfs, tc.disk_config, + username, password, + ssh_keyfile_private_path, kargs, bib_output, journal_output)) + + # generate new keyfile + if not ssh_keyfile_private_path.exists(): + subprocess.run([ + "ssh-keygen", + "-N", "", + # be very conservative with keys for paramiko + "-b", "2048", + "-t", "rsa", + "-f", os.fspath(ssh_keyfile_private_path), + ], check=True) + ssh_pubkey = ssh_keyfile_public_path.read_text(encoding="utf8").strip() + + # Because we always build all image types, regardless of what was requested, we should either have 0 results or all + # should be available, so if we found at least one result but not all of them, this is a problem with our setup + assert not results or len(results) == len(image_types), \ + f"unexpected number of results found: requested {image_types} but got {results}" + + if results: + yield results + return + + print(f"Requested {len(image_types)} images but found {len(results)} cached images. Building...") + + # not all requested image types are available - build them + cfg = { + "customizations": { + "user": [ + { + "name": "root", + "key": ssh_pubkey, + # cannot use default /root as is on a read-only place + "home": "/var/roothome", + }, { + "name": username, + "password": password, + "groups": ["wheel"], + }, + ], + "kernel": { + "append": kargs, + }, + }, + } + testutil.maybe_create_filesystem_customizations(cfg, tc) + testutil.maybe_create_disk_customizations(cfg, tc) + print(f"config for {output_path} {tc=}: {cfg=}") + + config_json_path = output_path / "config.json" + config_json_path.write_text(json.dumps(cfg), encoding="utf-8") + + cursor = testutil.journal_cursor() + + upload_args = [] + creds_args = [] + target_arch_args = [] + if tc.target_arch: + target_arch_args = ["--target-arch", tc.target_arch] + + with tempfile.TemporaryDirectory() as tempdir: + if "ami" in image_types: + creds_file = pathlib.Path(tempdir) / "aws.creds" + if testutil.write_aws_creds(creds_file): + creds_args = ["-v", f"{creds_file}:/root/.aws/credentials:ro", + "--env", "AWS_PROFILE=default"] + + upload_args = [ + f"--aws-ami-name=bootc-image-builder-test-{str(uuid.uuid4())}", + f"--aws-region={testutil.AWS_REGION}", + "--aws-bucket=bootc-image-builder-ci", + ] + elif force_aws_upload: + # upload forced but credentials aren't set + raise RuntimeError("AWS credentials not available (upload forced)") + + # all disk-image types can be generated via a single build + if image_types[0] in DISK_IMAGE_TYPES: + types_arg = [f"--type={it}" for it in DISK_IMAGE_TYPES] + else: + types_arg = [f"--type={image_types[0]}"] + + # run container to deploy an image into a bootable disk and upload to a cloud service if applicable + cmd = [ + *testutil.podman_run_common, + "-v", f"{config_json_path}:/config.json:ro", + "-v", f"{output_path}:/output", + "-v", "/var/tmp/osbuild-test-store:/store", # share the cache between builds + ] + + # we need to mount the host's container store + if tc.local: + cmd.extend(["-v", "/var/lib/containers/storage:/var/lib/containers/storage"]) + + if tc.sign: + sign_container_image(gpg_conf, registry_conf, tc.container_ref) + signed_image_args = [ + "-v", f"{registry_conf.policy_file}:/etc/containers/policy.json", + "-v", f"{registry_conf.lookaside_conf_file}:/etc/containers/registries.d/bib-lookaside.yaml", + "-v", f"{registry_conf.sigstore_dir}:{registry_conf.sigstore_dir}", + "-v", f"{gpg_conf.pub_key_file}:{gpg_conf.pub_key_file}", + ] + cmd.extend(signed_image_args) + + cmd.extend([ + *creds_args, + build_container, + container_ref, + *types_arg, + *upload_args, + *target_arch_args, + *tc.bib_rootfs_args(), + "--local" if tc.local else "--local=false", + "--tls-verify=false" if tc.sign else "--tls-verify=true" + ]) + + # print the build command for easier tracing + print(" ".join(cmd)) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) + # not using subprocss.check_output() to ensure we get live output + # during the text + bib_output = "" + while True: + line = p.stdout.readline() + if not line: + break + print(line, end="") + bib_output += line + rc = p.wait(timeout=10) + assert rc == 0, f"bootc-image-builder failed with return code {rc}" + + journal_output = testutil.journal_after_cursor(cursor) + metadata = {} + if "ami" in image_types and upload_args: + metadata["ami_id"] = parse_ami_id_from_log(journal_output) + + def del_ami(): + testutil.deregister_ami(metadata["ami_id"]) + request.addfinalizer(del_ami) + + journal_log_path.write_text(journal_output, encoding="utf8") + bib_output_path.write_text(bib_output, encoding="utf8") + + results = [] + for image_type in image_types: results.append(ImageBuildResult( - image_type, generated_img, tc.target_arch, + image_type, artifact[image_type], tc.target_arch, container_ref, tc.rootfs, tc.disk_config, username, password, - ssh_keyfile_private_path, kargs, bib_output, journal_output)) - - # generate new keyfile - if not ssh_keyfile_private_path.exists(): - subprocess.run([ - "ssh-keygen", - "-N", "", - # be very conservative with keys for paramiko - "-b", "2048", - "-t", "rsa", - "-f", os.fspath(ssh_keyfile_private_path), - ], check=True) - ssh_pubkey = ssh_keyfile_public_path.read_text(encoding="utf8").strip() - - # Because we always build all image types, regardless of what was requested, we should either have 0 results or all - # should be available, so if we found at least one result but not all of them, this is a problem with our setup - assert not results or len(results) == len(image_types), \ - f"unexpected number of results found: requested {image_types} but got {results}" - - if results: + ssh_keyfile_private_path, kargs, bib_output, journal_output, metadata)) yield results - return - - print(f"Requested {len(image_types)} images but found {len(results)} cached images. Building...") - - # not all requested image types are available - build them - cfg = { - "customizations": { - "user": [ - { - "name": "root", - "key": ssh_pubkey, - # cannot use default /root as is on a read-only place - "home": "/var/roothome", - }, { - "name": username, - "password": password, - "groups": ["wheel"], - }, - ], - "kernel": { - "append": kargs, - }, - }, - } - testutil.maybe_create_filesystem_customizations(cfg, tc) - testutil.maybe_create_disk_customizations(cfg, tc) - print(f"config for {output_path} {tc=}: {cfg=}") - - config_json_path = output_path / "config.json" - config_json_path.write_text(json.dumps(cfg), encoding="utf-8") - - cursor = testutil.journal_cursor() - - upload_args = [] - creds_args = [] - target_arch_args = [] - if tc.target_arch: - target_arch_args = ["--target-arch", tc.target_arch] - - with tempfile.TemporaryDirectory() as tempdir: - if "ami" in image_types: - creds_file = pathlib.Path(tempdir) / "aws.creds" - if testutil.write_aws_creds(creds_file): - creds_args = ["-v", f"{creds_file}:/root/.aws/credentials:ro", - "--env", "AWS_PROFILE=default"] - - upload_args = [ - f"--aws-ami-name=bootc-image-builder-test-{str(uuid.uuid4())}", - f"--aws-region={testutil.AWS_REGION}", - "--aws-bucket=bootc-image-builder-ci", - ] - elif force_aws_upload: - # upload forced but credentials aren't set - raise RuntimeError("AWS credentials not available (upload forced)") - - # all disk-image types can be generated via a single build - if image_types[0] in DISK_IMAGE_TYPES: - types_arg = [f"--type={it}" for it in DISK_IMAGE_TYPES] - else: - types_arg = [f"--type={image_types[0]}"] - - # run container to deploy an image into a bootable disk and upload to a cloud service if applicable - cmd = [ - *testutil.podman_run_common, - "-v", f"{config_json_path}:/config.json:ro", - "-v", f"{output_path}:/output", - "-v", "/var/tmp/osbuild-test-store:/store", # share the cache between builds - ] - - # we need to mount the host's container store - if tc.local: - cmd.extend(["-v", "/var/lib/containers/storage:/var/lib/containers/storage"]) - - if tc.sign: - sign_container_image(gpg_conf, registry_conf, tc.container_ref) - signed_image_args = [ - "-v", f"{registry_conf.policy_file}:/etc/containers/policy.json", - "-v", f"{registry_conf.lookaside_conf_file}:/etc/containers/registries.d/bib-lookaside.yaml", - "-v", f"{registry_conf.sigstore_dir}:{registry_conf.sigstore_dir}", - "-v", f"{gpg_conf.pub_key_file}:{gpg_conf.pub_key_file}", - ] - cmd.extend(signed_image_args) - - cmd.extend([ - *creds_args, - build_container, - container_ref, - *types_arg, - *upload_args, - *target_arch_args, - *tc.bib_rootfs_args(), - "--local" if tc.local else "--local=false", - "--tls-verify=false" if tc.sign else "--tls-verify=true" - ]) - # print the build command for easier tracing - print(" ".join(cmd)) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) - # not using subprocss.check_output() to ensure we get live output - # during the text - bib_output = "" - while True: - line = p.stdout.readline() - if not line: - break - print(line, end="") - bib_output += line - rc = p.wait(timeout=10) - assert rc == 0, f"bootc-image-builder failed with return code {rc}" - - journal_output = testutil.journal_after_cursor(cursor) - metadata = {} - if "ami" in image_types and upload_args: - metadata["ami_id"] = parse_ami_id_from_log(journal_output) - - def del_ami(): - testutil.deregister_ami(metadata["ami_id"]) - request.addfinalizer(del_ami) - - journal_log_path.write_text(journal_output, encoding="utf8") - bib_output_path.write_text(bib_output, encoding="utf8") - - results = [] - for image_type in image_types: - results.append(ImageBuildResult( - image_type, artifact[image_type], tc.target_arch, - container_ref, tc.rootfs, tc.disk_config, - username, password, - ssh_keyfile_private_path, kargs, bib_output, journal_output, metadata)) - yield results - - # Try to cache as much as possible - for image_type in image_types: - img = artifact[image_type] - print(f"Checking disk usage for {img}") - if os.path.exists(img): - # might already be removed if we're deleting 'raw' and 'ami' - disk_usage = shutil.disk_usage(img) - print(f"NOTE: disk usage after {img}: {disk_usage.free / 1_000_000} / {disk_usage.total / 1_000_000}") - if disk_usage.free < 1_000_000_000: - print(f"WARNING: running low on disk space, removing {img}") - img.unlink() - else: - print("does not exist") - subprocess.run(["podman", "rmi", container_ref], check=False) - return + # Try to cache as much as possible + for image_type in image_types: + img = artifact[image_type] + print(f"Checking disk usage for {img}") + if os.path.exists(img): + # might already be removed if we're deleting 'raw' and 'ami' + disk_usage = shutil.disk_usage(img) + print(f"NOTE: disk usage after {img}: {disk_usage.free / 1_000_000} / {disk_usage.total / 1_000_000}") + if disk_usage.free < 1_000_000_000: + print(f"WARNING: running low on disk space, removing {img}") + img.unlink() + else: + print("does not exist") + subprocess.run(["podman", "rmi", container_ref], check=False) + return def test_container_builds(build_container): From 71836cae43b46acc356b518a99e431f445f1b13f Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Fri, 10 Jan 2025 13:22:08 +0100 Subject: [PATCH 2/3] test: drop indirect fixtures as they are not parallel-izable --- test/containerbuild.py | 2 +- test/test_build.py | 110 +++++++++++++++++++---------------------- 2 files changed, 51 insertions(+), 61 deletions(-) diff --git a/test/containerbuild.py b/test/containerbuild.py index 2103f640..027e643c 100644 --- a/test/containerbuild.py +++ b/test/containerbuild.py @@ -57,6 +57,7 @@ def build_fake_container_fixture(shared_tmpdir, build_container): """Build a container with a fake osbuild and returns the name""" tmp_path = shared_tmpdir / "build-fake-container" + container_tag = "bootc-image-builder-test-faked-osbuild" with FileLock(tmp_path + ".lock"): if tmp_path.exists(): return container_tag @@ -99,7 +100,6 @@ def build_fake_container_fixture(shared_tmpdir, build_container): RUN chmod 755 /usr/bin/podman """), encoding="utf8") - container_tag = "bootc-image-builder-test-faked-osbuild" subprocess.check_call([ "podman", "build", "-t", container_tag, diff --git a/test/test_build.py b/test/test_build.py index efcd2d6a..7a5c1313 100644 --- a/test/test_build.py +++ b/test/test_build.py @@ -1,15 +1,15 @@ +import atexit import json import os import pathlib import platform import random import re -import shutil import string import subprocess import tempfile import uuid -from contextlib import contextmanager, ExitStack +from contextlib import ExitStack from dataclasses import dataclass from typing import NamedTuple @@ -208,22 +208,21 @@ def sign_container_image(gpg_conf: GPGConf, registry_conf: RegistryConf, contain subprocess.run(cmd, check=True, env={"GNUPGHOME": gpg_conf.home_dir}) -@pytest.fixture(name="image_type", scope="session") # pylint: disable=too-many-arguments -def image_type_fixture(shared_tmpdir, build_container, request, force_aws_upload, gpg_conf, registry_conf): +def image_type_fixture(shared_tmpdir, build_container, tc, force_aws_upload=False, gpg_conf=None, registry_conf=None): """ Build an image inside the passed build_container and return an ImageBuildResult with the resulting image path and user/password In the case an image is being built from a local container, the function will build the required local container for the test. """ - container_ref = request.param.container_ref + container_ref = tc.container_ref - if request.param.local: + if tc.local: cont_tag = "localhost/cont-base-" + "".join(random.choices(string.digits, k=12)) # we are not cross-building local images (for now) - request.param.target_arch = "" + tc.target_arch = "" # copy the container into containers-storage subprocess.check_call([ @@ -232,41 +231,34 @@ def image_type_fixture(shared_tmpdir, build_container, request, force_aws_upload f"containers-storage:[overlay@/var/lib/containers/storage+/run/containers/storage]{cont_tag}" ]) - with build_images(shared_tmpdir, build_container, - request, force_aws_upload, gpg_conf, registry_conf) as build_results: - yield build_results[0] + build_results = build_images(shared_tmpdir, build_container, + tc, force_aws_upload, gpg_conf, registry_conf) + return build_results[0] -@pytest.fixture(name="images", scope="session") # pylint: disable=too-many-arguments -def images_fixture(shared_tmpdir, build_container, request, force_aws_upload, gpg_conf, registry_conf): +def images_fixture(shared_tmpdir, build_container, request, force_aws_upload=False, gpg_conf=None, registry_conf=None): """ Build one or more images inside the passed build_container and return an ImageBuildResult array with the resulting image path and user/password """ - with build_images(shared_tmpdir, build_container, - request, force_aws_upload, gpg_conf, registry_conf) as build_results: - yield build_results + return build_images(shared_tmpdir, build_container, + request, force_aws_upload, gpg_conf, registry_conf) # XXX: refactor # pylint: disable=too-many-locals,too-many-branches,too-many-statements,too-many-arguments -@contextmanager -def build_images(shared_tmpdir, build_container, request, force_aws_upload, gpg_conf, registry_conf): +def build_images(shared_tmpdir, build_container, tc, force_aws_upload, gpg_conf, registry_conf): """ Build all available image types if necessary and return the results for the image types that were requested via :request:. Will return cached results of previous build requests. - - :request.param: has the form "container_url,img_type1+img_type2,arch,local" """ - # the testcases.TestCase comes from the request.parameter - tc = request.param # images might be multiple --type args # split and check each one - image_types = request.param.image.split("+") + image_types = tc.image.split("+") username = "test" password = "password" @@ -288,7 +280,8 @@ def build_images(shared_tmpdir, build_container, request, force_aws_upload, gpg_ with FileLock(fn): # params can be long and the qmp socket (that has a limit of 100ish # AF_UNIX) is derived from the path - # hash the container_ref+target_arch, but exclude the image_type so that the output path is shared between calls to + # hash the container_ref+target_arch, but exclude the image_type + # so that the output path is shared between calls to # different image type combinations output_path = shared_tmpdir / img_id output_path.mkdir(exist_ok=True) @@ -353,14 +346,15 @@ def build_images(shared_tmpdir, build_container, request, force_aws_upload, gpg_ ], check=True) ssh_pubkey = ssh_keyfile_public_path.read_text(encoding="utf8").strip() - # Because we always build all image types, regardless of what was requested, we should either have 0 results or all - # should be available, so if we found at least one result but not all of them, this is a problem with our setup + # Because we always build all image types, regardless of what + # was requested, we should either have 0 results or all should + # be available, so if we found at least one result but not all + # of them, this is a problem with our setup assert not results or len(results) == len(image_types), \ f"unexpected number of results found: requested {image_types} but got {results}" if results: - yield results - return + return results print(f"Requested {len(image_types)} images but found {len(results)} cached images. Building...") @@ -477,7 +471,9 @@ def build_images(shared_tmpdir, build_container, request, force_aws_upload, gpg_ def del_ami(): testutil.deregister_ami(metadata["ami_id"]) - request.addfinalizer(del_ami) + # XXX: ??? + # request.addfinalizer(del_ami) + atexit.register(del_ami) journal_log_path.write_text(journal_output, encoding="utf8") bib_output_path.write_text(bib_output, encoding="utf8") @@ -489,23 +485,9 @@ def del_ami(): container_ref, tc.rootfs, tc.disk_config, username, password, ssh_keyfile_private_path, kargs, bib_output, journal_output, metadata)) - yield results - - # Try to cache as much as possible - for image_type in image_types: - img = artifact[image_type] - print(f"Checking disk usage for {img}") - if os.path.exists(img): - # might already be removed if we're deleting 'raw' and 'ami' - disk_usage = shutil.disk_usage(img) - print(f"NOTE: disk usage after {img}: {disk_usage.free / 1_000_000} / {disk_usage.total / 1_000_000}") - if disk_usage.free < 1_000_000_000: - print(f"WARNING: running low on disk space, removing {img}") - img.unlink() - else: - print("does not exist") + # XXX: ? subprocess.run(["podman", "rmi", container_ref], check=False) - return + return results def test_container_builds(build_container): @@ -514,8 +496,9 @@ def test_container_builds(build_container): assert build_container in output -@pytest.mark.parametrize("image_type", gen_testcases("multidisk"), indirect=["image_type"]) -def test_image_is_generated(image_type): +@pytest.mark.parametrize("tc", gen_testcases("multidisk")) +def test_image_is_generated(shared_tmpdir, build_container, tc): + image_type = image_type_fixture(shared_tmpdir, build_container, tc) assert image_type.img_path.exists(), "output file missing, dir "\ f"content: {os.listdir(os.fspath(image_type.img_path))}" @@ -529,8 +512,9 @@ def assert_kernel_args(test_vm, image_type): @pytest.mark.skipif(platform.system() != "Linux", reason="boot test only runs on linux right now") -@pytest.mark.parametrize("image_type", gen_testcases("qemu-boot"), indirect=["image_type"]) -def test_image_boots(image_type): +@pytest.mark.parametrize("tc", gen_testcases("qemu-boot")) +def test_image_boots(shared_tmpdir, build_container, tc): + image_type = image_type_fixture(shared_tmpdir, build_container, tc) with QEMU(image_type.img_path, arch=image_type.img_arch) as test_vm: # user/password login works exit_status, _ = test_vm.run("true", user=image_type.username, password=image_type.password) @@ -552,8 +536,9 @@ def test_image_boots(image_type): assert_fs_customizations(image_type, test_vm) -@pytest.mark.parametrize("image_type", gen_testcases("ami-boot"), indirect=["image_type"]) -def test_ami_boots_in_aws(image_type, force_aws_upload): +@pytest.mark.parametrize("tc", gen_testcases("ami-boot")) +def test_ami_boots_in_aws(shared_tmpdir, build_container, tc, force_aws_upload): + image_type = image_type_fixture(shared_tmpdir, build_container, tc, force_aws_upload=force_aws_upload) if not testutil.write_aws_creds("/dev/null"): # we don't care about the file, just the variables being there if force_aws_upload: # upload forced but credentials aren't set @@ -605,8 +590,9 @@ def has_selinux(): @pytest.mark.skipif(not has_selinux(), reason="selinux not enabled") -@pytest.mark.parametrize("image_type", gen_testcases("qemu-boot"), indirect=["image_type"]) -def test_image_build_without_se_linux_denials(image_type): +@pytest.mark.parametrize("tc", gen_testcases("qemu-boot")) +def test_image_build_without_se_linux_denials(shared_tmpdir, build_container, tc): + image_type = image_type_fixture(shared_tmpdir, build_container, tc) # the journal always contains logs from the image building assert image_type.journal_output != "" assert not log_has_osbuild_selinux_denials(image_type.journal_output), \ @@ -614,8 +600,9 @@ def test_image_build_without_se_linux_denials(image_type): @pytest.mark.skipif(platform.system() != "Linux", reason="boot test only runs on linux right now") -@pytest.mark.parametrize("image_type", gen_testcases("anaconda-iso"), indirect=["image_type"]) -def test_iso_installs(image_type): +@pytest.mark.parametrize("tc", gen_testcases("anaconda-iso")) +def test_iso_installs(shared_tmpdir, build_container, tc): + image_type = image_type_fixture(shared_tmpdir, build_container, tc) installer_iso_path = image_type.img_path test_disk_path = installer_iso_path.with_name("test-disk.img") with open(test_disk_path, "w", encoding="utf8") as fp: @@ -649,8 +636,9 @@ def osinfo_for(it: ImageBuildResult, arch: str) -> str: @pytest.mark.skipif(platform.system() != "Linux", reason="osinfo detect test only runs on linux right now") -@pytest.mark.parametrize("image_type", gen_testcases("anaconda-iso"), indirect=["image_type"]) -def test_iso_os_detection(image_type): +@pytest.mark.parametrize("tc", gen_testcases("anaconda-iso")) +def test_iso_os_detection(shared_tmpdir, build_container, tc): + image_type = image_type_fixture(shared_tmpdir, build_container, tc) installer_iso_path = image_type.img_path arch = image_type.img_arch if not arch: @@ -666,8 +654,9 @@ def test_iso_os_detection(image_type): @pytest.mark.skipif(platform.system() != "Linux", reason="osinfo detect test only runs on linux right now") @pytest.mark.skipif(not testutil.has_executable("unsquashfs"), reason="need unsquashfs") -@pytest.mark.parametrize("image_type", gen_testcases("anaconda-iso"), indirect=["image_type"]) -def test_iso_install_img_is_squashfs(tmp_path, image_type): +@pytest.mark.parametrize("tc", gen_testcases("anaconda-iso")) +def test_iso_install_img_is_squashfs(tmp_path, shared_tmpdir, build_container, tc): + image_type = image_type_fixture(shared_tmpdir, build_container, tc) installer_iso_path = image_type.img_path with ExitStack() as cm: mount_point = tmp_path / "cdrom" @@ -680,8 +669,9 @@ def test_iso_install_img_is_squashfs(tmp_path, image_type): assert "usr/bin/bootc" in output -@pytest.mark.parametrize("images", gen_testcases("multidisk"), indirect=["images"]) -def test_multi_build_request(images): +@pytest.mark.parametrize("tc", gen_testcases("multidisk")) +def test_multi_build_request(shared_tmpdir, build_container, tc): + images = images_fixture(shared_tmpdir, build_container, tc) artifacts = set() expected = {"disk.qcow2", "disk.raw", "disk.vhd", "disk.vmdk", "image.tar.gz"} for result in images: From 5d6dcedad2e3d449d56ed6e20a17a05b98e2f050 Mon Sep 17 00:00:00 2001 From: Michael Vogt Date: Fri, 10 Jan 2025 22:49:24 +0100 Subject: [PATCH 3/3] workflow: run 8 tests in parallel (to see the effects) --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e06cb9ff..b879e5d1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -132,7 +132,7 @@ jobs: # TODO: figure out what exactly podman needs # TODO2: figure out how much wecan run in parallel before running # out of diskspace - sudo -E XDG_RUNTIME_DIR= pytest-3 --basetemp=/mnt/var/tmp/bib-tests -n 4 --dist worksteal --maxschedchunk=1 + sudo -E XDG_RUNTIME_DIR= pytest-3 --basetemp=/mnt/var/tmp/bib-tests -n 8 --dist worksteal --maxschedchunk=1 - name: Diskspace (after) if: ${{ always() }} run: |