From 1f9ffa33b08d66b2302c3bc57f5414ed90ae416c Mon Sep 17 00:00:00 2001 From: "Louise K. Schmidtgen" Date: Tue, 12 Nov 2024 19:26:57 +0100 Subject: [PATCH] Performance Test Set-up (1) (#197) --- .github/workflows/e2e.yml | 4 + .github/workflows/performance.yaml | 141 ++++++++ test/performance/.copyright.tmpl | 1 + test/performance/Readme.md | 60 ++++ test/performance/lxd-profile.yaml | 105 ++++++ test/performance/requirements-dev.txt | 5 + test/performance/requirements-test.txt | 5 + test/performance/tests/conftest.py | 189 ++++++++++ test/performance/tests/test_performance.py | 32 ++ test/performance/tests/test_util/config.py | 45 +++ .../tests/test_util/harness/__init__.py | 11 + .../tests/test_util/harness/base.py | 114 ++++++ .../tests/test_util/harness/lxd.py | 181 ++++++++++ test/performance/tests/test_util/util.py | 324 ++++++++++++++++++ test/performance/tox.ini | 51 +++ 15 files changed, 1268 insertions(+) create mode 100644 .github/workflows/performance.yaml create mode 100644 test/performance/.copyright.tmpl create mode 100644 test/performance/Readme.md create mode 100644 test/performance/lxd-profile.yaml create mode 100644 test/performance/requirements-dev.txt create mode 100644 test/performance/requirements-test.txt create mode 100644 test/performance/tests/conftest.py create mode 100644 test/performance/tests/test_performance.py create mode 100644 test/performance/tests/test_util/config.py create mode 100644 test/performance/tests/test_util/harness/__init__.py create mode 100644 test/performance/tests/test_util/harness/base.py create mode 100644 test/performance/tests/test_util/harness/lxd.py create mode 100644 test/performance/tests/test_util/util.py create mode 100644 test/performance/tox.ini diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index dbd60d85..5429d51a 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -5,6 +5,10 @@ on: branches: [master] pull_request: +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: name: Test diff --git a/.github/workflows/performance.yaml b/.github/workflows/performance.yaml new file mode 100644 index 00000000..127c4cb9 --- /dev/null +++ b/.github/workflows/performance.yaml @@ -0,0 +1,141 @@ +name: Performance Test K8s-snap + +on: + push: + branches: ["master"] + pull_request: + +permissions: + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + BASE_SHA: ${{ github.before || github.event.pull_request.base.sha }} + BASE_BRANCH: ${{ github.base_ref || github.ref }} + TARGET_SHA: ${{ github.sha }} + +jobs: + build: + name: K8s-snap Performance Test + runs-on: ubuntu-20.04 + steps: + - name: Harden Runner + uses: step-security/harden-runner@v2 + with: + egress-policy: audit + - name: Checking out repo + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: Install tox + run: | + pip install tox + - name: Install Go + uses: actions/setup-go@v5 + with: + go-version: "1.22" + - name: Install lxd + run: | + sudo snap refresh lxd --channel 5.21/stable + sudo lxd init --auto + sudo usermod --append --groups lxd $USER + sg lxd -c 'lxc version' + - name: Ensure lxd network traffic flows by removing docker if installed + run: | + if command -v docker >/dev/null 2>&1; then + echo "Docker is installed, purging it" + sudo apt-get purge -y docker-engine docker docker.io docker-ce docker-ce-cli containerd runc + fi + - name: 
Download latest k8s-snap
+        run: |
+          sudo snap download k8s --channel=latest/edge --basename k8s
+      - name: Unpack Snap
+        run: |
+          sudo unsquashfs -d snap-unpack-dir k8s.snap
+      - name: Create snap with k8s-dqlite ${{ github.head_ref }}
+        run: |
+          make static
+          sudo cp ./bin/static/k8s-dqlite snap-unpack-dir/bin/k8s-dqlite
+          sudo chmod o+r snap-unpack-dir/bin/k8s-dqlite
+          sudo mksquashfs snap-unpack-dir head.snap -noappend -comp lzo -no-fragments
+      - name: Run Performance test ${{ github.head_ref }} snap
+        env:
+          TEST_SNAP: ${{ github.workspace }}/head.snap
+          TEST_SUBSTRATE: lxd
+          TEST_LXD_IMAGE: ubuntu:22.04
+          TEST_INSPECTION_REPORTS_DIR: ${{ github.workspace }}/inspection-reports
+        run: |
+          cd test/performance && sg lxd -c 'tox -e performance'
+      - name: Create snap with k8s-dqlite base code
+        run: |
+          set -o pipefail
+          git fetch origin $BASE_BRANCH
+          git reset --hard $BASE_SHA
+          make static
+          sudo cp ./bin/static/k8s-dqlite snap-unpack-dir/bin/k8s-dqlite
+          sudo chmod o+r snap-unpack-dir/bin/k8s-dqlite
+          sudo mksquashfs snap-unpack-dir base-code.snap -noappend -comp lzo -no-fragments
+      - name: Switch back to target branch
+        run: git reset --hard $TARGET_SHA
+      - name: Run Performance test for base code snap
+        env:
+          TEST_SNAP: ${{ github.workspace }}/base-code.snap
+          TEST_SUBSTRATE: lxd
+          TEST_LXD_IMAGE: ubuntu:22.04
+          TEST_INSPECTION_REPORTS_DIR: ${{ github.workspace }}/inspection-reports
+        run: |
+          cd test/performance && sg lxd -c 'tox -e performance'
+      - name: Create snap with k8s-dqlite v1.1.11
+        run: |
+          set -o pipefail
+          git fetch origin --tags
+          git reset --hard v1.1.11
+          make static
+          sudo cp ./bin/static/k8s-dqlite snap-unpack-dir/bin/k8s-dqlite
+          sudo chmod o+r snap-unpack-dir/bin/k8s-dqlite
+          sudo mksquashfs snap-unpack-dir v1-1-11.snap -noappend -comp lzo -no-fragments
+      - name: Switch back to target branch
+        run: git reset --hard $TARGET_SHA
+      - name: Run Performance test for v1.1.11 snap
+        env:
+          TEST_SNAP: ${{ github.workspace }}/v1-1-11.snap
+          TEST_SUBSTRATE: lxd
+          TEST_LXD_IMAGE: ubuntu:22.04
+          TEST_INSPECTION_REPORTS_DIR: ${{ github.workspace }}/inspection-reports
+        run: |
+          cd test/performance && sg lxd -c 'tox -e performance'
+      - name: Create snap with k8s-dqlite v1.2.0
+        run: |
+          set -o pipefail
+          git fetch origin --tags
+          git reset --hard v1.2.0
+          make static
+          sudo cp ./bin/static/k8s-dqlite snap-unpack-dir/bin/k8s-dqlite
+          sudo chmod o+r snap-unpack-dir/bin/k8s-dqlite
+          sudo mksquashfs snap-unpack-dir v1-2-0.snap -noappend -comp lzo -no-fragments
+      - name: Switch back to target branch
+        run: git reset --hard $TARGET_SHA
+      - name: Run Performance test for v1.2.0 snap
+        env:
+          TEST_SNAP: ${{ github.workspace }}/v1-2-0.snap
+          TEST_SUBSTRATE: lxd
+          TEST_LXD_IMAGE: ubuntu:22.04
+          TEST_INSPECTION_REPORTS_DIR: ${{ github.workspace }}/inspection-reports
+        run: |
+          cd test/performance && sg lxd -c 'tox -e performance'
+      - name: Prepare inspection reports
+        if: failure()
+        run: |
+          tar -czvf inspection-reports.tar.gz -C ${{ github.workspace }} inspection-reports
+          echo "artifact_name=inspection-reports" >> $GITHUB_ENV
+      - name: Upload inspection report artifact
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: ${{ env.artifact_name }}
+          path: ${{ github.workspace }}/inspection-reports.tar.gz
diff --git a/test/performance/.copyright.tmpl b/test/performance/.copyright.tmpl
new file mode 100644
index 00000000..ecbed6c7
--- /dev/null
+++ b/test/performance/.copyright.tmpl
@@ -0,0 +1 @@
+Copyright ${years} ${owner}.
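The four build-and-test step pairs above differ only in the snap they exercise (head, base, v1.1.11, v1.2.0). For local debugging, the same comparison can be driven by hand. The sketch below is a hypothetical helper, not part of this patch; it assumes the repacked snaps already exist in the repository root and invokes the suite exactly as the workflow does, via `tox -e performance`.

```python
import os
import subprocess

# Hypothetical local driver mirroring the CI steps above. The snap file
# names match the ones built by the workflow; adjust paths as needed.
SNAPS = ["head.snap", "base-code.snap", "v1-1-11.snap", "v1-2-0.snap"]

for snap in SNAPS:
    env = dict(
        os.environ,
        TEST_SNAP=os.path.abspath(snap),  # full path, as the Readme requires
        TEST_SUBSTRATE="lxd",
        TEST_LXD_IMAGE="ubuntu:22.04",
        TEST_INSPECTION_REPORTS_DIR=os.path.abspath("inspection-reports"),
    )
    # Same entry point the workflow uses for every snap under test.
    subprocess.run(
        ["tox", "-e", "performance"], cwd="test/performance", env=env, check=True
    )
```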
diff --git a/test/performance/Readme.md b/test/performance/Readme.md
new file mode 100644
index 00000000..9bcabdd5
--- /dev/null
+++ b/test/performance/Readme.md
@@ -0,0 +1,60 @@
+# Performance Testing
+
+## Overview
+
+The performance tests are written in Python. They are built on top of a [Harness](./tests/conftest.py) fixture so that they can run in multiple environments, such as LXD or, in the future, the local machine.
+
+The tests can be configured using environment variables. You can see all available options in [./tests/test_util/config.py](./tests/test_util/config.py).
+
+## Running the performance tests
+
+Running the performance tests requires `python3` and `tox`. Install them with:
+
+```bash
+sudo apt install python3-virtualenv
+virtualenv .venv
+. .venv/bin/activate
+pip install 'tox<5'
+```
+
+Further, make sure that you have downloaded the `k8s.snap`:
+
+```bash
+sudo snap download k8s --channel=latest/edge --basename k8s
+```
+
+All performance tests require the local path to the snap package under test, specified via the `TEST_SNAP` environment variable. Make sure to specify the full path to the file.
+
+The tests are typically run with: `cd test/performance && tox -e performance`
+
+### Running the performance tests on the local machine
+
+```bash
+export TEST_SNAP=$PWD/k8s.snap
+export TEST_SUBSTRATE=local
+
+cd test/performance && tox -e performance
+```
+
+> *NOTE*: When running locally, performance tests that create more than one instance will fail.
+
+### Running the performance tests on LXD containers
+
+First, make sure that you have initialized LXD:
+
+```bash
+sudo lxd init --auto
+```
+
+Then, run the tests with:
+
+```bash
+export TEST_SNAP=$PWD/k8s.snap
+export TEST_SUBSTRATE=lxd
+
+export TEST_LXD_IMAGE=ubuntu:22.04 # (optionally) specify which image to use for LXD containers
+export TEST_LXD_PROFILE_NAME=k8s-performance # (optionally) specify profile name to configure
+export TEST_SKIP_CLEANUP=1 # (optionally) do not destroy machines after tests finish
+
+cd test/performance && tox -e performance
+```
diff --git a/test/performance/lxd-profile.yaml b/test/performance/lxd-profile.yaml
new file mode 100644
index 00000000..c6a05f38
--- /dev/null
+++ b/test/performance/lxd-profile.yaml
@@ -0,0 +1,105 @@
+description: "LXD profile for Canonical Kubernetes"
+config:
+  linux.kernel_modules: ip_vs,ip_vs_rr,ip_vs_wrr,ip_vs_sh,ip_tables,ip6_tables,iptable_raw,netlink_diag,nf_nat,overlay,br_netfilter,xt_socket
+  raw.lxc: |
+    lxc.apparmor.profile=unconfined
+    lxc.mount.auto=proc:rw sys:rw cgroup:rw
+    lxc.cgroup.devices.allow=a
+    lxc.cap.drop=
+  security.nesting: "true"
+  security.privileged: "true"
+devices:
+  aadisable2:
+    path: /dev/kmsg
+    source: /dev/kmsg
+    type: unix-char
+  dev-loop-control:
+    major: "10"
+    minor: "237"
+    path: /dev/loop-control
+    type: unix-char
+  dev-loop0:
+    major: "7"
+    minor: "0"
+    path: /dev/loop0
+    type: unix-block
+  dev-loop1:
+    major: "7"
+    minor: "1"
+    path: /dev/loop1
+    type: unix-block
+  dev-loop2:
+    major: "7"
+    minor: "2"
+    path: /dev/loop2
+    type: unix-block
+  dev-loop3:
+    major: "7"
+    minor: "3"
+    path: /dev/loop3
+    type: unix-block
+  dev-loop4:
+    major: "7"
+    minor: "4"
+    path: /dev/loop4
+    type: unix-block
+  dev-loop5:
+    major: "7"
+    minor: "5"
+    path: /dev/loop5
+    type: unix-block
+  dev-loop6:
+    major: "7"
+    minor: "6"
+    path: /dev/loop6
+    type: unix-block
+  dev-loop7:
+    major: "7"
+    minor: "7"
+    path: /dev/loop7
+    type: unix-block
+  dev-loop8:
+    major: "7"
+    minor: "8"
+    path: /dev/loop8
+    type: unix-block
+  dev-loop9:
+    major: "7"
+    minor: "9"
+ path: /dev/loop9 + type: unix-block + dev-loop10: + major: "7" + minor: "10" + path: /dev/loop10 + type: unix-block + dev-loop11: + major: "7" + minor: "11" + path: /dev/loop11 + type: unix-block + dev-loop12: + major: "7" + minor: "12" + path: /dev/loop12 + type: unix-block + dev-loop13: + major: "7" + minor: "13" + path: /dev/loop13 + type: unix-block + dev-loop14: + major: "7" + minor: "14" + path: /dev/loop14 + type: unix-block + dev-loop15: + major: "7" + minor: "15" + path: /dev/loop15 + type: unix-block + dev-loop16: + major: "7" + minor: "16" + path: /dev/loop16 + type: unix-block diff --git a/test/performance/requirements-dev.txt b/test/performance/requirements-dev.txt new file mode 100644 index 00000000..a66721ae --- /dev/null +++ b/test/performance/requirements-dev.txt @@ -0,0 +1,5 @@ +black==24.3.0 +codespell==2.2.4 +flake8==6.0.0 +isort==5.12.0 +licenseheaders==0.8.8 diff --git a/test/performance/requirements-test.txt b/test/performance/requirements-test.txt new file mode 100644 index 00000000..91282e09 --- /dev/null +++ b/test/performance/requirements-test.txt @@ -0,0 +1,5 @@ +coverage[toml]==7.2.5 +pytest==7.3.1 +PyYAML==6.0.1 +tenacity==8.2.3 +pylint==3.2.5 diff --git a/test/performance/tests/conftest.py b/test/performance/tests/conftest.py new file mode 100644 index 00000000..ad815994 --- /dev/null +++ b/test/performance/tests/conftest.py @@ -0,0 +1,189 @@ +# +# Copyright 2024 Canonical, Ltd.# +import itertools +import logging +from pathlib import Path +from typing import Generator, Iterator, List, Optional, Union + +import pytest +from test_util import config, harness, util + +LOG = logging.getLogger(__name__) + + +def _harness_clean(h: harness.Harness): + "Clean up created instances within the test harness." + + if config.SKIP_CLEANUP: + LOG.warning( + "Skipping harness cleanup. " + "It is your job now to clean up cloud resources" + ) + else: + LOG.debug("Cleanup") + h.cleanup() + + +def _generate_inspection_report(h: harness.Harness, instance_id: str): + LOG.debug("Generating inspection report for %s", instance_id) + + inspection_path = Path(config.INSPECTION_REPORTS_DIR) + result = h.exec( + instance_id, + ["/snap/k8s/current/k8s/scripts/inspect.sh", "/inspection-report.tar.gz"], + capture_output=True, + text=True, + check=False, + ) + + (inspection_path / instance_id).mkdir(parents=True, exist_ok=True) + (inspection_path / instance_id / "inspection_report_logs.txt").write_text( + result.stdout + ) + + try: + h.pull_file( + instance_id, + "/inspection-report.tar.gz", + (inspection_path / instance_id / "inspection_report.tar.gz").as_posix(), + ) + except harness.HarnessError as e: + LOG.warning("Failed to pull inspection report: %s", e) + + +@pytest.fixture(scope="session") +def h() -> harness.Harness: + LOG.debug("Create harness for %s", config.SUBSTRATE) + # if config.SUBSTRATE == "local": + # h = harness.LocalHarness() + if config.SUBSTRATE == "lxd": + h = harness.LXDHarness() + else: + raise harness.HarnessError( + "TEST_SUBSTRATE must be one of: local, lxd, multipass, juju" + ) + + yield h + + if config.INSPECTION_REPORTS_DIR is not None: + for instance_id in h.instances: + LOG.debug("Generating inspection reports for session instances") + _generate_inspection_report(h, instance_id) + + _harness_clean(h) + + +def pytest_configure(config): + config.addinivalue_line( + "markers", + "bootstrap_config: Provide a custom bootstrap config to the bootstrapping node.\n" + "disable_k8s_bootstrapping: By default, the first k8s node is bootstrapped. 
This marker disables that.\n" + "no_setup: No setup steps (pushing snap, bootstrapping etc.) are performed on any node for this test.\n" + "network_type: Specify network type to use for the infrastructure (IPv4, Dualstack or IPv6).\n" + "etcd_count: Mark a test to specify how many etcd instance nodes need to be created (None by default)\n" + "node_count: Mark a test to specify how many instance nodes need to be created\n" + "snap_versions: Mark a test to specify snap_versions for each node\n", + ) + + +@pytest.fixture(scope="function") +def node_count(request) -> int: + node_count_marker = request.node.get_closest_marker("node_count") + if not node_count_marker: + return 1 + node_count_arg, *_ = node_count_marker.args + return int(node_count_arg) + + +def snap_versions(request) -> Iterator[Optional[str]]: + """An endless iterable of snap versions for each node in the test.""" + marking = () + if snap_version_marker := request.node.get_closest_marker("snap_versions"): + marking, *_ = snap_version_marker.args + # endlessly repeat of the configured snap version after exhausting the marking + return itertools.chain(marking, itertools.repeat(None)) + + +@pytest.fixture(scope="function") +def disable_k8s_bootstrapping(request) -> bool: + return bool(request.node.get_closest_marker("disable_k8s_bootstrapping")) + + +@pytest.fixture(scope="function") +def no_setup(request) -> bool: + return bool(request.node.get_closest_marker("no_setup")) + + +@pytest.fixture(scope="function") +def bootstrap_config(request) -> Union[str, None]: + bootstrap_config_marker = request.node.get_closest_marker("bootstrap_config") + if not bootstrap_config_marker: + return None + config, *_ = bootstrap_config_marker.args + return config + + +@pytest.fixture(scope="function") +def network_type(request) -> Union[str, None]: + bootstrap_config_marker = request.node.get_closest_marker("network_type") + if not bootstrap_config_marker: + return "IPv4" + network_type, *_ = bootstrap_config_marker.args + return network_type + + +@pytest.fixture(scope="function") +def instances( + h: harness.Harness, + node_count: int, + tmp_path: Path, + disable_k8s_bootstrapping: bool, + no_setup: bool, + bootstrap_config: Union[str, None], + request, + network_type: str, +) -> Generator[List[harness.Instance], None, None]: + """Construct instances for a cluster. + + Bootstrap and setup networking on the first instance, if `disable_k8s_bootstrapping` marker is not set. + """ + if node_count <= 0: + pytest.xfail("Test requested 0 or fewer instances, skip this test.") + + LOG.info(f"Creating {node_count} instances") + instances: List[harness.Instance] = [] + + for _, snap in zip(range(node_count), snap_versions(request)): + # Create instances and setup the k8s snap in each. + instance = h.new_instance(network_type=network_type) + instances.append(instance) + if not no_setup: + util.setup_k8s_snap(instance, tmp_path, snap) + + if not disable_k8s_bootstrapping and not no_setup: + first_node, *_ = instances + + if bootstrap_config is not None: + first_node.exec( + ["k8s", "bootstrap", "--file", "-"], + input=str.encode(bootstrap_config), + ) + else: + first_node.exec(["k8s", "bootstrap"]) + + yield instances + + if config.SKIP_CLEANUP: + LOG.warning("Skipping clean-up of instances, delete them on your own") + return + + # Cleanup after each test. + # We cannot execute _harness_clean() here as this would also + # remove the session_instance. The harness ensures that everything is cleaned up + # at the end of the test session. 
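+    # Collect each instance's inspection report first, while the instance
+    # still exists, and only then delete the instance.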
+    for instance in instances:
+        if config.INSPECTION_REPORTS_DIR is not None:
+            LOG.debug("Generating inspection reports for test instances")
+            _generate_inspection_report(h, instance.id)
+
+        h.delete_instance(instance.id)
diff --git a/test/performance/tests/test_performance.py b/test/performance/tests/test_performance.py
new file mode 100644
index 00000000..2d05e1e0
--- /dev/null
+++ b/test/performance/tests/test_performance.py
@@ -0,0 +1,32 @@
+#
+# Copyright 2024 Canonical, Ltd.
+#
+import logging
+from typing import List
+
+import pytest
+from test_util import harness, util
+
+LOG = logging.getLogger(__name__)
+
+
+@pytest.mark.node_count(3)
+def test_load_test(instances: List[harness.Instance]):
+    cluster_node = instances[0]
+    joining_node = instances[1]
+    joining_node_2 = instances[2]
+
+    join_token = util.get_join_token(cluster_node, joining_node)
+    join_token_2 = util.get_join_token(cluster_node, joining_node_2)
+
+    assert join_token != join_token_2
+
+    util.join_cluster(joining_node, join_token)
+    util.join_cluster(joining_node_2, join_token_2)
+
+    util.wait_until_k8s_ready(cluster_node, instances)
+    nodes = util.ready_nodes(cluster_node)
+    assert len(nodes) == 3, "nodes should have joined cluster"
+
+    assert "control-plane" in util.get_local_node_status(cluster_node)
+    assert "control-plane" in util.get_local_node_status(joining_node)
+    assert "control-plane" in util.get_local_node_status(joining_node_2)
diff --git a/test/performance/tests/test_util/config.py b/test/performance/tests/test_util/config.py
new file mode 100644
index 00000000..6272a019
--- /dev/null
+++ b/test/performance/tests/test_util/config.py
@@ -0,0 +1,45 @@
+#
+# Copyright 2024 Canonical, Ltd.
+#
+import os
+from pathlib import Path
+
+DIR = Path(__file__).absolute().parent
+
+# The following defaults define how long to wait for a condition to be met.
+DEFAULT_WAIT_RETRIES = int(os.getenv("TEST_DEFAULT_WAIT_RETRIES") or 50)
+DEFAULT_WAIT_DELAY_S = int(os.getenv("TEST_DEFAULT_WAIT_DELAY_S") or 10)
+
+MANIFESTS_DIR = DIR / ".." / ".." / "templates"
+
+# INSPECTION_REPORTS_DIR is the directory where inspection reports are stored.
+# If empty, no reports are generated.
+INSPECTION_REPORTS_DIR = os.getenv("TEST_INSPECTION_REPORTS_DIR")
+
+# SKIP_CLEANUP can be used to prevent machines from being automatically
+# destroyed after the tests complete.
+SKIP_CLEANUP = (os.getenv("TEST_SKIP_CLEANUP") or "") == "1"
+
+# SNAP is the path to the snap under test.
+SNAP = os.getenv("TEST_SNAP") or ""
+
+# SNAP_NAME is the name of the snap under test.
+SNAP_NAME = os.getenv("TEST_SNAP_NAME") or "k8s"
+
+# FLAVOR is the flavour to use for running the performance tests.
+FLAVOR = os.getenv("TEST_FLAVOR") or ""
+
+# SUBSTRATE is the substrate to use for running the performance tests.
+# Default 'lxd'.
+SUBSTRATE = os.getenv("TEST_SUBSTRATE") or "lxd"
+
+# LXD_IMAGE is the image to use for LXD containers.
+LXD_IMAGE = os.getenv("TEST_LXD_IMAGE") or "ubuntu:22.04"
+
+# LXD_PROFILE is the profile to use for LXD containers.
+LXD_PROFILE = (
+    os.getenv("TEST_LXD_PROFILE")
+    or (DIR / ".." / ".." / "lxd-profile.yaml").read_text()
+)
+
+# LXD_PROFILE_NAME is the profile name to use for LXD containers.
+LXD_PROFILE_NAME = os.getenv("TEST_LXD_PROFILE_NAME") or "k8s-performance"
diff --git a/test/performance/tests/test_util/harness/__init__.py b/test/performance/tests/test_util/harness/__init__.py
new file mode 100644
index 00000000..d1016962
--- /dev/null
+++ b/test/performance/tests/test_util/harness/__init__.py
@@ -0,0 +1,11 @@
+#
+# Copyright 2024 Canonical, Ltd.
+#
+from test_util.harness.base import Harness, HarnessError, Instance
+from test_util.harness.lxd import LXDHarness
+
+__all__ = [
+    "HarnessError",
+    "Harness",
+    "Instance",
+    "LXDHarness",
+]
diff --git a/test/performance/tests/test_util/harness/base.py b/test/performance/tests/test_util/harness/base.py
new file mode 100644
index 00000000..381f1304
--- /dev/null
+++ b/test/performance/tests/test_util/harness/base.py
@@ -0,0 +1,114 @@
+#
+# Copyright 2024 Canonical, Ltd.
+#
+import subprocess
+from functools import cached_property, partial
+
+
+class HarnessError(Exception):
+    """Base error for all our harness failures"""
+
+    pass
+
+
+class Instance:
+    """Reference to a harness and a given instance id.
+
+    Provides convenience methods for an instance to call its harness' methods
+    """
+
+    def __init__(self, h: "Harness", id: str) -> None:
+        self._h = h
+        self._id = id
+
+        self.send_file = partial(h.send_file, id)
+        self.pull_file = partial(h.pull_file, id)
+        self.exec = partial(h.exec, id)
+        self.delete_instance = partial(h.delete_instance, id)
+
+    @property
+    def id(self) -> str:
+        return self._id
+
+    @cached_property
+    def arch(self) -> str:
+        """Return the architecture of the instance"""
+        return self.exec(
+            ["dpkg", "--print-architecture"], text=True, capture_output=True
+        ).stdout.strip()
+
+    def __str__(self) -> str:
+        return f"{self._h.name}:{self.id}"
+
+
+class Harness:
+    """Abstract how performance tests can start and manage multiple machines. This allows
+    writing performance tests that can run on the local machine, LXD, or Multipass with minimum
+    effort.
+    """
+
+    name: str
+
+    def new_instance(self, network_type: str = "IPv4") -> Instance:
+        """Creates a new instance on the infrastructure and returns an object
+        which can be used to interact with it.
+
+        network_type: The network stack for the instance; currently only "IPv4" is supported.
+
+        If the operation fails, a HarnessError is raised.
+        """
+        raise NotImplementedError
+
+    def send_file(self, instance_id: str, source: str, destination: str):
+        """Send a local file to the instance.
+
+        :param instance_id: The instance_id, as returned by new_instance()
+        :param source: Path to the file that will be copied to the instance
+        :param destination: Path in the instance where the file will be copied.
+            This must always be an absolute path.
+
+        If the operation fails, a HarnessError is raised.
+        """
+        raise NotImplementedError
+
+    def pull_file(self, instance_id: str, source: str, destination: str):
+        """Pull a file from the instance and save it on the local machine
+
+        :param instance_id: The instance_id, as returned by new_instance()
+        :param source: Path to the file that will be copied from the instance.
+            This must always be an absolute path.
+        :param destination: Path on the local machine the file will be saved.
+
+        If the operation fails, a HarnessError is raised.
+        """
+        raise NotImplementedError
+
+    def exec(
+        self, instance_id: str, command: list, **kwargs
+    ) -> subprocess.CompletedProcess:
+        """Run a command as root on the instance.
+
+        :param instance_id: The instance_id, as returned by new_instance()
+        :param command: Command for subprocess.run()
+        :param kwargs: Keyword args compatible with subprocess.run()
+
+        If the operation fails, a subprocess.CalledProcessError is raised.
+        """
+        raise NotImplementedError
+
+    def delete_instance(self, instance_id: str):
+        """Delete a previously created instance.
+
+        :param instance_id: The instance_id, as returned by new_instance()
+
+        If the operation fails, a HarnessError is raised.
+        """
+        raise NotImplementedError
+
+    def cleanup(self):
+        """Delete any leftover resources after the tests are done, e.g. delete any
+        instances that might still be running.
+
+        If the operation fails, a HarnessError is raised.
+        """
+        raise NotImplementedError
diff --git a/test/performance/tests/test_util/harness/lxd.py b/test/performance/tests/test_util/harness/lxd.py
new file mode 100644
index 00000000..ce6041aa
--- /dev/null
+++ b/test/performance/tests/test_util/harness/lxd.py
@@ -0,0 +1,181 @@
+#
+# Copyright 2024 Canonical, Ltd.
+#
+import logging
+import os
+import shlex
+import subprocess
+from pathlib import Path
+from typing import List
+
+from test_util import config
+from test_util.harness import Harness, HarnessError, Instance
+from test_util.util import run, stubbornly
+
+LOG = logging.getLogger(__name__)
+
+
+class LXDHarness(Harness):
+    """A Harness that creates an LXD container for each instance."""
+
+    name = "lxd"
+
+    def next_id(self) -> int:
+        self._next_id += 1
+        return self._next_id
+
+    def __init__(self):
+        super(LXDHarness, self).__init__()
+
+        self._next_id = 0
+        self.profile = config.LXD_PROFILE_NAME
+        self.image = config.LXD_IMAGE
+        self.instances = set()
+
+        self._configure_profile(self.profile, config.LXD_PROFILE)
+
+        self._configure_network(
+            "lxdbr0",
+            "ipv4.address=auto",
+            "ipv4.nat=true",
+        )
+
+        LOG.debug(
+            "Configured LXD substrate (profile %s, image %s)", self.profile, self.image
+        )
+
+    def new_instance(self, network_type: str = "IPv4") -> Instance:
+        instance_id = f"k8s-performance-{os.urandom(3).hex()}-{self.next_id()}"
+
+        LOG.debug("Creating instance %s with image %s", instance_id, self.image)
+        launch_lxd_command = [
+            "lxc",
+            "launch",
+            self.image,
+            instance_id,
+            "-p",
+            "default",
+            "-p",
+            self.profile,
+        ]
+
+        if network_type.lower() != "ipv4":
+            raise HarnessError(
+                f"unknown network type {network_type}, must be 'IPv4'"
+            )
+
+        try:
+            stubbornly(retries=3, delay_s=1).exec(launch_lxd_command)
+            self.instances.add(instance_id)
+
+        except subprocess.CalledProcessError as e:
+            raise HarnessError(f"Failed to create LXD container {instance_id}") from e
+
+        self.exec(instance_id, ["snap", "wait", "system", "seed.loaded"])
+        return Instance(self, instance_id)
+
+    def _configure_profile(self, profile_name: str, profile_config: str):
+        LOG.debug("Checking for LXD profile %s", profile_name)
+        try:
+            run(["lxc", "profile", "show", profile_name])
+        except subprocess.CalledProcessError:
+            try:
+                LOG.debug("Creating LXD profile %s", profile_name)
+                run(["lxc", "profile", "create", profile_name])
+
+            except subprocess.CalledProcessError as e:
+                raise HarnessError(
+                    f"Failed to create LXD profile {profile_name}"
+                ) from e
+
+        try:
+            LOG.debug("Configuring LXD profile %s", profile_name)
+            run(
+                ["lxc", "profile", "edit", profile_name],
+                input=profile_config.encode(),
+            )
+        except subprocess.CalledProcessError as e:
+            raise HarnessError(f"Failed to configure LXD profile {profile_name}") from e
+
+    def _configure_network(self, network_name: str, *network_args: List[str]):
+        LOG.debug("Checking for LXD network %s", network_name)
+        try:
+            run(["lxc", "network", "show", network_name])
+        except subprocess.CalledProcessError:
+            try:
+                LOG.debug("Creating LXD network %s", network_name)
+                run(["lxc", "network", "create", network_name, *network_args])
+
+            except subprocess.CalledProcessError as e:
+                raise HarnessError(
+                    f"Failed to create LXD network {network_name}"
+                ) from e
+
+    def send_file(self, instance_id: str, source: str, destination: str):
+        if instance_id not in self.instances:
+            raise HarnessError(f"unknown instance {instance_id}")
+
+        if not Path(destination).is_absolute():
+            raise HarnessError(f"path {destination} must be absolute")
+
+        LOG.debug(
+            "Copying file %s to instance %s at %s", source, instance_id, destination
+        )
+        try:
+            self.exec(
+                instance_id,
+                ["mkdir", "-m=0777", "-p", Path(destination).parent.as_posix()],
+                capture_output=True,
+            )
+            run(
+                ["lxc", "file", "push", source, f"{instance_id}{destination}"],
+                capture_output=True,
+            )
+        except subprocess.CalledProcessError as e:
+            LOG.error(f"command {e.cmd} failed")
+            LOG.error(f"  {e.returncode=}")
+            LOG.error(f"  {e.stdout.decode()=}")
+            LOG.error(f"  {e.stderr.decode()=}")
+            raise HarnessError("failed to push file") from e
+
+    def pull_file(self, instance_id: str, source: str, destination: str):
+        if instance_id not in self.instances:
+            raise HarnessError(f"unknown instance {instance_id}")
+
+        if not Path(source).is_absolute():
+            raise HarnessError(f"path {source} must be absolute")
+
+        LOG.debug(
+            "Copying file %s from instance %s to %s", source, instance_id, destination
+        )
+        try:
+            run(
+                ["lxc", "file", "pull", f"{instance_id}{source}", destination],
+                stdout=subprocess.DEVNULL,
+            )
+        except subprocess.CalledProcessError as e:
+            raise HarnessError("lxc file pull command failed") from e
+
+    def exec(self, instance_id: str, command: list, **kwargs):
+        if instance_id not in self.instances:
+            raise HarnessError(f"unknown instance {instance_id}")
+
+        LOG.debug("Execute command %s in instance %s", command, instance_id)
+        return run(
+            ["lxc", "shell", instance_id, "--", "bash", "-c", shlex.join(command)],
+            **kwargs,
+        )
+
+    def delete_instance(self, instance_id: str):
+        if instance_id not in self.instances:
+            raise HarnessError(f"unknown instance {instance_id}")
+
+        try:
+            run(["lxc", "rm", instance_id, "--force"])
+        except subprocess.CalledProcessError as e:
+            raise HarnessError(f"failed to delete instance {instance_id}") from e
+
+        self.instances.discard(instance_id)
+
+    def cleanup(self):
+        for instance_id in self.instances.copy():
+            self.delete_instance(instance_id)
diff --git a/test/performance/tests/test_util/util.py b/test/performance/tests/test_util/util.py
new file mode 100644
index 00000000..0a0d45f7
--- /dev/null
+++ b/test/performance/tests/test_util/util.py
@@ -0,0 +1,324 @@
+#
+# Copyright 2024 Canonical, Ltd.
+#
+import json
+import logging
+import re
+import shlex
+import subprocess
+import urllib.request
+from functools import partial
+from pathlib import Path
+from typing import Any, Callable, List, Mapping, Optional, Union
+
+import pytest
+from tenacity import (
+    RetryCallState,
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    stop_never,
+    wait_fixed,
+)
+from test_util import config, harness
+
+LOG = logging.getLogger(__name__)
+RISKS = ["stable", "candidate", "beta", "edge"]
+TRACK_RE = re.compile(r"^(\d+)\.(\d+)(\S*)$")
+
+
+def run(command: list, **kwargs) -> subprocess.CompletedProcess:
+    """Log and run command."""
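+    # check=True is applied by default below, so a failing command raises
+    # subprocess.CalledProcessError unless the caller explicitly overrides it.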
kwargs.setdefault("check", True) + + LOG.debug("Execute command %s (kwargs=%s)", shlex.join(command), kwargs) + return subprocess.run(command, **kwargs) + + +def stubbornly( + retries: Optional[int] = None, + delay_s: Optional[Union[float, int]] = None, + exceptions: Optional[tuple] = None, + **retry_kds, +): + """ + Retry a command for a while, using tenacity + + By default, retry immediately and forever until no exceptions occur. + + Some commands need to execute until they pass some condition + > stubbornly(*retry_args).until(*some_condition).exec(*some_command) + + Some commands need to execute until they complete + > stubbornly(*retry_args).exec(*some_command) + + : param retries int: convenience param to use stop=retry.stop_after_attempt() + : param delay_s float|int: convenience param to use wait=retry.wait_fixed(delay_s) + : param exceptions Tuple[Exception]: convenience param to use retry=retry.retry_if_exception_type(exceptions) + : param retry_kds Mapping: direct interface to all tenacity arguments for retrying + """ + + def _before_sleep(retry_state: RetryCallState): + attempt = retry_state.attempt_number + tries = f"/{retries}" if retries is not None else "" + LOG.info( + f"Attempt {attempt}{tries} failed. Error: {retry_state.outcome.exception()}" + ) + LOG.info(f"Retrying in {delay_s} seconds...") + + _waits = wait_fixed(delay_s) if delay_s is not None else wait_fixed(0) + _stops = stop_after_attempt(retries) if retries is not None else stop_never + _exceptions = exceptions or (Exception,) # default to retry on all exceptions + + _retry_args = dict( + wait=_waits, + stop=_stops, + retry=retry_if_exception_type(_exceptions), + before_sleep=_before_sleep, + ) + # Permit any tenacity retry overrides from these ^defaults + _retry_args.update(retry_kds) + + class Retriable: + def __init__(self) -> None: + self._condition = None + self._run = partial(run, capture_output=True) + + @retry(**_retry_args) + def exec( + self, + command_args: List[str], + **command_kwds, + ): + """ + Execute a command against a harness or locally with subprocess to be retried. + + :param List[str] command_args: The command to be executed, as a str or list of str + :param Mapping[str,str] command_kwds: Additional keyword arguments to be passed to exec + """ + + try: + resp = self._run(command_args, **command_kwds) + except subprocess.CalledProcessError as e: + LOG.warning(f" rc={e.returncode}") + LOG.warning(f" stdout={e.stdout.decode()}") + LOG.warning(f" stderr={e.stderr.decode()}") + raise + if self._condition: + assert self._condition(resp), "Failed to meet condition" + return resp + + def on(self, instance: harness.Instance) -> "Retriable": + """ + Target the command at some instance. + + :param instance Instance: Instance on a test harness. 
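+
+            Example (hypothetical; mirrors wait_until_k8s_ready below):
+                stubbornly(retries=3, delay_s=2).on(node).until(
+                    lambda p: "Ready" in p.stdout.decode()
+                ).exec(["k8s", "kubectl", "get", "node", "--no-headers"])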
+ """ + self._run = partial(instance.exec, capture_output=True) + return self + + def until( + self, condition: Callable[[subprocess.CompletedProcess], bool] = None + ) -> "Retriable": + """ + Test the output of the executed command against an expected response + + :param Callable condition: a callable which returns a truth about the command output + """ + self._condition = condition + return self + + return Retriable() + + +def _as_int(value: Optional[str]) -> Optional[int]: + """Convert a string to an integer.""" + try: + return int(value) + except (TypeError, ValueError): + return None + + +def setup_k8s_snap( + instance: harness.Instance, + tmp_path: Path, + snap: Optional[str] = None, + connect_interfaces=True, +): + """Installs and sets up the snap on the given instance and connects the interfaces. + + Args: + instance: instance on which to install the snap + tmp_path: path to store the snap on the instance + snap: choice of track, channel, revision, or file path + a snap track to install + a snap channel to install + a snap revision to install + a path to the snap to install + """ + cmd = ["snap", "install", "--classic"] + which_snap = snap or config.SNAP + + if not which_snap: + pytest.fail("Set TEST_SNAP to the channel, revision, or path to the snap") + + if isinstance(which_snap, str) and which_snap.startswith("/"): + LOG.info("Install k8s snap by path") + snap_path = (tmp_path / "k8s.snap").as_posix() + instance.send_file(which_snap, snap_path) + cmd += ["--dangerous", snap_path] + elif snap_revision := _as_int(which_snap): + LOG.info("Install k8s snap by revision") + cmd += [config.SNAP_NAME, "--revision", snap_revision] + elif "/" in which_snap or which_snap in RISKS: + LOG.info("Install k8s snap by specific channel: %s", which_snap) + cmd += [config.SNAP_NAME, "--channel", which_snap] + elif channel := tracks_least_risk(which_snap, instance.arch): + LOG.info("Install k8s snap by least risky channel: %s", channel) + cmd += [config.SNAP_NAME, "--channel", channel] + + instance.exec(cmd) + if connect_interfaces: + LOG.info("Ensure k8s interfaces and network requirements") + instance.exec(["/snap/k8s/current/k8s/hack/init.sh"], stdout=subprocess.DEVNULL) + + +def wait_until_k8s_ready( + control_node: harness.Instance, + instances: List[harness.Instance], + retries: int = config.DEFAULT_WAIT_RETRIES, + delay_s: int = config.DEFAULT_WAIT_DELAY_S, + node_names: Mapping[str, str] = {}, +): + """ + Validates that the K8s node is in Ready state. + + By default, the hostname of the instances is used as the node name. + If the instance name is different from the hostname, the instance name should be passed to the + node_names dictionary, e.g. {"instance_id": "node_name"}. 
+ """ + for instance in instances: + if instance.id in node_names: + node_name = node_names[instance.id] + else: + node_name = hostname(instance) + + result = ( + stubbornly(retries=retries, delay_s=delay_s) + .on(control_node) + .until(lambda p: " Ready" in p.stdout.decode()) + .exec(["k8s", "kubectl", "get", "node", node_name, "--no-headers"]) + ) + LOG.info("Kubelet registered successfully!") + LOG.info("%s", result.stdout.decode()) + + +def wait_for_dns(instance: harness.Instance): + LOG.info("Waiting for DNS to be ready") + instance.exec(["k8s", "x-wait-for", "dns"]) + + +def wait_for_network(instance: harness.Instance): + LOG.info("Waiting for network to be ready") + instance.exec(["k8s", "x-wait-for", "network"]) + + +def hostname(instance: harness.Instance) -> str: + """Return the hostname for a given instance.""" + resp = instance.exec(["hostname"], capture_output=True) + return resp.stdout.decode().strip() + + +def get_local_node_status(instance: harness.Instance) -> str: + resp = instance.exec(["k8s", "local-node-status"], capture_output=True) + return resp.stdout.decode().strip() + + +def get_nodes(control_node: harness.Instance) -> List[Any]: + """Get a list of existing nodes. + + Args: + control_node: instance on which to execute check + + Returns: + list of nodes + """ + result = control_node.exec( + ["k8s", "kubectl", "get", "nodes", "-o", "json"], capture_output=True + ) + assert result.returncode == 0, "Failed to get nodes with kubectl" + node_list = json.loads(result.stdout.decode()) + assert node_list["kind"] == "List", "Should have found a list of nodes" + return [node for node in node_list["items"]] + + +def ready_nodes(control_node: harness.Instance) -> List[Any]: + """Get a list of the ready nodes. + + Args: + control_node: instance on which to execute check + + Returns: + list of nodes + """ + return [ + node + for node in get_nodes(control_node) + if all( + condition["status"] == "False" + for condition in node["status"]["conditions"] + if condition["type"] != "Ready" + ) + ] + + +# Create a token to join a node to an existing cluster +def get_join_token( + initial_node: harness.Instance, joining_cplane_node: harness.Instance, *args: str +) -> str: + out = initial_node.exec( + ["k8s", "get-join-token", joining_cplane_node.id, *args], + capture_output=True, + ) + return out.stdout.decode().strip() + + +# Join an existing cluster. +def join_cluster(instance: harness.Instance, join_token: str): + instance.exec(["k8s", "join-cluster", join_token]) + +def tracks_least_risk(track: str, arch: str) -> str: + """Determine the snap channel with the least risk in the provided track. 
+ + Args: + track: the track to determine the least risk channel for + arch: the architecture to narrow the revision + + Returns: + the channel associated with the least risk + """ + LOG.debug("Determining least risk channel for track: %s on %s", track, arch) + if track == "latest": + return f"latest/edge/{config.FLAVOR or 'classic'}" + + INFO_URL = f"https://api.snapcraft.io/v2/snaps/info/{config.SNAP_NAME}" + HEADERS = { + "Snap-Device-Series": "16", + "User-Agent": "Mozilla/5.0", + } + + req = urllib.request.Request(INFO_URL, headers=HEADERS) + with urllib.request.urlopen(req) as response: + snap_info = json.loads(response.read().decode()) + + risks = [ + channel["channel"]["risk"] + for channel in snap_info["channel-map"] + if channel["channel"]["track"] == track + and channel["channel"]["architecture"] == arch + ] + if not risks: + raise ValueError(f"No risks found for track: {track}") + risk_level = {"stable": 0, "candidate": 1, "beta": 2, "edge": 3} + channel = f"{track}/{min(risks, key=lambda r: risk_level[r])}" + LOG.info("Least risk channel from track %s is %s", track, channel) + return channel diff --git a/test/performance/tox.ini b/test/performance/tox.ini new file mode 100644 index 00000000..e3d8431c --- /dev/null +++ b/test/performance/tox.ini @@ -0,0 +1,51 @@ +[tox] +skipsdist = True +skip_missing_interpreters = True +env_list = format, lint, performance + +[testenv] +set_env = + PYTHONBREAKPOINT=pdb.set_trace + PY_COLORS=1 +passenv = + PYTHONPATH + +[testenv:format] +description = Apply coding style standards to code +deps = -r {toxinidir}/requirements-dev.txt +commands = + licenseheaders -t {toxinidir}/.copyright.tmpl -cy -o 'Canonical, Ltd' -d {toxinidir}/tests + isort {toxinidir}/tests --profile=black + black {toxinidir}/tests + +[testenv:lint] +description = Check code against coding style standards +deps = -r {toxinidir}/requirements-dev.txt +commands = + codespell {toxinidir}/tests + flake8 {toxinidir}/tests + licenseheaders -t {toxinidir}/.copyright.tmpl -cy -o 'Canonical, Ltd' -d {toxinidir}/tests --dry + isort {toxinidir}/tests --profile=black --check + black {toxinidir}/tests --check --diff + +[testenv:performance] +description = Run performance tests +deps = + -r {toxinidir}/requirements-test.txt +commands = + pytest -vv \ + --maxfail 1 \ + --tb native \ + --log-cli-level DEBUG \ + --disable-warnings \ + {posargs} \ + {toxinidir}/tests +passenv = + TEST_* + +[flake8] +max-line-length = 120 +select = E,W,F,C,N +ignore = W503 +exclude = venv,.git,.tox,.tox_env,.venv,build,dist,*.egg_info +show-source = true
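As a usage sketch of the utilities this patch introduces, a new performance test could be added under `tests/` along the lines below. This is a hypothetical example, not part of the patch; it reuses only helpers defined above (`get_join_token`, `join_cluster`, `wait_until_k8s_ready`, `ready_nodes`) together with the `instances` fixture and `node_count` marker from `conftest.py`.

```python
#
# Copyright 2024 Canonical, Ltd.
#
from typing import List

import pytest
from test_util import harness, util


@pytest.mark.node_count(2)
def test_two_node_join(instances: List[harness.Instance]):
    # The fixture bootstraps the first instance; the second joins it.
    cluster_node, joining_node = instances

    # Tokens are minted on the bootstrapped node for each joining node.
    token = util.get_join_token(cluster_node, joining_node)
    util.join_cluster(joining_node, token)

    # Wait until both nodes report Ready before asserting on the cluster.
    util.wait_until_k8s_ready(cluster_node, instances)
    assert len(util.ready_nodes(cluster_node)) == 2
```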