Skip to content

Commit

Permalink
feat(lxd): store and check PID when launching instances (#464)
Browse files Browse the repository at this point in the history
Signed-off-by: Callahan Kovacs <[email protected]>
  • Loading branch information
mr-cal authored Nov 30, 2023
1 parent d9c4465 commit d5c194b
Show file tree
Hide file tree
Showing 8 changed files with 311 additions and 87 deletions.
121 changes: 90 additions & 31 deletions craft_providers/lxd/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import threading
import time
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional

from craft_providers import Base, ProviderError, bases
Expand Down Expand Up @@ -264,35 +265,26 @@ def _formulate_base_instance_name(
return "-".join(["base-instance", compatibility_tag, image_remote, image_name])


def _is_valid(
*,
instance_name: str,
project: str,
remote: str,
lxc: LXC,
expiration: timedelta,
) -> bool:
def _is_valid(*, instance: LXDInstance, expiration: timedelta) -> bool:
"""Check if an instance is valid.
Instances are valid if they are not expired (too old). An instance's age is measured
by it's creation date. For example, if the expiration is 90 days, then the instance
will expire 91 days after it was created.
Instances are valid if they are ready and not expired (too old).
An instance is ready if it is set up and stopped.
An instance's age is measured by its creation date. For example, if the expiration
is 90 days, then the instance will expire 91 days after it was created.
If errors occur during the validity check, the instance is assumed to be invalid.
:param instance_name: Name of instance to check the validity of.
:param project: LXD project name to create.
:param remote: LXD remote to create project on.
:param lxc: LXC client.
:param instance: LXD instance to check the validity of.
:param expiration: How long an instance will be valid from its creation date.
:returns: True if the instance is valid. False otherwise.
"""
logger.debug("Checking validity of instance %r.", instance_name)
logger.debug("Checking validity of instance %r.", instance.instance_name)

# capture instance info
try:
info = lxc.info(instance_name=instance_name, project=project, remote=remote)
info = instance.info()
except LXDError as raised:
# if the base instance info can't be retrieved, consider it invalid
logger.debug("Could not get instance info with error: %s", raised)
Expand Down Expand Up @@ -322,6 +314,12 @@ def _is_valid(
)
return False

try:
_wait_for_instance_ready(instance)
except LXDError as err:
logger.debug("Instance is not valid: %s", err)
return False

logger.debug("Instance is valid.")
return True

Expand Down Expand Up @@ -560,6 +558,80 @@ def _set_timezone(instance: LXDInstance, project: str, remote: str, lxc: LXC) ->
)


def _wait_for_instance_ready(instance: LXDInstance) -> None:
"""Wait for an instance to be ready.
If another process created an instance and is still running, then wait for
the other process to finish setting up the instance.
:param instance: LXD instance to wait for.
:raises LXDError:
- If the instance is not ready and the process that created the
instance is no longer running.
- If the function times out while waiting for another process
to set up the instance.
- On any failure to get config data or info from the instance.
"""
instance_state = instance.info().get("Status")
instance_status = instance.config_get("user.craft_providers.status")

# check if the instance is ready
if (
instance_status == ProviderInstanceStatus.FINISHED.value
and instance_state == "STOPPED"
):
logger.debug("Instance %r is ready.", instance.instance_name)
return

logger.debug(
"Instance %r is not ready (State: %s, Status: %s)",
instance.instance_name,
instance_state,
instance_status,
)

# LXD is linux-only, but verify the platform anyways
if sys.platform == "linux":
# get the PID of the process that created the instance
pid = instance.config_get("user.craft_providers.pid")

# an empty string means there was no value set
if pid == "":
raise LXDError(
brief="Instance is not ready.",
details=(
f"Instance {instance.instance_name!r} is not ready and does not "
"have the pid of the process that created the instance."
),
)

# check if the PID is active
if Path(f"/proc/{pid}").exists():
logger.debug(
"Process that created the instance %r is still running (pid %s).",
instance.instance_name,
pid,
)
else:
raise LXDError(
brief="Instance is not ready.",
details=(
f"Instance {instance.instance_name!r} is not ready and the process "
f"(pid {pid}) that created the instance is inactive."
),
)
else:
logger.debug("Skipping PID check because system is not linux")

# if the PID is active, wait for the instance to be ready
instance.lxc.check_instance_status(
instance_name=instance.instance_name,
project=instance.project,
remote=instance.remote,
)


def launch(
name: str,
*,
Expand Down Expand Up @@ -728,13 +800,7 @@ def launch(

# the base instance exists but is not valid, so delete it then create a new
# instance and base instance
if not _is_valid(
instance_name=base_instance.instance_name,
project=project,
remote=remote,
lxc=lxc,
expiration=expiration,
):
if not _is_valid(instance=base_instance, expiration=expiration):
logger.debug(
"Base instance %r is not valid. Deleting base instance.",
base_instance.instance_name,
Expand All @@ -755,13 +821,6 @@ def launch(
)
return instance

# check if the base instance is still being created
base_instance.lxc.check_instance_status(
instance_name=base_instance.instance_name,
project=project,
remote=remote,
)

# at this point, there is a valid base instance to be copied to a new instance
logger.info("Creating instance from base instance")
logger.debug(
Expand Down
18 changes: 14 additions & 4 deletions craft_providers/lxd/lxc.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import contextlib
import enum
import logging
import os
import pathlib
import shlex
import subprocess
Expand Down Expand Up @@ -579,6 +580,7 @@ def launch( # noqa: PLR0912
_default_instance_metadata: Dict[str, str] = {
"user.craft_providers.status": ProviderInstanceStatus.STARTING.value,
"user.craft_providers.timer": datetime.now(timezone.utc).isoformat(),
"user.craft_providers.pid": str(os.getpid()),
}
retry_count: int = 0
if config_keys:
Expand Down Expand Up @@ -1098,7 +1100,12 @@ def check_instance_status(
project: str = "default",
remote: str = "local",
) -> None:
"""Check build status of instance.
"""Repeatedly check if an instance is ready until the function times out.
If another process is setting up an instance, the instance's timer will keep
incrementing. This function will wait until the other process finishes setting
up the instance or times out and stops incrementing the timer. In the latter
case, a timeout will occur after 60 seconds of the timer not being incremented.
The possible status are:
- None: Either the instance is downloading or old that this is not set.
Expand All @@ -1109,6 +1116,8 @@ def check_instance_status(
was interrupted or failed.
- FINISHED: Instance is ready, all configuration and installation is successful.
When it also STOPPED, then the instance is ready to be copied.
:raises LXDError: If the instance is not ready.
"""
instance_status: Optional[str] = None
instance_info: Dict[str, Any] = {"Status": ""}
Expand All @@ -1117,8 +1126,9 @@ def check_instance_status(
# 20 * 3 seconds = 1 minute no change in timer
timer_queue: deque = deque([-2, -1], maxlen=20)

# Retry unless the timer queue is all the same
# retry until the instance's timer hasn't changed for the last 20 iterations
while len(set(timer_queue)) > 1:
logger.debug("Checking if instance is ready.")
try:
# Get instance info
instance_info = self.info(
Expand Down Expand Up @@ -1159,7 +1169,7 @@ def check_instance_status(
logger.debug("Instance %s is ready.", instance_name)
return

logger.debug("Instance is not ready.")
time.sleep(3)

# No timer change for 1 minute and the instance is still not ready.
raise LXDError("Instance setup failed. Check LXD logs for more details.")
raise LXDError(brief="Timed out waiting for instance to be ready.")
8 changes: 8 additions & 0 deletions craft_providers/lxd/lxd_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,3 +639,11 @@ def config_set(self, key: str, value: str) -> None:
project=self.project,
remote=self.remote,
)

def info(self) -> Dict[str, Any]:
"""Get info for an instance."""
return self.lxc.info(
instance_name=self.instance_name,
project=self.project,
remote=self.remote,
)
30 changes: 29 additions & 1 deletion tests/integration/lxd/test_lxc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@

import pathlib
import subprocess
from datetime import datetime

import pytest
from craft_providers.lxd import LXDError
from craft_providers.lxd import LXDError, lxd_instance_status

from . import conftest

Expand All @@ -32,6 +33,33 @@ def instance(instance_name, session_project):
yield tmp_instance


def test_launch_default_config(instance, lxc, session_project):
"""Verify default config values when launching."""
status = lxc.config_get(
instance_name=instance,
key="user.craft_providers.status",
project=session_project,
)
timer = lxc.config_get(
instance_name=instance,
key="user.craft_providers.timer",
project=session_project,
)
pid = lxc.config_get(
instance_name=instance,
key="user.craft_providers.pid",
project=session_project,
)

assert status in [
status.value for status in lxd_instance_status.ProviderInstanceStatus
]
# assert timer is a valid ISO datetime
datetime.fromisoformat(timer)
# assert PID is an integer
int(pid)


def test_exec(instance, lxc, session_project):
proc = lxc.exec(
instance_name=instance,
Expand Down
6 changes: 6 additions & 0 deletions tests/integration/lxd/test_lxd_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,3 +264,9 @@ def test_start_stop(reusable_instance):
reusable_instance.start()

assert reusable_instance.is_running() is True


def test_info(reusable_instance):
info = reusable_instance.info()

assert info.get("Name") == reusable_instance.instance_name
Loading

0 comments on commit d5c194b

Please sign in to comment.