Skip to content

Commit

Permalink
Merge 0e3adbf into 7f693e2
Browse files Browse the repository at this point in the history
  • Loading branch information
javierdelapuente authored Jan 30, 2025
2 parents 7f693e2 + 0e3adbf commit 4565455
Show file tree
Hide file tree
Showing 12 changed files with 149 additions and 25 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/integration_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
# INTEGRATION_TEST_ARGS to operator-workflows automatically.
integration-tests:
name: Integration test with juju 3.1
uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@javi-testing
secrets: inherit
with:
juju-channel: 3.1/stable
Expand All @@ -30,19 +30,20 @@ jobs:
self-hosted-runner-label: stg-private-endpoint
openstack-interface-tests-private-endpoint:
name: openstack interface test using private-endpoint
uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@javi-testing
secrets: inherit
with:
juju-channel: 3.6/stable
pre-run-script: scripts/setup-lxd.sh
provider: lxd
test-tox-env: integration-juju3.6
modules: '["test_runner_manager_openstack"]'
extra-arguments: '--log-format="%(asctime)s %(levelname)s %(message)s"'
self-hosted-runner: true
self-hosted-runner-label: stg-private-endpoint
openstack-integration-tests-private-endpoint:
name: Integration test using private-endpoint
uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@javi-testing
secrets: inherit
with:
juju-channel: 3.6/stable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,11 @@ def _run_health_check_cloud_init(
"""
result: invoke.runners.Result = _execute_ssh_command(ssh_conn, "cloud-init status")
if not result.ok:
logger.warning("cloud-init status command failed on %s: %s.", server_name, result.stderr)
logger.error("cloud-init status command failed on %s: %s.", server_name, result.stderr)
cloud_init_log_output_result = _execute_ssh_command(ssh_conn, "cat /var/log/cloud-init-output.log")
logger.error("/var/log/cloud-init-output.log stdout: %s", cloud_init_log_output_result.stdout)
cloud_init_log_result = _execute_ssh_command(ssh_conn, "cat /var/log/cloud-init.log")
logger.error("/var/log/cloud-init.log stdout: %s", cloud_init_log_result.stdout)
return False

if CloudInitStatus.DONE in result.stdout:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,10 @@ def _wait_runner_startup(self, instance: OpenstackInstance) -> None:
logger.warning(
"cloud-init status command failed on %s: %s.", instance.server_name, result.stderr
)
cloud_init_log_output_result = ssh_conn.run("cat /var/log/cloud-init-output.log", warn=True, timeout=60)
logger.error("/var/log/cloud-init-output.log stdout: %s", cloud_init_log_output_result.stdout)
cloud_init_log_result = ssh_conn.run("cat /var/log/cloud-init.log")
logger.error("/var/log/cloud-init.log stdout: %s", cloud_init_log_result.stdout, warn=True, timeout=60)
raise RunnerStartError(f"Runner startup process not found on {instance.server_name}")
# A short running job may have already completed and exited the runner, hence check the
# condition via cloud-init status check.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ su - ubuntu -c 'cd ~/actions-runner && echo "{{ env_contents }}" > .env'

{% if aproxy_address %}
snap install aproxy --edge
snap watch --last=auto-refresh?
snap set aproxy proxy={{ aproxy_address }} listen=:54969
cat << EOF > /etc/nftables.conf
define default-ip = $(ip route get $(ip route show 0.0.0.0/0 | grep -oP 'via \K\S+') | grep -oP 'src \K\S+')
Expand Down
10 changes: 4 additions & 6 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ class ReconcileRunnersEvent(EventBase):


def catch_charm_errors(
func: Callable[["GithubRunnerCharm", EventT], None]
func: Callable[["GithubRunnerCharm", EventT], None],
) -> Callable[["GithubRunnerCharm", EventT], None]:
"""Catch common errors in charm.
Expand Down Expand Up @@ -145,7 +145,7 @@ def func_with_catch_errors(self: "GithubRunnerCharm", event: EventT) -> None:


def catch_action_errors(
func: Callable[["GithubRunnerCharm", ActionEvent], None]
func: Callable[["GithubRunnerCharm", ActionEvent], None],
) -> Callable[["GithubRunnerCharm", ActionEvent], None]:
"""Catch common errors in actions.
Expand Down Expand Up @@ -519,15 +519,13 @@ def _apt_install(self, packages: Sequence[str]) -> None:
def _on_debug_ssh_relation_changed(self, _: ops.RelationChangedEvent) -> None:
"""Handle debug ssh relation changed event."""
state = self._setup_state()
self.unit.status = MaintenanceStatus("Reconciling runners - added debug-ssh")

if not self._get_set_image_ready_status():
return
runner_scaler = self._get_runner_scaler(state)
runner_scaler.flush()
try:
runner_scaler.reconcile(state.runner_config.virtual_machines)
except ReconcileError:
logger.exception(FAILED_TO_RECONCILE_RUNNERS_MSG)
self._reconcile_openstack_runners(runner_scaler, state.runner_config.virtual_machines)

@catch_charm_errors
def _on_image_relation_joined(self, _: ops.RelationJoinedEvent) -> None:
Expand Down
37 changes: 31 additions & 6 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from tests.integration.helpers.openstack import OpenStackInstanceHelper, PrivateEndpointConfigs
from tests.status_name import ACTIVE

IMAGE_BUILDER_DEPLOY_TIMEOUT_IN_SECONDS = 30 * 60
IMAGE_BUILDER_DEPLOY_TIMEOUT_IN_SECONDS = 20 * 60

# The following line is required because we are using request.getfixturevalue in conjunction
# with pytest-asyncio. See https://github.com/pytest-dev/pytest-asyncio/issues/112
Expand Down Expand Up @@ -317,7 +317,7 @@ async def app_no_runner(
) -> AsyncIterator[Application]:
"""Application with no runner."""
await basic_app.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "0"})
await model.wait_for_idle(apps=[basic_app.name], status=ACTIVE, timeout=90 * 60)
await model.wait_for_idle(apps=[basic_app.name], status=ACTIVE, timeout=20 * 60)
yield basic_app


Expand All @@ -339,7 +339,8 @@ async def image_builder_fixture(
config={
"app-channel": "edge",
"build-interval": "12",
"revision-history-limit": "5",
# JAVI be careful, maybe all tests use the same names for the images
"revision-history-limit": "15",
"openstack-auth-url": private_endpoint_config["auth_url"],
# Bandit thinks this is a hardcoded password
"openstack-password": private_endpoint_config["password"], # nosec: B105
Expand Down Expand Up @@ -401,9 +402,33 @@ async def app_openstack_runner_fixture(
wait_idle=False,
)
await model.integrate(f"{image_builder.name}:image", f"{application.name}:image")
await model.wait_for_idle(apps=[application.name], status=ACTIVE, timeout=90 * 60)
await model.wait_for_idle(
apps=[application.name, image_builder.name], status=ACTIVE, timeout=20 * 60
)

return application
# better use test-mode charm config... but let's see
command = "find /var/lib/juju -type f -name 'constants.py' -exec sed -i 's/^CREATE_SERVER_TIMEOUT = .*/CREATE_SERVER_TIMEOUT = 900/gI' {} \\;"
run_actions = await application.run(command)
logging.info("JAVI run_actions %s", run_actions)
for action_result in run_actions.actions:
logging.info("JAVI action_result %s", action_result)
action = action_result.action
logging.info("JAVI action %s", action)
# no comment...
action_id = action.tag
if action_id.startswith("action-"):
# strip the action- part of "action-<num>" tag
action_id = action_id[7:]
action = await model._wait_for_new("action", action_id)
result = await action.wait()
logging.info("JAVI output of one unit of CREATE_SERVER_TIMEOUT %s", result.results)

yield application
try:
logging.info("JAVI after yield in app_openstack_runner_fixture")
# get_file_content(unit, filename)
except Exception:
logging.exception("JAVI something failed after yield")


@pytest_asyncio.fixture(scope="module", name="app_scheduled_events")
Expand All @@ -415,7 +440,7 @@ async def app_scheduled_events_fixture(
application = app_openstack_runner
await application.set_config({"reconcile-interval": "8"})
await application.set_config({VIRTUAL_MACHINES_CONFIG_NAME: "1"})
await model.wait_for_idle(apps=[application.name], status=ACTIVE, timeout=90 * 60)
await model.wait_for_idle(apps=[application.name], status=ACTIVE, timeout=20 * 60)
await reconcile(app=application, model=model)
return application

Expand Down
65 changes: 65 additions & 0 deletions tests/integration/helpers/openstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# See LICENSE file for licensing details.
import logging
import secrets
import threading
from asyncio import sleep
from typing import Optional, TypedDict

Expand All @@ -17,6 +18,50 @@
logger = logging.getLogger(__name__)


async def javi_wait_for_idle(openstack_connection, model, *args, **kwargs) -> None:
    """Wait for the model to become idle while periodically logging OpenStack servers.

    Debugging helper: a background thread logs the name, status and timestamps of
    every server returned by ``openstack_connection.list_servers()`` every 20
    seconds for as long as ``model.wait_for_idle`` is pending, plus one final
    listing once the wait has finished (successfully or not).

    Args:
        openstack_connection: Object exposing ``list_servers()`` (presumably an
            OpenStack SDK connection; confirm against the fixture).
        model: Object exposing an awaitable ``wait_for_idle`` (presumably a juju
            ``Model``).
        args: Positional arguments forwarded unchanged to ``model.wait_for_idle``.
        kwargs: Keyword arguments forwarded unchanged to ``model.wait_for_idle``.
    """
    logger.info("javi_wait_for_idle")
    stop_event = threading.Event()

    def _log_openstack() -> None:
        """Log the OpenStack server list every 20 seconds until asked to stop."""
        while True:
            # Event.wait returns True as soon as the event is set, so the loop
            # wakes up immediately on shutdown and still logs one last listing.
            stop_requested = stop_event.wait(20)
            # NOTE: the connection is shared with the test code and is probably
            # not thread safe; this is best-effort diagnostics only.
            try:
                servers = openstack_connection.list_servers()
            except Exception:  # pylint: disable=broad-exception-caught
                # A transient API error must not kill the diagnostics thread;
                # log it and try again on the next tick.
                logger.exception("JAVI in log openstack thread")
                servers = None
            if servers is not None:
                logger.info(" [ runner list ]")
                for runner in servers:
                    logger.info(
                        " [ runner %s ] status %s created %s updated %s",
                        runner.name,
                        runner.status,
                        runner.created_at,
                        runner.updated_at,
                    )
            if stop_requested:
                break

    # Create and start the thread before entering the try block: if starting
    # fails there is nothing to join, and the original error is not masked.
    log_thread = threading.Thread(target=_log_openstack)
    log_thread.start()
    try:
        await model.wait_for_idle(*args, **kwargs)
    finally:
        stop_event.set()
        # Blocks until the final server listing has been logged.
        log_thread.join()


class OpenStackInstanceHelper:
"""Helper class to interact with OpenStack instances."""

Expand Down Expand Up @@ -179,6 +224,26 @@ async def get_runner_name(self, unit: Unit) -> str:
assert len(runners) == 1
return runners[0].name

def log_runners(self, unit: Unit) -> None:
    """Log the name, status and timestamps of every runner found for a unit.

    Purely diagnostic: nothing is returned and any number of runners
    (including none) is accepted.

    Args:
        unit: The GitHub Runner Charm unit whose runners are logged.
    """
    runners = self._get_runners(unit)
    logger.info("[ list of runners for unit %s]", unit)
    for runner in runners:
        logger.info(
            " [ runner %s ] status %s created %s updated %s",
            runner.name,
            runner.status,
            runner.created_at,
            runner.updated_at,
        )
    # The unit must be passed here too, otherwise a literal "%s" is logged.
    logger.info("[ end list of runners for unit %s]", unit)

async def delete_single_runner(self, unit: Unit) -> None:
"""Delete the only runner.
Expand Down
18 changes: 17 additions & 1 deletion tests/integration/test_charm_fork_path_change.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,31 @@ async def test_path_config_change(

logger.info("Ensure there is a runner (this calls reconcile)")
await instance_helper.ensure_charm_has_runner(app_with_forked_repo)
logger.info("after ensure_charm_has_runner")
instance_helper.log_runners(unit)

logger.info("Change Path config option")
await app_with_forked_repo.set_config({PATH_CONFIG_NAME: path})
instance_helper.log_runners(unit)

status = await model.get_status()
logger.info(" status : %s", status)

logger.info("Reconciling (again)")
await reconcile(app=app_with_forked_repo, model=model)

logger.info("after Reconciling (again)")
instance_helper.log_runners(unit)

status = await model.get_status()
logger.info("JAVI status 2: %s", status)

runner_names = await instance_helper.get_runner_names(unit)
logger.info("runners: %s", runner_names)
assert len(runner_names) == 1
# This will crash if there is not exactly one runner.
logger.info("runner info: %s", instance_helper._get_single_runner(unit))

runner_name = runner_names[0]

runners_in_repo = github_repository.get_self_hosted_runners()
Expand All @@ -62,4 +78,4 @@ async def test_path_config_change(
filter(lambda runner: runner.name == runner_name, runners_in_repo)
)

assert len(runner_in_repo_with_same_name) == 1
assert len(runner_in_repo_with_same_name) == 1, "there has to be 1 runner in the repo"
5 changes: 3 additions & 2 deletions tests/integration/test_charm_scheduled_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from juju.model import Model

from tests.integration.helpers.common import wait_for
from tests.integration.helpers.openstack import OpenStackInstanceHelper
from tests.integration.helpers.openstack import OpenStackInstanceHelper, javi_wait_for_idle
from tests.status_name import ACTIVE

logger = logging.getLogger(__name__)
Expand All @@ -28,6 +28,7 @@ async def test_update_interval(
model: Model,
app_scheduled_events: Application,
instance_helper: OpenStackInstanceHelper,
openstack_connection,
) -> None:
"""
arrange: A working application with one runner.
Expand Down Expand Up @@ -57,7 +58,7 @@ async def _no_runners_available() -> bool:

logger.info("Wait for 10 minutes")
await sleep(10 * 60)
await model.wait_for_idle(status=ACTIVE, timeout=20 * 60)
await javi_wait_for_idle(openstack_connection, model, status=ACTIVE, timeout=20 * 60)

newnames = await instance_helper.get_runner_names(unit)
assert len(newnames) == 1, "There should be one runner after reconciliation"
Expand Down
6 changes: 3 additions & 3 deletions tests/integration/test_charm_upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,10 @@ async def test_charm_upgrade(
)
await model.integrate(f"{image_builder.name}:image", f"{application.name}:image")
await model.wait_for_idle(
apps=[application.name],
apps=[application.name, image_builder.name],
raise_on_error=False,
wait_for_active=True,
timeout=180 * 60,
timeout=20 * 60,
check_freq=30,
)
origin = client.CharmOrigin(
Expand Down Expand Up @@ -125,6 +125,6 @@ async def test_charm_upgrade(
apps=[application.name],
raise_on_error=False,
wait_for_active=True,
timeout=180 * 60,
timeout=20 * 60,
check_freq=30,
)
12 changes: 10 additions & 2 deletions tests/integration/test_debug_ssh.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from juju.model import Model

from tests.integration.helpers.common import dispatch_workflow, get_job_logs
from tests.integration.helpers.openstack import OpenStackInstanceHelper
from tests.integration.helpers.openstack import OpenStackInstanceHelper, javi_wait_for_idle
from tests.status_name import ACTIVE

logger = logging.getLogger(__name__)
Expand All @@ -28,24 +28,30 @@ async def test_ssh_debug(
test_github_branch: Branch,
tmate_ssh_server_unit_ip: str,
instance_helper: OpenStackInstanceHelper,
openstack_connection,
):
"""
arrange: given an integrated GitHub-Runner charm and tmate-ssh-server charm.
act: when canonical/action-tmate is triggered.
assert: the ssh connection info from action-log and tmate-ssh-server matches.
"""
await model.wait_for_idle(status=ACTIVE, timeout=60 * 120)
await javi_wait_for_idle(openstack_connection, model, status=ACTIVE, timeout=60 * 120)

unit = app_no_wait_tmate.units[0]
# We need the runner to connect to the current machine, instead of the tmate_ssh_server unit,
# as the tmate_ssh_server is not routable.
logger.info("before iptables")
instance_helper.log_runners(unit)

dnat_comman_in_runner = f"sudo iptables -t nat -A OUTPUT -p tcp -d {tmate_ssh_server_unit_ip} --dport 10022 -j DNAT --to-destination 127.0.0.1:10022"
_, _, _ = await instance_helper.run_in_instance(
unit,
dnat_comman_in_runner,
assert_on_failure=True,
)
await instance_helper.expose_to_instance(unit=unit, port=10022, host=tmate_ssh_server_unit_ip)
logger.info("after exposing instance")
instance_helper.log_runners(unit)

# trigger tmate action
logger.info("Dispatching workflow_dispatch_ssh_debug.yaml workflow.")
Expand All @@ -59,6 +65,8 @@ async def test_ssh_debug(
workflow_id_or_name=SSH_DEBUG_WORKFLOW_FILE_NAME,
)

logger.info("after workflow run")
instance_helper.log_runners(unit)
logs = get_job_logs(workflow_run.jobs("latest")[0])

# ensure ssh connection info printed in logs.
Expand Down
Loading

0 comments on commit 4565455

Please sign in to comment.