Skip to content

Commit

Permalink
Change juju controller strategy to k8s
Browse files Browse the repository at this point in the history
Currently the juju controller is bootstrapped on the local
machine as a manual cloud. This causes issues when maintaining
an HA controller and during upgrades. So the strategy has been
changed to bootstrap an lxd controller instead and finally migrate
the machine model to a juju controller on k8s.

Deploying LXD controller should be performed manually by the user.
sunbeam bootstrap checks for an LXD controller on the localhost cloud
and performs the following steps:
* Add manual cloud and Empty credentials
* Add machine model
* Add the bootstrap machine to the model
* Add juju space and update the model with the space
* Deploy sunbeam-machine plan and microk8s/k8s plan
* Add k8s cloud on juju client
* Bootstrap juju controller on k8s with controller-service-type
  loadbalancer (The flag will create a lb ip for controller so that
  it will be reachable for non-bootstrap machines)
* Add manual cloud and empty credentials on juju controller
  on k8s
* Migrate machine model from lxd controller to k8s based
  juju controller
* Add k8s credential to juju controller on k8s

Some other internal changes:
* Add new config `juju_controller_migrated_to_k8s` in cluster database
  to identify whether juju controller migration to k8s is completed
* Add checks to verify if user is part of lxd group and controller
  exists on cloud `localhost`
  • Loading branch information
hemanthnakkina committed Dec 11, 2024
1 parent b07aaee commit 321a777
Show file tree
Hide file tree
Showing 17 changed files with 965 additions and 298 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/build-snap.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ jobs:
sudo apt remove --purge docker.io containerd runc -y
sudo rm -rf /run/containerd
sudo lxd init --auto
sudo lxc network create br0
sudo lxc profile device remove default eth0
sudo lxc profile device add default eth0 nic network=br0 name=eth0
sudo lxc network delete lxdbr0
sudo snap install juju --channel 3.6/stable
juju bootstrap localhost lxdcloud
sudo snap install ${{ needs.build.outputs.snap }} --dangerous
openstack.sunbeam prepare-node-script | bash -x
sudo snap connect openstack:juju-bin juju:juju-bin
Expand Down
9 changes: 9 additions & 0 deletions .github/workflows/test-snap.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,15 @@ jobs:
# 2023.x, 2024.1/beta does not support k8s provider
if [[ ! ${{ inputs.snap-channel }} =~ "2024.1/edge" && ${{ inputs.k8s-provider }} == "k8s" ]]; then echo "k8s provider not supported"; exit 1; fi
sudo lxd init --auto
sudo lxc network create br0
sudo lxc profile device remove default eth0
sudo lxc profile device add default eth0 nic network=br0 name=eth0
sudo lxc network delete lxdbr0
sudo snap install juju --channel 3.6/stable
juju bootstrap localhost lxdcloud
sudo snap install openstack --channel ${{ inputs.snap-channel }}
sudo snap set openstack k8s.provider=${{ inputs.k8s-provider }}
Expand Down
22 changes: 22 additions & 0 deletions sunbeam-python/sunbeam/clusterd/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,10 @@ class ClusterService(MicroClusterService, ExtendedAPIService):
# successfully run. Note: this is distinct from microcluster bootstrap.
SUNBEAM_BOOTSTRAP_KEY = "sunbeam_bootstrapped"

# This key is used to determine if Juju controller is migrated to k8s
# from lxd. This is used only in local type deployment.
JUJU_CONTROLLER_MIGRATE_KEY = "juju_controller_migrated_to_k8s"

def bootstrap(
self, name: str, address: str, role: list[str], machineid: int = -1
) -> None:
Expand Down Expand Up @@ -303,3 +307,21 @@ def check_sunbeam_bootstrapped(self) -> bool:
except service.ClusterServiceUnavailableException:
state = False
return state

def unset_juju_controller_migrated(self) -> None:
    """Flag the juju controller as not migrated to k8s.

    The key is not deleted: update_config is called to overwrite its
    value with the JSON-encoded string "False".
    """
    not_migrated = json.dumps("False")
    self.update_config(self.JUJU_CONTROLLER_MIGRATE_KEY, not_migrated)

def set_juju_controller_migrated(self) -> None:
    """Flag the juju controller as migrated to k8s.

    update_config is called to store the JSON-encoded string "True"
    under JUJU_CONTROLLER_MIGRATE_KEY.
    """
    migrated = json.dumps("True")
    self.update_config(self.JUJU_CONTROLLER_MIGRATE_KEY, migrated)

def check_juju_controller_migrated(self) -> bool:
    """Check if juju controller has been migrated to k8s.

    The flag is read with get_config and JSON-decoded. The setters of
    this key store the JSON *strings* "True" / "False", so the decoded
    value cannot be returned as-is: the string "False" is truthy and
    would be reported as "migrated". String values are therefore compared
    case-insensitively against "true"; any other decoded value (e.g. a
    JSON boolean) is converted with bool().

    Returns:
        True only if the stored flag says the controller was migrated.
        False if the key is not set or the cluster service is
        unavailable.
    """
    try:
        state = json.loads(self.get_config(self.JUJU_CONTROLLER_MIGRATE_KEY))
    except (
        service.ConfigItemNotFoundException,
        service.ClusterServiceUnavailableException,
    ):
        return False

    if isinstance(state, str):
        return state.strip().lower() == "true"
    return bool(state)
62 changes: 62 additions & 0 deletions sunbeam-python/sunbeam/core/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import base64
import enum
import grp
import json
import logging
import os
Expand All @@ -33,6 +34,7 @@
get_host_total_cores,
get_host_total_ram,
)
from sunbeam.core.juju import JujuStepHelper

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -553,3 +555,63 @@ def run(self) -> bool:
else:
self.message = f"Juju controller {self.controller} is not registered."
return False


class LxdGroupCheck(Check):
    """Check if user is member of lxd group."""

    def __init__(self):
        # USER may be unset in the environment; self.user is then None and
        # the membership test in run() reports a failure.
        self.user = os.environ.get("USER")
        self.group = "lxd"

        super().__init__(
            "Check for lxd group membership",
            f"Checking if user {self.user} is member of group {self.group}",
        )

    def run(self) -> bool:
        """Return false if user is not member of group.

        Checks:
            - Group exists
            - User is listed as a member of the group

        On failure, self.message is set to an explanation for the user.
        """
        try:
            members = grp.getgrnam(self.group).gr_mem
        except KeyError:
            # grp.getgrnam raises KeyError when the group entry is missing;
            # report that as a failed check instead of crashing.
            self.message = (
                f"Group {self.group!r} does not exist on this machine. "
                "Is LXD installed?"
            )
            return False

        if self.user in members:
            return True

        self.message = (
            # The trailing newline separates this line from the next literal
            # (the two were previously fused into "...groupInsufficient...").
            f"{self.user!r} not part of {self.group} group\n"
            "Insufficient permissions to run sunbeam commands\n"
            f"Add the user {self.user!r} to the {self.group!r} group:\n"
            "\n"
            f" sudo usermod -a -G {self.group} {self.user}\n"
            "\n"
            "After this, reload the user groups either via a reboot or by"
            f" running 'newgrp {self.group}'."
        )
        return False


class LXDJujuControllerRegistrationCheck(Check):
    """Check if lxd juju controller exists."""

    def __init__(self):
        super().__init__(
            "Check existence of LXD Juju Controller",
            "Checking if lxd juju controller exists",
        )

    def run(self) -> bool:
        """Check if lxd juju controller exists.

        Asks JujuStepHelper for the controllers on cloud "localhost".
        Returns True if at least one is found; otherwise sets self.message
        to bootstrap instructions and returns False.
        """
        controllers = JujuStepHelper().get_controllers(clouds=["localhost"])
        if controllers:
            return True

        self.message = (
            # The newline keeps the summary and the instruction on separate
            # lines (they were previously fused into "...on LXDBootstrap...").
            "Missing Juju controller on LXD\n"
            "Bootstrap Juju controller on LXD:"
            "\n"
            " juju bootstrap localhost"
            "\n"
        )
        return False
41 changes: 26 additions & 15 deletions sunbeam-python/sunbeam/core/deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,21 +314,7 @@ def get_manifest(self, manifest_file: pathlib.Path | None = None) -> Manifest:
self._manifest = manifest
return manifest

def _load_tfhelpers(self):
feature_manager = self.get_feature_manager()
tfvar_map = copy.deepcopy(MANIFEST_ATTRIBUTES_TFVAR_MAP)
tfvar_map_feature = feature_manager.get_all_feature_manifest_tfvar_map()
tfvar_map = sunbeam_utils.merge_dict(tfvar_map, tfvar_map_feature)

manifest = self.get_manifest()
if not manifest.core.software.terraform:
raise MissingTerraformInfoException("Manifest is missing terraform plans.")
terraform_plans = manifest.core.software.terraform.copy()
for _, feature in manifest.get_features():
if not feature.software.terraform:
continue
terraform_plans.update(feature.software.terraform.copy())

def _get_juju_clusterd_env(self) -> dict:
env = {}
if self.juju_controller and self.juju_account:
env.update(
Expand All @@ -348,6 +334,25 @@ def _load_tfhelpers(self):
"TF_HTTP_CLIENT_PRIVATE_KEY_PEM": self.clusterd_certpair.private_key, # noqa E501
}
)
return env

def _load_tfhelpers(self):
feature_manager = self.get_feature_manager()
tfvar_map = copy.deepcopy(MANIFEST_ATTRIBUTES_TFVAR_MAP)
tfvar_map_feature = feature_manager.get_all_feature_manifest_tfvar_map()
tfvar_map = sunbeam_utils.merge_dict(tfvar_map, tfvar_map_feature)

manifest = self.get_manifest()
if not manifest.core.software.terraform:
raise MissingTerraformInfoException("Manifest is missing terraform plans.")
terraform_plans = manifest.core.software.terraform.copy()
for _, feature in manifest.get_features():
if not feature.software.terraform:
continue
terraform_plans.update(feature.software.terraform.copy())

env = {}
env.update(self._get_juju_clusterd_env())
env.update(self.get_proxy_settings())

for tfplan, tf_manifest in terraform_plans.items():
Expand All @@ -373,6 +378,12 @@ def plans_directory(self) -> pathlib.Path:
snap = Snap()
return snap.paths.user_common / "etc" / self.name

def reload_tfhelpers(self):
    """Push the current juju/clusterd environment to every cached tfhelper.

    The environment is computed once with _get_juju_clusterd_env and the
    same mapping is handed to reload_env of each helper in _tfhelpers.
    """
    juju_clusterd_env = self._get_juju_clusterd_env()
    # Only the helper objects are needed; the plan names are not used.
    for helper in self._tfhelpers.values():
        helper.reload_env(juju_clusterd_env)

def get_tfhelper(self, tfplan: str) -> TerraformHelper:
"""Get an instance of TerraformHelper for the given tfplan.
Expand Down
67 changes: 65 additions & 2 deletions sunbeam-python/sunbeam/core/juju.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
from packaging import version
from snaphelpers import Snap

from sunbeam import utils
from sunbeam.clusterd.client import Client
from sunbeam.core.common import SunbeamException
from sunbeam.versions import JUJU_BASE, SUPPORTED_RELEASE
Expand Down Expand Up @@ -86,6 +87,12 @@ class ControllerNotFoundException(JujuException):
pass


class ControllerNotReachableException(JujuException):
    """Raised when none of the controller's API endpoints can be reached."""


class ModelNotFoundException(JujuException):
"""Raised when model is missing."""

Expand Down Expand Up @@ -1369,6 +1376,17 @@ def maas_credential(cloud: str, credential: str, maas_apikey: str):
}
return credentials

@staticmethod
def empty_credential(cloud: str):
"""Create empty credential definition."""
credentials: dict[str, dict] = {"credentials": {}}
credentials["credentials"][cloud] = {
"empty-creds": {
"auth-type": "empty",
}
}
return credentials

async def get_spaces(self, model: str) -> list[dict]:
"""Get spaces in model."""
model_impl = await self.get_model(model)
Expand Down Expand Up @@ -1559,6 +1577,23 @@ def get_controller(self, controller: str) -> dict:
LOG.debug(e)
raise ControllerNotFoundException() from e

def get_controller_ip(self, controller: str) -> str:
    """Return the address part of a reachable API endpoint of a controller.

    The controller's "api-endpoints" (under "details") are passed to
    utils.first_connected_server; the port suffix after the last ":" of
    the endpoint it returns is dropped.

    Raises ControllerNotFoundException (from get_controller) or
    ControllerNotReachableException if no endpoint is returned.
    """
    details = self.get_controller(controller).get("details", {})
    reachable_endpoint = utils.first_connected_server(
        details.get("api-endpoints", [])
    )
    if reachable_endpoint:
        # Split on the last colon only, so the address part stays intact.
        address, *_ = reachable_endpoint.rsplit(":", 1)
        return address

    raise ControllerNotReachableException(
        f"Juju Controller {controller} not reachable"
    )

def add_cloud(self, name: str, cloud: dict, controller: str | None) -> bool:
"""Add cloud to client clouds.
Expand Down Expand Up @@ -1589,8 +1624,35 @@ def add_cloud(self, name: str, cloud: dict, controller: str | None) -> bool:

return True

def add_k8s_cloud_in_client(self, name: str, kubeconfig: dict):
    """Register a k8s cloud with the juju client.

    The kubeconfig is dumped as YAML into a temporary file whose path is
    passed to ``juju add-k8s`` through the KUBECONFIG environment variable.
    subprocess.run is called with check=True, so a non-zero exit raises
    subprocess.CalledProcessError.
    """
    with tempfile.NamedTemporaryFile() as kubeconfig_file:
        kubeconfig_file.write(yaml.dump(kubeconfig).encode("utf-8"))
        # Flush so the subprocess reads the complete file from disk.
        kubeconfig_file.flush()

        cmd = [self._get_juju_binary(), "add-k8s", name]
        cmd += ["--client", "--region=localhost/localhost"]

        # Inherit the current environment and point KUBECONFIG at the file.
        juju_env = {**os.environ, "KUBECONFIG": kubeconfig_file.name}

        LOG.debug(f'Running command {" ".join(cmd)}')
        process = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=True,
            env=juju_env,
        )
        LOG.debug(
            f"Command finished. stdout={process.stdout}, stderr={process.stderr}"
        )

def add_credential(self, cloud: str, credential: dict, controller: str | None):
"""Add credential to client credentials."""
"""Add credentials to client or controller.
If controller is specidifed, credential is added to controller.
If controller is None, credential is added to client.
"""
with tempfile.NamedTemporaryFile() as temp:
temp.write(yaml.dump(credential).encode("utf-8"))
temp.flush()
Expand All @@ -1600,10 +1662,11 @@ def add_credential(self, cloud: str, credential: dict, controller: str | None):
cloud,
"--file",
temp.name,
"--client",
]
if controller:
cmd.extend(["--controller", controller])
else:
cmd.extend(["--client"])
LOG.debug(f'Running command {" ".join(cmd)}')
process = subprocess.run(cmd, capture_output=True, text=True, check=True)
LOG.debug(
Expand Down
7 changes: 7 additions & 0 deletions sunbeam-python/sunbeam/core/terraform.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,13 @@ def write_terraformrc(self) -> None:
)
)

def reload_env(self, env: dict) -> None:
    """Update environment variables.

    Entries of ``env`` are merged into the existing self.env, overriding
    keys that are already present. When self.env is unset or empty, a
    *copy* of ``env`` is stored: keeping the caller's dict by reference
    would let several helpers that receive the same mapping share (and
    mutate) a single dict.
    """
    if self.env:
        self.env.update(env)
    else:
        self.env = dict(env)

def init(self) -> None:
"""Terraform init."""
os_env = os.environ.copy()
Expand Down
Loading

0 comments on commit 321a777

Please sign in to comment.