diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index cb26e3c68..e1b6718b9 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -704,6 +704,8 @@ obs:odvr: - apkg info cache | grep archive/dev - apkg install --build-dep - apkg test --test-dep + after_script: + - journalctl -u knot-resolver.service artifacts: expire_in: 1 week paths: @@ -783,13 +785,17 @@ pkg:fedora-40: pkg:alma-9: <<: *pkg_test image: $CI_REGISTRY/packaging/apkg/full/alma-9 + before_script: + # python-watchdog is not included in the official Alma 9 packages + # install it using PyPi just for testing + - pip3 install watchdog pkg:arch: <<: *pkg_test_user image: $CI_REGISTRY/packaging/apkg/full/arch before_script: - # prometheus is an optional dependency, but our `apkg test` needs it - - pacman -Syu --noconfirm python-prometheus_client + # prometheus and watchdog are optional dependencies, but our `apkg test` needs them + - pacman -Syu --noconfirm python-prometheus_client python-watchdog # RHEL 8 derivatives would need more work due to *default* python being old #pkg:rocky-8: diff --git a/NEWS b/NEWS index 57795d027..050612214 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,7 @@ Improvements ------------ - avoid multiple log lines when IPv6 isn't available (!1633) - manager: fix startup on Linux without libsystemd (!1608) +- auto-reload TLS certificate files (!1626) Knot Resolver 6.0.9 (2024-11-11) diff --git a/distro/pkg/arch/PKGBUILD b/distro/pkg/arch/PKGBUILD index ebbd164d1..b01352c8e 100644 --- a/distro/pkg/arch/PKGBUILD +++ b/distro/pkg/arch/PKGBUILD @@ -46,6 +46,7 @@ optdepends=( 'lua51-http: http and prefill modules, trust_anchors bootstrap' 'lua51-psl: policy.slice_randomize_psl() function' 'python-prometheus_client: stats and metrics in Prometheus format' + 'python-watchdog: files monitoring and reload on changes' ) backup=('etc/knot-resolver/config.yaml') options=(debug strip) diff --git a/distro/pkg/deb/control b/distro/pkg/deb/control index 5661e9739..907c9ffa9 100644 --- a/distro/pkg/deb/control +++ b/distro/pkg/deb/control @@ -56,6 +56,7 @@ Recommends: lua-http, lua-psl, python3-prometheus-client, + python3-watchdog, Suggests: knot-resolver6-module-http, Description: caching, DNSSEC-validating DNS resolver - core binaries diff --git a/distro/pkg/rpm/knot-resolver.spec b/distro/pkg/rpm/knot-resolver.spec index ca8602ffd..91c1a148b 100644 --- a/distro/pkg/rpm/knot-resolver.spec +++ b/distro/pkg/rpm/knot-resolver.spec @@ -65,6 +65,7 @@ Requires: python3-pyyaml Requires: python3-typing-extensions %endif Recommends: python3-prometheus_client +Recommends: python3-watchdog # dnstap module dependencies # SUSE is missing protoc-c protobuf compiler diff --git a/doc/dev/build.rst b/doc/dev/build.rst index f65122146..d3d87dce9 100644 --- a/doc/dev/build.rst +++ b/doc/dev/build.rst @@ -306,6 +306,7 @@ All dependencies are also listed in `pyproject.toml diff --git a/pyproject.toml b/pyproject.toml index 9504e3c2b..c2c639927 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,9 +34,11 @@ pyyaml = "*" supervisor = "*" typing-extensions = "*" prometheus-client = { version = "*", optional = true } +watchdog = { version = "*", optional = true } [tool.poetry.extras] prometheus = ["prometheus-client"] +watchdog = ["watchdog"] [tool.poetry.group.dev.dependencies] poetry = "^1.8.3" diff --git a/python/knot_resolver/manager/files/__init__.py b/python/knot_resolver/manager/files/__init__.py new file mode 100644 index 000000000..49700656b --- /dev/null +++ b/python/knot_resolver/manager/files/__init__.py @@ -0,0 +1,3 @@ +from .watchdog import init_files_watchdog + +__all__ = ["init_files_watchdog"] diff --git a/python/knot_resolver/manager/files/watchdog.py b/python/knot_resolver/manager/files/watchdog.py new file mode 100644 index 000000000..64547192e --- /dev/null +++ b/python/knot_resolver/manager/files/watchdog.py @@ -0,0 +1,133 @@ +import importlib +import logging +from pathlib import Path +from threading import Timer +from typing import List, Optional + +from knot_resolver.controller.registered_workers import command_registered_workers +from knot_resolver.datamodel import KresConfig +from knot_resolver.datamodel.types import File +from knot_resolver.manager.config_store import ConfigStore, only_on_real_changes_update +from knot_resolver.utils import compat + +_watchdog = False +if importlib.util.find_spec("watchdog"): + _watchdog = True + +logger = logging.getLogger(__name__) + + +def tls_cert_paths(config: KresConfig) -> List[str]: + files: List[Optional[File]] = [ + config.network.tls.cert_file, + config.network.tls.key_file, + ] + return [str(file) for file in files if file is not None] + + +if _watchdog: + from watchdog.events import ( + FileSystemEvent, + FileSystemEventHandler, + ) + from watchdog.observers import Observer + + _tls_cert_watchdog: Optional["TLSCertWatchDog"] = None + + class TLSCertEventHandler(FileSystemEventHandler): + def __init__(self, files: List[Path], cmd: str) -> None: + self._files = files + self._cmd = cmd + self._timer: Optional[Timer] = None + + def _reload(self) -> None: + def command() -> None: + if compat.asyncio.is_event_loop_running(): + compat.asyncio.create_task(command_registered_workers(self._cmd)) + else: + compat.asyncio.run(command_registered_workers(self._cmd)) + logger.info("Reloading of TLS certificate files has finished") + + # skipping if reload was already triggered + if self._timer and self._timer.is_alive(): + logger.info("Skipping TLS certificate files reloading, reload command was already triggered") + return + # start a 5sec timer + logger.info("Delayed reload of TLS certificate files has started") + self._timer = Timer(5, command) + self._timer.start() + + def on_created(self, event: FileSystemEvent) -> None: + src_path = Path(str(event.src_path)) + if src_path in self._files: + logger.info(f"Watched file '{src_path}' has been created") + self._reload() + + def on_deleted(self, event: FileSystemEvent) -> None: + src_path = Path(str(event.src_path)) + if src_path in self._files: + logger.warning(f"Watched file '{src_path}' has been deleted") + if self._timer: + self._timer.cancel() + for file in self._files: + if file.parent == src_path: + logger.warning(f"Watched directory '{src_path}' has been deleted") + if self._timer: + self._timer.cancel() + + def on_modified(self, event: FileSystemEvent) -> None: + src_path = Path(str(event.src_path)) + if src_path in self._files: + logger.info(f"Watched file '{src_path}' has been modified") + self._reload() + + class TLSCertWatchDog: + def __init__(self, cert_file: Path, key_file: Path) -> None: + self._observer = Observer() + + cmd = f"net.tls('{cert_file}', '{key_file}')" + + cert_files: List[Path] = [] + cert_files.append(cert_file) + cert_files.append(key_file) + + cert_dirs: List[Path] = [] + cert_dirs.append(cert_file.parent) + if cert_file.parent != key_file.parent: + cert_dirs.append(key_file.parent) + + event_handler = TLSCertEventHandler(cert_files, cmd) + for d in cert_dirs: + self._observer.schedule( + event_handler, + str(d), + recursive=False, + ) + logger.info(f"Directory '{d}' scheduled for watching") + + def start(self) -> None: + self._observer.start() + + def stop(self) -> None: + self._observer.stop() + self._observer.join() + + @only_on_real_changes_update(tls_cert_paths) + async def _init_tls_cert_watchdog(config: KresConfig) -> None: + global _tls_cert_watchdog + if _tls_cert_watchdog: + _tls_cert_watchdog.stop() + + if config.network.tls.cert_file and config.network.tls.key_file: + logger.info("Initializing TLS certificate files WatchDog") + _tls_cert_watchdog = TLSCertWatchDog( + config.network.tls.cert_file.to_path(), + config.network.tls.key_file.to_path(), + ) + _tls_cert_watchdog.start() + + +async def init_files_watchdog(config_store: ConfigStore) -> None: + if _watchdog: + # watchdog for TLS certificate files + await config_store.register_on_change_callback(_init_tls_cert_watchdog) diff --git a/python/knot_resolver/manager/server.py b/python/knot_resolver/manager/server.py index 90fd4d3b9..b09ff7b99 100644 --- a/python/knot_resolver/manager/server.py +++ b/python/knot_resolver/manager/server.py @@ -27,7 +27,7 @@ from knot_resolver.datamodel.config_schema import KresConfig, get_rundir_without_validation from knot_resolver.datamodel.globals import Context, set_global_validation_context from knot_resolver.datamodel.management_schema import ManagementSchema -from knot_resolver.manager import metrics +from knot_resolver.manager import files, metrics from knot_resolver.utils import custom_atexit as atexit from knot_resolver.utils import ignore_exceptions_optional from knot_resolver.utils.async_utils import readfile @@ -60,8 +60,8 @@ async def error_handler(request: web.Request, handler: Any) -> web.Response: try: return await handler(request) - except DataValidationError as e: - return web.Response(text=f"validation of configuration failed:\n{e}", status=HTTPStatus.BAD_REQUEST) + except (AggregateDataValidationError, DataValidationError) as e: + return web.Response(text=str(e), status=HTTPStatus.BAD_REQUEST) except DataParsingError as e: return web.Response(text=f"request processing error:\n{e}", status=HTTPStatus.BAD_REQUEST) except KresManagerException as e: @@ -262,16 +262,7 @@ async def _handler_metrics_prometheus(self, _request: web.Request) -> web.Respon async def _handler_cache_clear(self, request: web.Request) -> web.Response: data = parse_from_mime_type(await request.text(), request.content_type) - - try: - config = CacheClearRPCSchema(data) - except (AggregateDataValidationError, DataValidationError) as e: - return web.Response( - body=e, - status=HTTPStatus.BAD_REQUEST, - content_type="text/plain", - charset="utf8", - ) + config = CacheClearRPCSchema(data) _, result = await command_single_registered_worker(config.render_lua()) return web.Response( @@ -566,6 +557,8 @@ async def start_server(config: Path = CONFIG_FILE) -> int: # noqa: PLR0915 # started, therefore before initializing manager await metrics.init_prometheus(config_store) + await files.init_files_watchdog(config_store) + # prepare instance of the server (no side effects) server = Server(config_store, config) diff --git a/setup.py b/setup.py index fccda515e..1e11e0c63 100644 --- a/setup.py +++ b/setup.py @@ -15,6 +15,7 @@ 'knot_resolver.datamodel.templates', 'knot_resolver.datamodel.types', 'knot_resolver.manager', + 'knot_resolver.manager.files', 'knot_resolver.manager.metrics', 'knot_resolver.utils', 'knot_resolver.utils.compat', @@ -27,7 +28,7 @@ ['aiohttp', 'jinja2', 'pyyaml', 'supervisor', 'typing-extensions'] extras_require = \ -{'prometheus': ['prometheus-client']} +{'prometheus': ['prometheus-client'], 'watchdog': ['watchdog']} entry_points = \ {'console_scripts': ['knot-resolver = knot_resolver.manager.main:main', diff --git a/tests/packaging/interactive/cache-clear.sh b/tests/packaging/interactive/cache-clear.sh index 79d88a123..512096d63 100755 --- a/tests/packaging/interactive/cache-clear.sh +++ b/tests/packaging/interactive/cache-clear.sh @@ -1,14 +1,14 @@ #!/usr/bin/env bash # clear full cache -kresctl cache clear +kresctl cache clear > /dev/null if [ "$?" -ne "0" ]; then echo "Could not clear full cache" exit 1 fi # clear just example.com. AAAA record, get JSON output -kresctl cache clear --json --exact-name --rr-type AAAA example.com. | python3 -m json.tool +kresctl cache clear --json --exact-name --rr-type AAAA example.com. | python3 -m json.tool > /dev/null if [ "$?" -ne "0" ]; then echo "Could not clear example.com. AAAA record or output is not a valid JSON" exit 1 diff --git a/tests/packaging/interactive/etag.sh b/tests/packaging/interactive/etag.sh index 7038e0069..d40f358f3 100755 --- a/tests/packaging/interactive/etag.sh +++ b/tests/packaging/interactive/etag.sh @@ -4,11 +4,7 @@ set -e socket_opt="--unix-socket /run/knot-resolver/kres-api.sock" -echo " etag" etag="$(curl --silent $socket_opt --fail http://localhost:5000/v1/config -o /dev/null -v 2>&1 | grep ETag | sed 's/< ETag: //;s/\s//')" -echo " etag OK" -echo " status" status=$(curl --silent $socket_opt --fail http://localhost:5000/v1/config --header "If-None-Match: $etag" -w "%{http_code}" -o /dev/null) test "$status" -eq 304 -echo " status OK" diff --git a/tests/packaging/interactive/metrics.sh b/tests/packaging/interactive/metrics.sh index 99b21124d..fbf2ff399 100755 --- a/tests/packaging/interactive/metrics.sh +++ b/tests/packaging/interactive/metrics.sh @@ -4,13 +4,13 @@ set -e curl --silent --fail --unix-socket /run/knot-resolver/kres-api.sock http://localhost/metrics > /dev/null -kresctl metrics +kresctl metrics > /dev/null if [ "$?" -ne "0" ]; then echo "Could not get metrics in JSON format" exit 1 fi -kresctl metrics --prometheus +kresctl metrics --prometheus > /dev/null if [ "$?" -ne "0" ]; then echo "Could not get metrics in Prometheus format" exit 1 diff --git a/tests/packaging/interactive/schema.sh b/tests/packaging/interactive/schema.sh index 3ea45d522..9f6716538 100755 --- a/tests/packaging/interactive/schema.sh +++ b/tests/packaging/interactive/schema.sh @@ -1,14 +1,14 @@ -#!/bin/bash +#!/usr/bin/env bash set -e -kresctl schema +kresctl schema > /dev/null if [ "$?" -ne "0" ]; then echo "Failed to generate JSON schema with 'kresctl'" exit 1 fi -kresctl schema --live +kresctl schema --live > /dev/null if [ "$?" -ne "0" ]; then echo "Failed to get JSON schema from the running resolver" exit 1 diff --git a/tests/packaging/interactive/watchdog.sh b/tests/packaging/interactive/watchdog.sh new file mode 100755 index 000000000..6e5e506a7 --- /dev/null +++ b/tests/packaging/interactive/watchdog.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash + +set -e + +gitroot=$(git rev-parse --show-toplevel) +cert_file=$gitroot/modules/http/test_tls/test.crt +key_file=$gitroot/modules/http/test_tls/test.key + +tls_certificate_conf=$(cat <> $cert_file +echo "-----------" >> $key_file + +# wait for files reload to finish +sleep 6 + +if [ $(count_errors) -ne $err_count ] || [ $(count_reloads) -eq $rel_count ]; then + echo "Could not reload modified TLS certificate files." + exit 1 +fi + +# }} + +# test replacement +# {{ + +rel_count=$(count_reloads) + +# copy cert files +cp $cert_file test.crt.new +cp $key_file test.key.new + +# edit new files +echo "-----------" >> test.crt.new +echo "-----------" >> test.key.new + +# replace files +mv -f test.crt.new $cert_file +mv -f test.key.new $key_file + +# wait for files reload to finish +sleep 6 + +if [ $(count_errors) -ne $err_count ] || [ $(count_reloads) -eq $rel_count ]; then + echo "Could not reload replaced TLS certificate files." + exit 1 +fi + +# }} + +# test recovery from deletion and creation +# {{ + +rel_count=$(count_reloads) + +# backup cert files +cp $cert_file test.crt.backup +cp $key_file test.key.backup + +# delete cert files +rm $cert_file $key_file + +# create cert files +mv test.crt.backup $cert_file +mv test.key.backup $key_file + +# wait for files reload to finish +sleep 6 + +if [ $(count_errors) -ne $err_count ] || [ $(count_reloads) -eq $rel_count ]; then + echo "Could not reload created TLS certificate files." + exit 1 +fi + +# }}