Skip to content

Commit

Permalink
daemon/defer: add hard-timeout for interrupting expensive computations
Browse files Browse the repository at this point in the history
  • Loading branch information
Lukáš Ondráček committed Jan 9, 2025
1 parent d5231f0 commit 74b05f9
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 10 deletions.
36 changes: 35 additions & 1 deletion daemon/defer.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include <math.h>
#include <stdatomic.h>
#include <unistd.h>
#include "daemon/defer.h"
#include "daemon/session2.h"
#include "daemon/udp_queue.h"
Expand Down Expand Up @@ -62,6 +63,7 @@ struct defer {
size_t capacity;
kru_price_t max_decay;
uint32_t log_period;
uint32_t hard_timeout;
int cpus;
bool using_avx2;
_Atomic uint32_t log_time;
Expand Down Expand Up @@ -642,9 +644,36 @@ static void defer_queues_idle(uv_idle_t *handle)
VERBOSE_LOG("POLL\n");
}

/// SIGALRM handler enforcing the hard timeout on a continuously measured operation.
///
/// Besides being installed via signal(SIGALRM, ...), it is also called directly
/// with signum == 0 to schedule the first alarm.
///
/// Does nothing when defer is not set up or hard_timeout is zero.
/// Otherwise it computes how long the currently accounted operation has been running;
/// if the timeout is reached, the elapsed time is charged to the source and the process aborts,
/// else the next SIGALRM is scheduled for the remaining time rounded up to whole seconds.
///
/// NOTE(review): the logging and address-formatting calls below are probably not
/// async-signal-safe -- confirm this is acceptable for a handler that ends in abort().
///
/// @param signum  Delivered signal number, or 0 for the direct initializing call (affects log text only).
static void defer_alarm(int signum)
{
	if (!defer || (defer->hard_timeout == 0)) return;

	const char *origin = signum ? "received" : "initialized";

	uint64_t elapsed = 0;  // ns spent on the ongoing measured operation; stays 0 if none
	if (defer_sample_state.is_accounting) {
		elapsed = defer_get_stamp() - defer_sample_state.stamp;
		VERBOSE_LOG("SIGALRM %s, host %s used %.3f s of cpu time on ongoing operation\n",
				origin,
				kr_straddr(&defer_sample_state.addr.ip), elapsed / 1000000000.0); // XXX
	} else {
		VERBOSE_LOG("SIGALRM %s, no measuring in progress\n",
				origin);
	}

	// hard_timeout is in ms, elapsed in ns.  Subtract in signed 64-bit arithmetic,
	// so that an exceeded timeout yields a negative value directly
	// instead of relying on unsigned wrap-around and an implementation-defined conversion.
	const int64_t elapsed_ms = (int64_t)(elapsed / 1000000);
	const int64_t rest_to_timeout_ms = (int64_t)defer->hard_timeout - elapsed_ms;

	if (rest_to_timeout_ms <= 0) {
		// Refresh the loop's cached time before charging;
		// presumably defer_charge depends on it -- TODO confirm.
		uv_update_time(uv_default_loop()); // TODO more conceptual solution?
		defer_charge(elapsed, &defer_sample_state.addr, defer_sample_state.stream);
		kr_log_crit(DEFER, "Host %s used %0.3f s of cpu time continuously, interrupting kresd.\n",
				kr_straddr(&defer_sample_state.addr.ip), elapsed / 1000000000.0);
		classify(&defer_sample_state.addr, defer_sample_state.stream); // XXX
		__sync_synchronize();  // full memory barrier before terminating
		abort();
	}

	// alarm() has one-second granularity, so round the remaining milliseconds up.
	alarm((rest_to_timeout_ms + 999) / 1000);
}

/// Initialize shared memory, queues. To be called from Lua.
int defer_init(const char *mmap_file, uint32_t log_period, int cpus) // TODO possibly remove cpus; not needed
int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout, int cpus)
// TODO possibly remove cpus; not needed
{
defer_initialized = true;
if (mmap_file == NULL) {
Expand All @@ -662,6 +691,7 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus) // TODO po
.capacity = KRU_CAPACITY,
.max_decay = MAX_DECAY,
.log_period = log_period,
.hard_timeout = hard_timeout,
.cpus = cpus,
.using_avx2 = using_avx2(),
};
Expand All @@ -676,6 +706,7 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus) // TODO po
sizeof(header.capacity) +
sizeof(header.max_decay) +
sizeof(header.log_period) +
sizeof(header.hard_timeout) +
sizeof(header.cpus),
"detected padding with undefined data inside mmapped header");

Expand Down Expand Up @@ -713,6 +744,9 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus) // TODO po
for (size_t i = 0; i < QUEUES_CNT; i++)
queue_init(queues[i]);

signal(SIGALRM, defer_alarm);
defer_alarm(0);

return 0;

fail:
Expand Down
13 changes: 9 additions & 4 deletions daemon/defer.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@

/// Initialize defer, incl. shared memory with KRU, excl. idle.
KR_EXPORT
int defer_init(const char *mmap_file, uint32_t log_period, int cpus);
int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout, int cpus);

/// Initialize idle.
/// Initialize idle and SIGALRM handler.
int defer_init_idle(uv_loop_t *loop);

/// Deinitialize shared memory.
Expand Down Expand Up @@ -79,9 +79,10 @@ static inline void defer_sample_start_stamp(uint64_t stamp)
{
if (!defer) return;
kr_assert(!defer_sample_state.is_accounting);
defer_sample_state.is_accounting = true;
defer_sample_state.stamp = stamp;
defer_sample_state.addr.ip.sa_family = AF_UNSPEC;
__sync_synchronize();
defer_sample_state.is_accounting = true;
}

/// Internal; stop accounting work at specified timestamp and charge the source if applicable.
Expand All @@ -90,6 +91,7 @@ static inline void defer_sample_stop_stamp(uint64_t stamp)
if (!defer) return;
kr_assert(defer_sample_state.is_accounting);
defer_sample_state.is_accounting = false;
__sync_synchronize();

if (defer_sample_state.addr.ip.sa_family == AF_UNSPEC) return;

Expand Down Expand Up @@ -146,7 +148,10 @@ static inline void defer_sample_stop(defer_sample_state_t *prev_state, bool reus

// resume
if (prev_state) {
defer_sample_state = *prev_state;
defer_sample_state.addr = prev_state->addr;
defer_sample_state.stream = prev_state->stream;
defer_sample_state.stamp = stamp;
__sync_synchronize();
defer_sample_state.is_accounting = prev_state->is_accounting;
}
}
2 changes: 1 addition & 1 deletion daemon/lua/kres-gen-33.lua
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,7 @@ struct qr_task *worker_resolve_start(knot_pkt_t *, struct kr_qflags);
int zi_zone_import(const zi_config_t);
_Bool ratelimiting_request_begin(struct kr_request *);
int ratelimiting_init(const char *, size_t, uint32_t, uint32_t, uint16_t, uint32_t, _Bool);
int defer_init(const char *, uint32_t, int);
int defer_init(const char *, uint32_t, uint32_t, int);
struct engine {
char _stub[];
};
Expand Down
4 changes: 2 additions & 2 deletions daemon/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ int main(int argc, char **argv)

uv_loop_t *loop = uv_default_loop();
/* Catch some signals. */
uv_signal_t sigint, sigterm, sigchld;
uv_signal_t sigint, sigterm, sigchld; // +SIGALRM handled by defer
if (true) ret = uv_signal_init(loop, &sigint);
if (!ret) ret = uv_signal_init(loop, &sigterm);
if (!ret) ret = uv_signal_init(loop, &sigchld);
Expand Down Expand Up @@ -618,7 +618,7 @@ int main(int argc, char **argv)

if (!defer_initialized) {
kr_log_warning(SYSTEM, "Prioritization not initialized from Lua, using hardcoded default.\n");
ret = defer_init("defer", 1, 1);
ret = defer_init("defer", 1, 0, 1);
if (ret) {
ret = EXIT_FAILURE;
goto cleanup;
Expand Down
9 changes: 8 additions & 1 deletion doc/_static/config.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1727,11 +1727,18 @@
"pattern": "^(\\d+)(us|ms|s|m|h|d)$",
"description": "Minimal time between two log messages, or '0s' to disable.",
"default": "0s"
},
"hard-timeout": {
"type": "string",
"pattern": "^(\\d+)(us|ms|s|m|h|d)$",
"description": "If a measured operation lasts longer, kresd is interrupted; use '0s' to disable.",
"default": "0s"
}
},
"default": {
"enabled": true,
"log_period": "0s"
"log_period": "0s",
"hard_timeout": "0s"
}
},
"lua": {
Expand Down
24 changes: 24 additions & 0 deletions doc/user/config-defer.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Request prioritization (defer)
Defer tries to mitigate DoS attacks by measuring cpu time consumption of different hosts and networks
and deferring future requests from the same origin.
If there is not enough time to process all the requests, the lowest priority ones are dropped.
It also allows setting a hard timeout on continuous computation for a single request.

The time measurements are taken into account only for TCP,
as the source address of plain UDP can be forged.
Expand Down Expand Up @@ -56,3 +57,26 @@ The detailed configuration is printed by ``defer`` group on ``info`` level on st
and logging is disabled for the :option:`log-period <defer/log-period: <time ms|s|m|h|d>`.
As long as dropping is needed, one source is logged each period
and sources with more dropped queries have greater probability to be chosen.

.. option:: defer/hard-timeout: <time ms|s|m|h|d>

:default: 0s

Limit on the CPU time consumed continuously by a single request, or ``0s`` to disable.
Exceeding the limit crashes kresd; use with care.

This is intended as a last-resort defence against as yet unknown bugs
that would allow an attacker to trigger a very expensive computation with a single request,
freezing the kresd process for several seconds or minutes.

It is based on scheduling a SIGALRM to be delivered after the timeout (or up to 1s later),
which then interrupts the computation.
After the interrupt, the priority of the request's origin is decreased according to the duration,
the kresd process is terminated (dropping all pending requests, which have probably timed out already)
and started again by the manager.
To keep the measurement and priority data alive across the restart,
it is crucial to use :ref:`multiple workers <config-multiple-workers>`,
as the data is shared between them and disappears with the last one.

Continuous work on a single request usually takes under 1 ms. (TODO check)
Set the timeout to at least several seconds to avoid random crashes. (TODO or more?)
2 changes: 2 additions & 0 deletions python/knot_resolver/datamodel/defer_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ class DeferSchema(ConfigSchema):
---
enabled: Use request prioritization.
log_period: Minimal time between two log messages, or '0s' to disable.
hard_timeout: If a measured operation lasts longer, kresd is interrupted; use '0s' to disable.
"""

enabled: bool = True
log_period: TimeUnit = TimeUnit("0s")
hard_timeout: TimeUnit = TimeUnit("0s")
3 changes: 2 additions & 1 deletion python/knot_resolver/datamodel/templates/defer.lua.j2
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
assert(C.defer_init(
'{{ cfg.rundir }}/defer',
{{ cfg.defer.log_period.millis() }},
{{ cfg.defer.hard_timeout.millis() }},
{{ cfg.workers }}) == 0)
{% else %}
assert(C.defer_init(nil, 0, 0) == 0)
assert(C.defer_init(nil, 0, 0, 0) == 0)
{%- endif %}

0 comments on commit 74b05f9

Please sign in to comment.