-
Notifications
You must be signed in to change notification settings - Fork 59
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Modules: new module to filter queries containing suspicious characters
- Loading branch information
Frantisek Tobias
committed
Dec 23, 2024
1 parent
cde3cfd
commit 238d65b
Showing
7 changed files
with
337 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
.. SPDX-License-Identifier: GPL-3.0-or-later | ||
.. _mod-filter: | ||
|
||
Filter | ||
====== | ||
|
||
This module blocks queries that contain suspicious characters. | ||
When loaded, any queries containing forbidden ASCII characters (see RFC 1035 2.3.1. | ||
Preferred name syntax), or UTF-8 characters that | ||
aren't whitelisted, shall result in ``NXDOMAIN``. The current default | ||
whitelist consists of UTF-8 characters native to some Central European languages. | ||
As of yet no configuration utility for this module is provided, therefore any | ||
changes to the whitelist have to be performed in ``modules/filter/filter.c``. | ||
|
||
This module is not loaded by default. If you'd like to enable it you can load it like so: | ||
|
||
.. code-block:: lua | ||
modules.load('filter') | ||
.. note:: Avoid writing advanced regular expressions into the whitelist, | ||
this is not the intended use and might exhibit undefined behaviour. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,214 @@ | ||
/* Copyright (C) CZ.NIC, z.s.p.o. <[email protected]> | ||
* SPDX-License-Identifier: GPL-3.0-or-later | ||
*/ | ||
|
||
/** | ||
* @file filter.c | ||
* @brief blocks queries that contain other than whitelisted characters | ||
* | ||
* The whitelist is for 'extra' characters; the ASCII characters listed in RFC 1035 | ||
* are added automatically. Characters can be specified by | ||
* code point, e.g. \\N{U+00DF} = ß (\N has to be escaped: while it is | ||
* valid pcre2 syntax, it is not valid in a C string literal) | ||
*/ | ||
|
||
#include <idn2.h> | ||
#include <stdlib.h> | ||
#include <string.h> | ||
#include <lib/log.h> | ||
#include "lib/layer.h" | ||
#include "lib/resolve.h" | ||
|
||
#define PCRE2_CODE_UNIT_WIDTH 8 | ||
#include <pcre2.h> | ||
|
||
#define CHWHITELIST "ěščřžýáíéóůúďťľĺŕäôäąćęłńśźż\\N{U+00DF}\\N{U+00FC}\\N{U+00F6}" | ||
#define HEAD "^[a-z0-9" | ||
#define TAIL "-]+$" | ||
|
||
#define ASCIILIMIT 0x80 | ||
#define MAXLABELSIZE (63 * 4) + 1 | ||
|
||
/* State kept by the filter module: the compiled whitelist pattern plus PCRE2 bookkeeping. */
struct filter_data { | ||
uint32_t option_bits; /* option flags handed to pcre2_compile() */ | ||
PCRE2_SIZE erroroffset; /* receives the error offset reported by pcre2_compile() */ | ||
PCRE2_SIZE subject_length; /* NOTE(review): not referenced by the code visible here */ | ||
pcre2_match_data *match_data; /* match data block passed to pcre2_match() */ | ||
pcre2_code *re; /* compiled pattern; NULL until compilation succeeds */ | ||
}; | ||
|
||
/**
 * Finish the request with an NXDOMAIN answer marked as blocked.
 *
 * Clears the AD bit, attaches the "blocked" extended DNS error and moves the
 * layer into KR_STATE_DONE. If no answer packet can be obtained, nothing is
 * modified.
 *
 * @param ctx  layer context of the request being answered
 * @return the (possibly updated) layer state
 */
static int create_mismatch_answer(kr_layer_t *ctx)
{
	struct kr_request *request = ctx->req;
	knot_pkt_t *pkt = kr_request_ensure_answer(request);

	if (pkt != NULL) {
		knot_wire_set_rcode(pkt->wire, KNOT_RCODE_NXDOMAIN);
		knot_wire_clear_ad(pkt->wire);
		kr_request_set_extended_error(request, KNOT_EDNS_EDE_BLOCKED,
					      "RIQZ: suspicious query");
		ctx->state = KR_STATE_DONE;
	}

	return ctx->state;
}
|
||
int is_ascii(char *str) | ||
{ | ||
for (; *str; str++) | ||
if (*str & ASCIILIMIT) | ||
return -1; | ||
return 0; | ||
} | ||
|
||
char *prep_regstr(const char *whitelist) | ||
{ | ||
int h_len = strlen(HEAD); | ||
int t_len = strlen(TAIL); | ||
int wl_len = strlen(whitelist); | ||
char *regstr = malloc(wl_len + h_len + t_len + 1); | ||
if (!regstr) | ||
return NULL; | ||
|
||
strcpy(regstr, HEAD); | ||
strcpy(regstr + h_len, whitelist); | ||
strcpy(regstr + h_len + wl_len, TAIL); | ||
regstr[t_len + h_len + wl_len] = '\0'; | ||
|
||
return regstr; | ||
} | ||
|
||
void regex_deinit(struct filter_data *re) | ||
{ | ||
if (re) { | ||
if (re->match_data) | ||
pcre2_match_data_free(re->match_data); | ||
|
||
if(re->re) | ||
pcre2_code_free(re->re); | ||
|
||
free(re); | ||
} | ||
} | ||
|
||
int regex_init(struct filter_data *data) | ||
{ | ||
char *regstr = NULL; | ||
int errornumber = 0; | ||
|
||
regstr = prep_regstr(CHWHITELIST); | ||
if (!regstr) | ||
return kr_error(ENOMEM); | ||
|
||
PCRE2_SPTR pattern = (PCRE2_SPTR)regstr; | ||
data->option_bits = PCRE2_UTF | PCRE2_UCP | PCRE2_CASELESS; | ||
data->re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, data->option_bits, | ||
&errornumber, &data->erroroffset, NULL); | ||
free(regstr); | ||
|
||
if (data->re == NULL) { | ||
PCRE2_UCHAR buffer[256]; | ||
pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); | ||
// kr_log_warning(FILTER, "pcre2 regex compilation failed: %s\n", buffer); | ||
return kr_error(errornumber == PCRE2_ERROR_NOMEMORY ? ENOMEM : EINVAL); | ||
} | ||
|
||
data->match_data = pcre2_match_data_create_from_pattern(data->re, NULL); | ||
if (!data->match_data) { | ||
// kr_log_warning(FILTER, "Failed to create match data from pattern (likely due to ENOMEM)\n"); | ||
pcre2_code_free(data->re); | ||
return kr_error(ENOMEM); | ||
} | ||
|
||
return kr_ok(); | ||
} | ||
|
||
/**
 * Layer callback: check every label of the query name against the whitelist.
 *
 * Each wire-format label is copied into a local buffer, required to be pure
 * ASCII, converted to its Unicode form with idn2_to_unicode_8z8z() and then
 * matched against the compiled pattern. The first label that fails any of
 * these steps causes the request to be answered via create_mismatch_answer().
 *
 * @param ctx  layer context
 * @return ctx->state unchanged when there is nothing to check or all labels
 *         pass, otherwise the state returned by create_mismatch_answer()
 */
static int matches(kr_layer_t *ctx)
{
	struct kr_module *module = ctx->api->data;
	struct filter_data *data = module->data;
	struct kr_request *req = ctx->req;
	struct kr_query *qry = req->current_query;
	char label[MAXLABELSIZE];

	if (!qry || !qry->sname || qry->flags.CACHED)
		return ctx->state;

	const uint8_t *wire = qry->sname;
	while (*wire != '\0') {
		const size_t length = *wire++;

		/* Reject labels that would not fit the buffer, and labels with an
		 * embedded NUL byte: the C-string based checks below would stop
		 * at the NUL and never look at the bytes following it. */
		if (length >= sizeof(label) || memchr(wire, '\0', length) != NULL)
			return create_mismatch_answer(ctx);

		memcpy(label, wire, length);
		label[length] = '\0';
		wire += length;

		if (is_ascii(label) == -1)
			return create_mismatch_answer(ctx);

		char *output = NULL;
		int ret = idn2_to_unicode_8z8z(label, &output, 0);
		if (ret != IDN2_OK)
			return create_mismatch_answer(ctx);

		PCRE2_SPTR subject = (PCRE2_SPTR)output;
		PCRE2_SIZE subject_length = (PCRE2_SIZE)strlen(output);

		ret = pcre2_match(data->re, subject, subject_length,
				  0, 0, data->match_data, NULL);

		/* The converted label is only needed for the match itself. */
		idn2_free(output);

		/* Any negative result (no match or a matching error) blocks the query. */
		if (ret < 0)
			return create_mismatch_answer(ctx);
	}

	return ctx->state;
}
|
||
KR_EXPORT | ||
int filter_init(struct kr_module *module) | ||
{ | ||
static kr_layer_api_t layer = { | ||
.begin = &matches, | ||
}; | ||
|
||
layer.data = module; | ||
module->layer = &layer; | ||
|
||
static const struct kr_prop props[] = { | ||
{ NULL, NULL, NULL } | ||
}; | ||
module->props = props; | ||
|
||
struct filter_data *data = calloc(1, sizeof(struct filter_data)); | ||
if (!data) | ||
return kr_error(ENOMEM); | ||
|
||
data->re = NULL; | ||
data->match_data = NULL; | ||
|
||
int ret = regex_init(data); | ||
if (ret != kr_ok()) { | ||
free(data); | ||
return kr_error(ret); | ||
} | ||
|
||
module->data = data; | ||
return kr_ok(); | ||
} | ||
|
||
KR_EXPORT | ||
int filter_deinit(struct kr_module *module) | ||
{ | ||
struct filter_data *data = module->data; | ||
if (data) { | ||
regex_deinit(data); | ||
module->data = NULL; | ||
} | ||
return kr_ok(); | ||
} | ||
|
||
KR_MODULE_EXPORT(filter) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
-- SPDX-License-Identifier: GPL-3.0-or-later | ||
local kres = require('kres') | ||
local condition = require('cqueues.condition') | ||
|
||
-- NOTE(review): assigned as a global on purpose, presumably so that the test configuration loads the 'filter' module; confirm | ||
modules = { 'filter' } | ||
|
||
-- helper to wait for query resolution | ||
-- Resolve `qname` and block until the `finish` callback has run.
-- Returns the answer's rcode and its ANSWER section; until the callback
-- fires these stay at SERVFAIL and an empty table.
local function wait_resolve(qname)
	local cond = condition.new()
	local finished, blocked = false, false
	local rcode, answers = kres.rcode.SERVFAIL, {}

	local function on_finish(answer, _)
		rcode = answer:rcode()
		answers = answer:section(kres.section.ANSWER)
		-- Wake the caller only if it is already parked on the condition
		if blocked then
			cond:signal()
		end
		finished = true
	end

	resolve({ name = qname, finish = on_finish })

	-- The callback may already have run; wait only when it has not
	if not finished then
		blocked = true
		cond:wait()
	end

	return rcode, answers
end
|
||
-- Resolve every name in `domains` and check that it yields `retcode` and
-- exactly `ansval` records in the ANSWER section.
local function test_filtered(domains, retcode, ansval)
	-- Label used in the assertion messages; anything but NOERROR is shown as NXDOMAIN
	local rcodestr = (retcode == kres.rcode.NOERROR) and "NOERROR" or "NXDOMAIN"

	for _, domain in ipairs(domains) do
		local rcode, answers = wait_resolve(domain)
		same(rcode, retcode, domain .. ' returns ' .. rcodestr)
		same(#answers, ansval, domain .. ' synthesised answer')
	end
end
|
||
-- Names built from whitelisted characters: expected to resolve with
-- NOERROR and one answer record each.
local function test_central_eu()
	test_filtered({
		'nic.cz',
		'xn--hkyrky-ptac70bc.cz', -- háčkyčárky.cz
		'xn--mbel-5qa.de',        -- möbel.de
		'xn--mller-kva.de',       -- müller.de
		'xn--strae-oqa.de',       -- straße.de
		'xn--lut-noa55d.com',     -- žlutý.com
	}, kres.rcode.NOERROR, 1)
end
|
||
-- Names containing characters outside the whitelist: expected to be
-- answered with NXDOMAIN and an empty ANSWER section.
local function test_forbidden()
	test_filtered({
		'xn--mgberp4a5d4ar.com',    -- السعودية.com
		'xn--h1alffa9f.xn--p1ai',   -- россия.рф
		'xn--11bd3b0bc5g3dta.test', -- योगात्मक.test
		'xn--io0a7i.xn--fiqs8s',    -- 网络.中国
		'xn--trke-2oa7j.com',       -- türkçe.com
		'\x82.com',                 -- raw non-ASCII byte in the label
	}, kres.rcode.NXDOMAIN, 0)
end
|
||
-- The test functions defined above, returned as a list | ||
return { | ||
test_central_eu, | ||
test_forbidden, | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# SPDX-License-Identifier: GPL-3.0-or-later | ||
# C module: filter | ||
|
||
# Sources of the module; they are also appended to the c_src_lint list | ||
filter_src = files([ | ||
'filter.c', | ||
]) | ||
c_src_lint += filter_src | ||
|
||
# Register filter.test.lua in config_tests under the name 'filter' | ||
config_tests += [ | ||
['filter', files('filter.test.lua')], | ||
] | ||
|
||
# Build 'filter' as a shared module with an empty name prefix, depending on | ||
# libidn2 and libpcre2 in addition to mod_deps, and install it into modules_dir | ||
filter_mod = shared_module( | ||
'filter', | ||
filter_src, | ||
dependencies: mod_deps + [ | ||
libidn2, | ||
libpcre2 | ||
], | ||
include_directories: mod_inc_dir, | ||
name_prefix: '', | ||
install: true, | ||
install_dir: modules_dir, | ||
link_with: kresd, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters