Skip to content

Commit

Permalink
feat: use glob-to-regex to simplify and enhance rule_based_entry_filter
Browse files Browse the repository at this point in the history
  • Loading branch information
mhx committed Nov 17, 2024
1 parent 53cd9d5 commit 2129797
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 75 deletions.
7 changes: 4 additions & 3 deletions doc/mkdwarfs.md
Original file line number Diff line number Diff line change
Expand Up @@ -693,10 +693,11 @@ Patterns support `?` and `*` wildcards matching a single character
and any number of characters, respectively. These patterns don't match
across directory separators (`/`).

Patterns also support the `**` wildcard, which matches across directory
separators.
Patterns also support the `**` globstar wildcard, which matches across
directory separators.

Patterns also support character classes.
Patterns also support character classes (`[avt]`), ranges (`[a-h]`),
and complementation (`[!a-h]`).

Here's an exemplary rule set:

Expand Down
122 changes: 56 additions & 66 deletions src/writer/rule_based_entry_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
#include <dwarfs/writer/entry_interface.h>
#include <dwarfs/writer/rule_based_entry_filter.h>

#include <dwarfs/internal/glob_to_regex.h>

namespace dwarfs::writer {

namespace internal {
Expand All @@ -51,12 +53,22 @@ struct filter_rule {
};

filter_rule(rule_type type, bool floating, std::string const& re,
std::string const& rule)
std::string_view rule, bool ignore_case = false)
: type{type}
, floating{floating}
, re{re}
, re{re, regex_flags(ignore_case)}
, rule{rule} {}

static constexpr std::regex_constants::syntax_option_type
regex_flags(bool ignore_case) {
auto flags =
std::regex_constants::ECMAScript | std::regex_constants::optimize;
if (ignore_case) {
flags |= std::regex_constants::icase;
}
return flags;
}

rule_type type;
bool floating;
std::regex re;
Expand Down Expand Up @@ -89,14 +101,34 @@ class rule_based_entry_filter_ : public rule_based_entry_filter::impl {

template <typename LoggerPolicy>
auto rule_based_entry_filter_<LoggerPolicy>::compile_filter_rule(
std::string_view rule_sv) -> filter_rule {
std::string rule{rule_sv};
std::string_view rule) -> filter_rule {
std::string re;
filter_rule::rule_type type;

auto* p = rule.c_str();
if (rule.empty()) {
throw std::runtime_error("empty filter rule");
}

auto splitpos = rule.find_first_of(' ');

if (splitpos == std::string::npos) {
throw std::runtime_error("invalid filter rule: " + std::string(rule));
}

switch (*p) {
auto pattern_start = rule.find_first_not_of(' ', splitpos);

if (pattern_start == std::string::npos) {
throw std::runtime_error("no pattern in filter rule: " + std::string(rule));
}

auto prefix = rule.substr(0, splitpos);
auto pattern = rule.substr(pattern_start);

if (prefix.empty()) {
throw std::runtime_error("no prefix in filter rule: " + std::string(rule));
}

switch (prefix[0]) {
case '+':
type = filter_rule::rule_type::include;
break;
Expand All @@ -107,76 +139,34 @@ auto rule_based_entry_filter_<LoggerPolicy>::compile_filter_rule(
throw std::runtime_error("rules must start with + or -");
}

while (*++p == ' ')
;

// If the start of the pattern is not explicitly anchored, make it floating.
bool floating = *p && *p != '/';

if (floating) {
re += ".*/";
}

while (*p) {
switch (*p) {
case '\\':
re += *p++;
if (p) {
re += *p++;
}
continue;

case '*': {
int nstar = 1;
while (*++p == '*') {
++nstar;
}
switch (nstar) {
case 1:
if (re.ends_with('/') and (*p == '/' or *p == '\0')) {
re += "[^/]+";
} else {
re += "[^/]*";
}
break;
case 2:
re += ".*";
break;
default:
throw std::runtime_error("too many *s");
}
}
continue;
bool ignore_case{false};

case '?':
re += "[^/]";
break;

case '.':
case '+':
case '^':
case '$':
case '(':
case ')':
case '{':
case '}':
case '|':
re += '\\';
re += *p;
prefix.remove_prefix(1);
for (auto opt : prefix) {
switch (opt) {
case 'i':
ignore_case = true;
break;

default:
re += *p;
break;
throw std::runtime_error(
fmt::format("unknown option '{}' in filter rule: {}", opt, rule));
}
}

++p;
// If the start of the pattern is not explicitly anchored, make it floating.
bool floating = pattern[0] != '/';

if (floating) {
re += ".*/";
}

re += dwarfs::internal::glob_to_regex_string(pattern) + "$";

LOG_DEBUG << "'" << rule << "' -> '" << re << "' [floating=" << floating
<< "]";
<< ", ignore_case=" << ignore_case << "]";

return filter_rule(type, floating, re, rule);
return filter_rule(type, floating, re, rule, ignore_case);
}

template <typename LoggerPolicy>
Expand Down
24 changes: 24 additions & 0 deletions test/filter_test_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,30 @@ R"(
"usr/lib/Mcrt1.o",
"usr/lib64",
"usr/lib64/gcrt1.o",
}},
{
"NoIgnoreCase",
R"(
+ mc*
+ *led*
- *
)", {
"",
}},
{
"IgnoreCase",
R"(
+i mc*
+i *led*
- *
)", {
"",
"usr",
"usr/lib",
"usr/lib/Mcrt1.o",
"usr/lib64",
"usr/lib64/xtables",
"usr/lib64/xtables/libxt_LED.so",
}},
// clang-format on
};
Expand Down
43 changes: 37 additions & 6 deletions test/tool_main_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1370,6 +1370,43 @@ TEST(mkdwarfs_test, cannot_combine_input_list_and_filter) {
::testing::HasSubstr("cannot combine --input-list and --filter"));
}

TEST(mkdwarfs_test, rules_must_start_with_plus_or_minus) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "% *"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("rules must start with + or -"));
}

TEST(mkdwarfs_test, empty_filter_rule) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", ""}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("empty filter rule"));
}

TEST(mkdwarfs_test, invalid_filter_rule) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "+i"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid filter rule"));
}

TEST(mkdwarfs_test, no_pattern_in_filter_rule) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "+ "}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("no pattern in filter rule"));
}

TEST(mkdwarfs_test, no_prefix_in_filter_rule) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", " foo"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("no prefix in filter rule"));
}

TEST(mkdwarfs_test, unknown_option_in_filter_rule) {
auto t = mkdwarfs_tester::create_empty();
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "+x foo"}));
EXPECT_THAT(t.err(),
::testing::HasSubstr("unknown option 'x' in filter rule"));
}

TEST(mkdwarfs_test, cannot_open_input_list_file) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"--input-list", "missing.list", "-o", "-"}));
Expand Down Expand Up @@ -1639,12 +1676,6 @@ TEST(mkdwarfs_test, invalid_progress_mode) {
EXPECT_THAT(t.err(), ::testing::HasSubstr("invalid progress mode"));
}

TEST(mkdwarfs_test, invalid_filter_rule) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "-F", "grmpf"}));
EXPECT_THAT(t.err(), ::testing::HasSubstr("could not parse filter rule"));
}

TEST(mkdwarfs_test, time_resolution_zero) {
mkdwarfs_tester t;
EXPECT_NE(0, t.run({"-i", "/", "-o", "-", "--time-resolution=0"}));
Expand Down

0 comments on commit 2129797

Please sign in to comment.