-
Notifications
You must be signed in to change notification settings - Fork 34
/
regexp_to_strings.pl
executable file
·120 lines (97 loc) · 2.69 KB
/
regexp_to_strings.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/perl
# Author: Daniel "Trizen" Șuteu
# License: GPLv3
# Date: 21 December 2014
# Website: https://github.com/trizen
# Find the minimum sentence(s) that satisfies a regular expression
# See also: https://www.perlmonks.org/?node_id=284513
# WARNING: this script is just an idea in development
# usage: perl regex_to_strings.pl [regexp]
use utf8;
use 5.010;
use strict;
use warnings;
use Regexp::Parser;
use Data::Dump qw(pp);
binmode(STDOUT, ':utf8');
{
no warnings 'redefine';
*Regexp::Parser::anyof_class::new = sub {
my ($class, $rx, $type, $neg, $how) = @_;
my $self = bless {
rx => $rx,
flags => $rx->{flags}[-1],
family => 'anyof_class',
}, $class;
if (ref $type) {
$self->{data} = $type;
}
else {
$self->{type} = $type;
$self->{data} = 'POSIX';
$self->{neg} = $neg;
$self->{how} = ${$how}; # bug-fix
}
return $self;
};
}
my $regex = shift() // 'ab(c[12]|d(n|p)o)\w{3}[.?!]{4}';
my $parser = Regexp::Parser->new($regex);
my %conv = (
alnum => 'a',
nalnum => '#',
digit => '1',
ndigit => '+',
space => ' ',
nspace => '.',
);
my @stack;
my @strings = [];
my $ref = \@strings;
my $iter = $parser->walker;
my $min = 1;
my $last_depth = 0;
while (my ($node, $depth) = $iter->()) {
my $family = $node->family;
my $type = $node->type;
if ($depth < $last_depth) {
$min = 1;
say "MIN CHANGED";
}
if ($family eq 'quant') {
$min = $node->min;
say "MIN == $min";
}
pp $family, $type, $node->qr; # for debug
if ($type =~ /^(?:close\d|tail)/) {
$ref = pop @stack;
}
elsif (exists $conv{$type}) {
push @{$ref->[-1]}, $conv{$type} x $min;
}
elsif ($family eq 'open' or $type eq 'group' or $type eq 'suspend') {
push @stack, $ref;
push @{$ref->[-1]}, [];
$ref = $ref->[-1][-1];
push @{$ref}, [];
}
elsif ($type eq 'branch') {
$#{$ref->[-1]} == -1 or push @{$ref}, [];
}
elsif ($type eq 'exact' or $type eq 'exactf') {
push @{$ref->[-1]}, $node->data x $min;
}
elsif ($type eq 'anyof' and $min > 0) {
my $regex = $node->qr;
foreach my $c (0 .. 1000000) {
if (chr($c) =~ /$regex/) {
push @{$ref->[-1]}, chr($c) x $min;
last;
}
}
}
$last_depth = $depth;
}
use Data::Dump qw(pp);
pp @strings;
# TODO: join the @strings into real $strings