forked from sass/libsass
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.cpp
133 lines (110 loc) · 4.49 KB
/
lexer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#include <cctype>
#include <cstddef>
#include <iostream>
#include <iomanip>
#include "lexer.hpp"
#include "constants.hpp"
namespace Sass {
using namespace Constants;
namespace Prelexer {
//####################################
// BASIC CHARACTER MATCHERS
//####################################
// Match standard control chars
// Match a literal '@' at the current position.
// Forwards to the exactly<'@'> matcher and returns its result unchanged.
const char* kwd_at(const char* src)
{
  return exactly<'@'>(src);
}
// Match a literal '.' at the current position.
// Forwards to the exactly<'.'> matcher and returns its result unchanged.
const char* kwd_dot(const char* src)
{
  return exactly<'.'>(src);
}
// Match a literal ',' at the current position.
// Forwards to the exactly<','> matcher and returns its result unchanged.
const char* kwd_comma(const char* src)
{
  return exactly<','>(src);
}
// Match a literal ':' at the current position.
// Forwards to the exactly<':'> matcher and returns its result unchanged.
const char* kwd_colon(const char* src)
{
  return exactly<':'>(src);
}
// Match a literal '*' at the current position.
// Forwards to the exactly<'*'> matcher and returns its result unchanged.
const char* kwd_star(const char* src)
{
  return exactly<'*'>(src);
}
// Match a literal '+' at the current position.
// Forwards to the exactly<'+'> matcher and returns its result unchanged.
const char* kwd_plus(const char* src)
{
  return exactly<'+'>(src);
}
// Match a literal '-' at the current position.
// Forwards to the exactly<'-'> matcher and returns its result unchanged.
const char* kwd_minus(const char* src)
{
  return exactly<'-'>(src);
}
// Match a literal '/' at the current position.
// Forwards to the exactly<'/'> matcher and returns its result unchanged.
const char* kwd_slash(const char* src)
{
  return exactly<'/'>(src);
}
//####################################
// Implement some functions that already exist in the standard library,
// because those are locale-aware, which has caused trouble in the past.
// The local versions also seem to improve performance by quite a bit.
//####################################
// Locale independent test for an ASCII letter ([A-Za-z]).
// Returns true only for 'A'..'Z' and 'a'..'z'.
const bool is_alpha(const char& chr)
{
  const bool upper_case = chr >= 'A' && chr <= 'Z';
  const bool lower_case = chr >= 'a' && chr <= 'z';
  return upper_case || lower_case;
}
// Locale independent test for ASCII whitespace.
// Accepts the blank plus the control range '\t'..'\r'
// (tab, linefeed, vertical tab, formfeed, carriage return).
const bool is_space(const char& chr)
{
  switch (chr) {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
      return true;
    default:
      return false;
  }
}
// Locale independent test for an ASCII decimal digit ([0-9]).
const bool is_digit(const char& chr)
{
  return chr >= '0' && chr <= '9';
}
// Locale independent test for an ASCII hexadecimal digit ([0-9a-fA-F]).
const bool is_xdigit(const char& chr)
{
  // decimal digits
  if (chr >= '0' && chr <= '9') return true;
  // lower case hex letters
  if (chr >= 'a' && chr <= 'f') return true;
  // upper case hex letters
  return chr >= 'A' && chr <= 'F';
}
// Locale independent punctuation test.
// Only the dot is treated as punctuation here.
const bool is_punct(const char& chr)
{
  if (chr != '.') return false;
  return true;
}
// Locale independent test for an ASCII letter or decimal digit.
// Combines is_alpha and is_digit.
const bool is_alnum(const char& chr)
{
  if (is_alpha(chr)) return true;
  return is_digit(chr);
}
// Test whether the char lies outside the 7-bit ascii range.
// The value is converted to unsigned first, so a negative char
// (where plain char is signed) also compares greater than 127.
const bool is_unicode(const char& chr)
{
  const unsigned code = unsigned(chr);
  return code > 127;
}
// Test for a word character (used for look ahead checks).
// A word character is a hyphen, a char outside the ascii
// range, or an ascii letter or digit.
const bool is_character(const char& chr)
{
  if (chr == '-') return true;
  if (is_unicode(chr)) return true;
  return is_alnum(chr);
}
//####################################
// BASIC CLASS MATCHERS
//####################################
// create matchers that advance the position
// Consume one whitespace char (see is_space).
// Returns the position after it, or 0 if there is no match.
const char* space(const char* src)
{
  if (!is_space(*src)) return 0;
  return src + 1;
}
// Consume one ascii letter (see is_alpha).
// Returns the position after it, or 0 if there is no match.
const char* alpha(const char* src)
{
  if (!is_alpha(*src)) return 0;
  return src + 1;
}
// Consume one char outside the ascii range (see is_unicode).
// Returns the position after it, or 0 if there is no match.
const char* unicode(const char* src)
{
  if (!is_unicode(*src)) return 0;
  return src + 1;
}
// Consume one decimal digit (see is_digit).
// Returns the position after it, or 0 if there is no match.
const char* digit(const char* src)
{
  if (!is_digit(*src)) return 0;
  return src + 1;
}
// Consume one hexadecimal digit (see is_xdigit).
// Returns the position after it, or 0 if there is no match.
const char* xdigit(const char* src)
{
  if (!is_xdigit(*src)) return 0;
  return src + 1;
}
// Consume one ascii letter or digit (see is_alnum).
// Returns the position after it, or 0 if there is no match.
const char* alnum(const char* src)
{
  if (!is_alnum(*src)) return 0;
  return src + 1;
}
// Consume one punctuation char (see is_punct).
// Returns the position after it, or 0 if there is no match.
const char* punct(const char* src)
{
  if (!is_punct(*src)) return 0;
  return src + 1;
}
// Consume one word character (see is_character).
// Returns the position after it, or 0 if there is no match.
const char* character(const char* src)
{
  if (!is_character(*src)) return 0;
  return src + 1;
}
// Match multiple ctype characters.
// Match a run of whitespace chars.
// Forwards to the one_plus<space> matcher and returns its result unchanged.
const char* spaces(const char* src)
{
  return one_plus<space>(src);
}
// Match a run of decimal digits.
// Forwards to the one_plus<digit> matcher and returns its result unchanged.
const char* digits(const char* src)
{
  return one_plus<digit>(src);
}
// Whitespace handling.
// Whitespace check built on the negated space matcher.
// Forwards to negate< space > and returns its result unchanged.
const char* no_spaces(const char* src)
{
  return negate< space >(src);
}
// Match optional whitespace.
// Forwards to zero_plus< space > and returns its result unchanged.
const char* optional_spaces(const char* src)
{
  return zero_plus< space >(src);
}
// Match any single character.
// At the terminating NUL nothing is consumed and the
// position is returned unchanged (never returns 0).
const char* any_char(const char* src)
{
  if (*src == 0) return src;
  return src + 1;
}
// Match a word boundary (zero-width look ahead).
// Fails with 0 if the next char is a word character (see is_character),
// otherwise returns the position unchanged.
const char* word_boundary(const char* src)
{
  if (is_character(*src)) return 0;
  return src;
}
// Match a line break /(?:\n|\r\n?)/ or the end of the input.
// Returns the position right after the line break, `src` itself when
// already at the terminating NUL (zero-width match, nothing is consumed),
// or 0 if `src` does not start with a line break.
const char* re_linebreak(const char* src)
{
  // end of input: succeed without advancing, so the returned
  // position never points past the terminating NUL character
  if (*src == 0) return src;
  // unix linefeed
  if (*src == '\n') return src + 1;
  // a carriage return may optionally be followed by a linefeed
  if (*src == '\r') return *(src + 1) == '\n' ? src + 2 : src + 1;
  // no linefeed
  return 0;
}
// Assert the end of a line (zero-width positive look ahead).
// Succeeds at the terminating NUL, a linefeed or a carriage return
// and returns the position unchanged; otherwise returns 0.
const char* end_of_line(const char* src)
{
  switch (*src) {
    case 0:
    case '\n':
    case '\r':
      return src;
    default:
      return 0;
  }
}
}
}