# lexer.py
from tokens import *
from utils import lexing_error


class Lexer:
    def __init__(self, source):
        self.source = source
        self.tokens = []
        self.start = 0  # index where the current lexeme begins
        self.curr = 0   # index of the next character to consume
        self.line = 1   # current line number, for error reporting

    def advance(self):
        # Consume and return the next character.
        ch = self.source[self.curr]
        self.curr += 1
        return ch

    def peek(self):
        # Return the next character without consuming it; '\0' marks end of input.
        if self.curr >= len(self.source):
            return '\0'
        return self.source[self.curr]

    def lookahead(self, n=1):
        # Return the character n places past the cursor without consuming it.
        if self.curr + n >= len(self.source):
            return '\0'
        return self.source[self.curr + n]
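    # Example: with source 'ab' and curr == 0, peek() returns 'a' and
    # lookahead() returns 'b'; neither call moves the cursor.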

    def handle_number(self):
        # Lex an integer, promoting it to a float when a '.' is followed by a digit.
        while self.peek().isdigit():
            self.advance()
        if self.peek() == '.' and self.lookahead().isdigit():
            self.advance()  # consume the '.'
            while self.peek().isdigit():
                self.advance()
            self.add_token(TOK_FLOAT)
        else:
            self.add_token(TOK_INTEGER)
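    # For example, '3.14' lexes as one TOK_FLOAT and '42' as one TOK_INTEGER,
    # while '3.' lexes as TOK_INTEGER followed by TOK_DOT, because the dot is
    # only consumed when a digit comes after it.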

    def handle_string(self, start_quote):
        # Consume characters until the matching quote. Escape sequences are not
        # processed, and newlines inside a string are not counted toward self.line.
        while self.peek() != start_quote and self.curr < len(self.source):
            self.advance()
        if self.curr >= len(self.source):
            raise SyntaxError(f'[Line {self.line}] Unterminated string.')
        self.advance()  # consume the closing quote
        self.add_token(TOK_STRING)
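    # Note that the quotes themselves stay in the lexeme: add_token slices
    # source[start:curr], which includes both the opening and closing quote.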

    def handle_identifier(self):
        while self.peek().isalnum() or self.peek() == '_':
            self.advance()
        # Check if the identifier matches a key in the keywords dict.
        text = self.source[self.start:self.curr]
        key_type = keywords.get(text)
        if key_type is None:
            self.add_token(TOK_IDENTIFIER)
        else:
            self.add_token(key_type)
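    # For example, assuming 'while' is a key in the keywords dict from tokens.py
    # (its contents are not shown here), 'while' lexes to that keyword's token
    # type, whereas 'while_count' falls through to TOK_IDENTIFIER.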

    def match(self, expected):
        # Consume the next character only if it equals expected; used to lex
        # two-character operators such as '==', '<=', and ':='.
        if self.curr >= len(self.source):
            return False
        if self.source[self.curr] != expected:
            return False
        self.curr += 1
        return True

    def add_token(self, token_type):
        # Record a token whose lexeme spans source[start:curr].
        self.tokens.append(Token(token_type, self.source[self.start:self.curr], self.line))

    def tokenize(self):
        while self.curr < len(self.source):
            self.start = self.curr
            ch = self.advance()
            if ch == '\n': self.line += 1
            elif ch in (' ', '\t', '\r'): pass  # skip insignificant whitespace
            elif ch == '(': self.add_token(TOK_LPAREN)
            elif ch == ')': self.add_token(TOK_RPAREN)
            elif ch == '{': self.add_token(TOK_LCURLY)
            elif ch == '}': self.add_token(TOK_RCURLY)
            elif ch == '[': self.add_token(TOK_LSQUAR)
            elif ch == ']': self.add_token(TOK_RSQUAR)
            elif ch == '.': self.add_token(TOK_DOT)
            elif ch == ',': self.add_token(TOK_COMMA)
            elif ch == '+': self.add_token(TOK_PLUS)
            elif ch == '-': self.add_token(TOK_MINUS)
            elif ch == '*': self.add_token(TOK_STAR)
            elif ch == '^': self.add_token(TOK_CARET)
            elif ch == '/': self.add_token(TOK_SLASH)
            elif ch == ';': self.add_token(TOK_SEMICOLON)
            elif ch == '?': self.add_token(TOK_QUESTION)
            elif ch == '%': self.add_token(TOK_MOD)
            elif ch == '#':
                # Line comment: discard everything up to the next newline.
                while self.peek() != '\n' and self.curr < len(self.source):
                    self.advance()
            elif ch == '=':
                self.add_token(TOK_EQEQ if self.match('=') else TOK_EQ)
            elif ch == '~':
                self.add_token(TOK_NE if self.match('=') else TOK_NOT)
            elif ch == '<':
                self.add_token(TOK_LE if self.match('=') else TOK_LT)
            elif ch == '>':
                self.add_token(TOK_GE if self.match('=') else TOK_GT)
            elif ch == ':':
                self.add_token(TOK_ASSIGN if self.match('=') else TOK_COLON)
            elif ch == '"' or ch == "'":
                self.handle_string(ch)
            elif ch.isdigit():
                self.handle_number()
            elif ch.isalpha() or ch == '_':
                self.handle_identifier()
            else:
                lexing_error(f'Error at {ch!r}: Unexpected character.', self.line)
        return self.tokens
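

# Minimal smoke test -- a sketch, not part of the original module. It assumes
# tokens.py supplies the Token class and TOK_* constants imported above and that
# Token instances print something readable; the sample program is invented.
if __name__ == '__main__':
    sample = "x := 3.14 + count   # trailing comment\nx >= 2"
    for tok in Lexer(sample).tokenize():
        print(tok)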