-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtokeniser.l
73 lines (63 loc) · 1.75 KB
/
tokeniser.l
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
%{
// This is our lexical tokeniser (a flex specification).
// Generate the C++ scanner source from it with:
// flex++ -d -otokeniser.cpp tokeniser.l
// and then compile that source into an object file with:
// g++ -c tokeniser.cpp
// Tokens are read by calling lexer->yylex():
// lexer->yylex() returns the type of the token just read (see enum TOKEN in tokeniser.h)
// and lexer->YYText() returns the matched text of that token as a string
#include "tokeniser.h"
#include <iostream>
using namespace std;
%}
%option noyywrap
%option c++
%option yylineno
/* Named patterns referenced by the rules section of this file. */
/* Character constant: quote, optional backslash, one character other than newline, quote. */
charconst \'\\?.\'
/* Run of blanks, tabs, line feeds and carriage returns. */
ws [ \t\n\r]+
alpha [A-Za-z]
digit [0-9]
/* Unsigned number: digits, optionally followed by a dot and more digits. */
number {digit}+(\.{digit}+)?
/* NOTE(review): boolVal is not referenced by any rule in this file; TRUE and FALSE are also part of keyword. */
boolVal ("TRUE"|"FALSE")
/* Reserved words, upper case only. The keyword rule is listed before the id rule, so a lexeme
   that is exactly one of these words is reported as KEYWORD rather than ID. */
keyword ("IF"|"THEN"|"ELSE"|"WHILE"|"FOR"|"DO"|"TO"|"BEGIN"|"END"|"VAR"|"INTEGER"|"BOOLEAN"|"DISPLAY"|"DOWNTO"|"DOUBLE"|"CHAR"|"TRUE"|"FALSE"|"NONE"|"CASE"|"OF"|"DEFAULT"|"STRING")
/* Identifier: a letter followed by any number of letters or digits. */
id {alpha}({alpha}|{digit})*
addop (\+|\-|\|\|)
mulop (\*|\/|%|\&\&)
relop (\<|\>|"=="|\<=|\>=|!=)
/* String constant: text between double quotes (it may span lines); a double quote can appear
   inside only in the form of a character constant. */
stringconst \"({charconst}|[^\"])*\"
/* Fallback for text that starts no valid token. Two alternatives:
   - a run of characters none of which can begin a token (reported as one lexeme), or
   - any single character other than newline.
   The run excludes every character that can begin a token. The earlier definition left plus,
   star, slash, square brackets, comma, colon and the single quote inside the run, so the
   longest-match rule let it swallow valid input, for instance the two characters "]:" in
   a[i]:=3 came back as one UNKNOWN. The single-character alternative catches input that
   would otherwise match no rule at all (for example a lone = or & or |), which the default
   flex rule would copy to the output instead of reporting. */
unknown ([^\"\'A-Za-z0-9 \n\r\t\(\)\[\]\<\>\=\!\+\-\*\/\%\&\|\,\;\.\:]+|.)
%%
{stringconst}   { return STRINGCONST; }
{addop}         { return ADDOP; }
{mulop}         { return MULOP; }
{relop}         { return RELOP; }
{number}        { return NUMBER; }
{keyword}       { /* placed ahead of the id rule so reserved words win equal-length matches */ return KEYWORD; }
{id}            { return ID; }
{charconst}     { return CHARCONST; }
"["             { /* NOTE(review): opening bracket maps to RBRACKET; the name looks swapped, confirm against tokeniser.h and its users */ return RBRACKET; }
"]"             { /* NOTE(review): closing bracket maps to LBRACKET; see the note on the opening bracket */ return LBRACKET; }
","             { return COMMA; }
";"             { return SEMICOLON; }
"."             { return DOT; }
":"             { return COLON; }
":="            { /* two characters long, so longest-match prefers it over the single colon */ return ASSIGN; }
"("             { /* NOTE(review): opening parenthesis maps to RPARENT; the name looks swapped, confirm against tokeniser.h and its users */ return RPARENT; }
")"             { /* NOTE(review): closing parenthesis maps to LPARENT; see the note on the opening parenthesis */ return LPARENT; }
"!"             { return NOT; }
<<EOF>>         { /* end of input is reported as a token of its own */ return FEOF; }
{ws} { /* whitespace produces no token: drop it and keep scanning */ };
"(*" { /* Skip a comment: discard everything from the opening (* through the closing *) */
    int c;
    int prev = 0;   /* previously consumed character; 0 until one has been read */
    /* Depending on the flex release, yyinput() reports end of input either as 0 or as a
       negative EOF value, while ordinary characters come back as positive values. Stop on
       anything that is not positive: testing against 0 alone never terminates on an
       unterminated comment when end of input is reported as EOF. */
    while((c = yyinput()) > 0){
        if(prev == '*' && c == ')')
            break;   /* closing delimiter found */
        prev = c;    /* remember it so a star directly followed by a parenthesis is detected */
    }
    /* NOTE(review): an unterminated comment silently consumes the rest of the input; consider
       reporting it once the error convention of the caller is known. */
};
{unknown} return UNKNOWN; /* fallback: text matching the unknown pattern; kept last so every earlier rule wins an equal-length match */
%%