-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread.js
347 lines (292 loc) · 9.88 KB
/
read.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
// MINIMAL LISP READER
// (adapted from Parinfer)
//
// Parses enough to find indentation points, parens, and token positions.
//
// Designed for Clojure syntax:
// - Parens: `(round), {curly}, [square]`
// - Comments: `; comment rest of line`
// - Strings: `"string"` (multi-line)
// - Characters: `\char`
//------------------------------------------------------------------------------
// Constants / Predicates
//------------------------------------------------------------------------------
// Single characters that the dispatcher in `onChar` gives special handling.
const BACKSLASH = "\\";
const SPACE = " ";
const DOUBLE_QUOTE = '"';
const SEMICOLON = ";";
const TAB = "\t";
// Splits the input text into lines; accepts both LF and CRLF line endings.
const LINE_ENDING_REGEX = /\r?\n/;
// Maps every paren character to its partner, in both directions
// (opener -> closer and closer -> opener).
const MATCH_PAREN = {
"{": "}",
"}": "{",
"[": "]",
"]": "[",
"(": ")",
")": "("
};
//------------------------------------------------------------------------------
// State Structure
//------------------------------------------------------------------------------
// This represents the running state. As we scan through each character
// of a given text, we mutate this structure to update the state of our
// system.
/**
 * Create the mutable state object used for one pass over `text`.
 *
 * @param {string} text - full input; split into lines on LF / CRLF.
 * @param {object} [hooks={}] - optional callbacks invoked by the reader.
 * @returns {object} a fresh state positioned before the first character.
 */
function getInitialState(text, hooks = {}) {
  return {
    // callbacks for each step of the reader
    hooks,
    // input lines, processed line-by-line and char-by-char
    lines: text.split(LINE_ENDING_REGEX),

    // cursor: current line number, column, and character
    lineNo: -1,
    x: -1,
    ch: "",

    // open parens we are currently nested inside of: {ch, x, lineNo, children}
    parenStack: [],
    // top-level forms/tokens parsed so far: {ch, x, lineNo, children}
    children: [],

    // true when the next character is escaped (e.g. `\c`); may happen in
    // a string, a comment, or code
    isEscaping: false,
    // true while inside a string
    isInStr: false,
    // true while inside a comment
    isInComment: false,
    // true when the next ordinary character should be recorded as a token start
    trackingTokenStart: true,
    // true while looking for the indentation point of the current line
    trackingIndent: false,

    // true once the whole input was read without a reader error
    success: false,
    // {name, message, lineNo, x} when reading failed
    error: null,
    // maps an error name to a potential error position
    errorPosCache: {}
  };
}
//------------------------------------------------------------------------------
// Possible Errors
//------------------------------------------------------------------------------
// `state.error.name` is set to any of these
// Error names. `error()` copies one of these into the `name` field of the
// error object it builds.
const ERROR_UNCLOSED_QUOTE = "unclosed-quote";
const ERROR_UNCLOSED_PAREN = "unclosed-paren";
const ERROR_UNMATCHED_CLOSE_PAREN = "unmatched-close-paren";
const ERROR_UNMATCHED_OPEN_PAREN = "unmatched-open-paren";
// Used by `processError` for exceptions that were not produced by `error()`.
const ERROR_UNHANDLED = "unhandled";
// Human-readable message for each error name; looked up by `error()`.
const errorMessages = {
[ERROR_UNCLOSED_QUOTE]: "String is missing a closing quote.",
[ERROR_UNCLOSED_PAREN]: "Unclosed open-paren.",
[ERROR_UNMATCHED_CLOSE_PAREN]: "Unmatched close-paren.",
[ERROR_UNMATCHED_OPEN_PAREN]: "Unmatched open-paren.",
[ERROR_UNHANDLED]: "Unhandled error."
};
/**
 * Remember the current cursor position as the place to report `errorName`
 * at, should that error be raised later.
 *
 * @param {object} state - reader state; reads `lineNo`/`x`, writes `errorPosCache`.
 * @param {string} errorName - key under which the position is stored.
 * @returns {{lineNo: number, x: number}} the cached position object.
 */
function cacheErrorPos(state, errorName) {
  const position = { lineNo: state.lineNo, x: state.x };
  state.errorPosCache[errorName] = position;
  return position;
}
/**
 * Build the error object for `name` (the caller is expected to throw it).
 *
 * The reported position is the one cached for `name` if there is one,
 * otherwise the current cursor position.
 *
 * @param {object} state - reader state.
 * @param {string} name - one of the ERROR_* names.
 * @returns {object} `{readerError, name, message, lineNo, x}` plus an optional
 *   `extra` location for unmatched close-parens.
 */
function error(state, name) {
  const where = state.errorPosCache[name] || state;
  const report = {
    readerError: true,
    name,
    message: errorMessages[name],
    lineNo: where.lineNo,
    x: where.x
  };

  const opener = peek(state.parenStack, 0);

  switch (name) {
    case ERROR_UNMATCHED_CLOSE_PAREN: {
      // extra error info for locating the open-paren that it should've matched
      const openPos = state.errorPosCache[ERROR_UNMATCHED_OPEN_PAREN] || opener;
      if (openPos) {
        report.extra = {
          name: ERROR_UNMATCHED_OPEN_PAREN,
          lineNo: openPos.lineNo,
          x: openPos.x
        };
      }
      break;
    }
    case ERROR_UNCLOSED_PAREN:
      // report at the innermost paren that is still open
      report.lineNo = opener.lineNo;
      report.x = opener.x;
      break;
    default:
      break;
  }

  return report;
}
//------------------------------------------------------------------------------
// Line operations
//------------------------------------------------------------------------------
/**
 * Reset the per-line parts of the state before a new line is read.
 *
 * Comment and escape flags never carry over a line break; an open string
 * does, and while inside one neither indentation nor token starts are tracked.
 *
 * @param {object} state - reader state, mutated in place.
 */
function initLine(state) {
  for (const staleName of [ERROR_UNMATCHED_CLOSE_PAREN, ERROR_UNMATCHED_OPEN_PAREN]) {
    delete state.errorPosCache[staleName];
  }

  const outsideString = !state.isInStr;
  state.isInComment = false;
  state.isEscaping = false;
  state.trackingIndent = outsideString;
  state.trackingTokenStart = outsideString;
}
//------------------------------------------------------------------------------
// Misc Utils
//------------------------------------------------------------------------------
/**
 * Look at an element counted from the end of an array without removing it.
 *
 * @param {Array} arr - array to inspect.
 * @param {number} idxFromBack - 0 for the last element, 1 for the one before it, ...
 * @returns {*} the element, or `null` when `idxFromBack` reaches past the front.
 */
export function peek(arr, idxFromBack) {
  const idx = arr.length - 1 - idxFromBack;
  return idx < 0 ? null : arr[idx];
}
//------------------------------------------------------------------------------
// Questions about characters
//------------------------------------------------------------------------------
/**
 * @param {string} ch - a single character.
 * @returns {boolean} true when `ch` is `{`, `(` or `[`.
 */
export function isOpenParen(ch) {
  switch (ch) {
    case "{":
    case "(":
    case "[":
      return true;
    default:
      return false;
  }
}
/**
 * @param {string} ch - a single character.
 * @returns {boolean} true when `ch` is `}`, `)` or `]`.
 */
function isCloseParen(ch) {
  switch (ch) {
    case "}":
    case ")":
    case "]":
      return true;
    default:
      return false;
  }
}
/**
 * Check whether close-paren `ch` matches the innermost open paren.
 *
 * @param {Array<{ch: string}>} parenStack - stack of currently open parens.
 * @param {string} ch - the close-paren character being read.
 * @returns {boolean} false when nothing is open or the paren kinds differ.
 */
function isValidCloseParen(parenStack, ch) {
  const hasOpenParen = parenStack.length > 0;
  return hasOpenParen && peek(parenStack, 0).ch === MATCH_PAREN[ch];
}
//------------------------------------------------------------------------------
// Literal character events
//------------------------------------------------------------------------------
/**
 * Record the start of a token (or of a paren form) at the current position.
 *
 * The node is appended to the `children` of the innermost open paren, or to
 * the top-level `state.children` when no paren is open.
 *
 * @param {object} state - reader state.
 * @param {object} [opener] - pre-built node to record; when omitted a
 *   `{lineNo, x, ch}` node is created from the cursor.
 */
function onTokenStart(state, opener) {
  state.trackingTokenStart = false;

  const node = opener || { lineNo: state.lineNo, x: state.x, ch: state.ch };
  const container = peek(state.parenStack, 0) || state;
  container.children.push(node);
}
/**
 * Handle `(`, `[` or `{`. Outside of code (string/comment) this is a no-op.
 *
 * In code, a new form node is recorded as a token, pushed on the paren
 * stack, and token tracking is re-armed for the form's first child.
 *
 * @param {object} state - reader state.
 */
function onOpenParen(state) {
  if (!isInCode(state)) {
    return;
  }

  const { lineNo, x, ch } = state;
  const opener = { lineNo, x, ch, children: [] };

  onTokenStart(state, opener);
  // onTokenStart switched tracking off; the next char may start a child token
  state.trackingTokenStart = true;
  state.parenStack.push(opener);
}
/**
 * Handle a close-paren that matches the innermost open paren.
 *
 * Pops the opener, attaches the closer position to it as `opener.closer`,
 * re-arms token tracking and, when this closes a top-level form (the paren
 * stack is now empty), calls the optional `hooks.onTopLevelForm` callback
 * with `(state, opener.lineNo, closer.lineNo + 1)`.
 *
 * @param {object} state - reader state; `state.parenStack` must be non-empty.
 */
function onMatchedCloseParen(state) {
  const opener = state.parenStack.pop();
  const { lineNo, x, ch } = state;
  opener.closer = { lineNo, x, ch };
  state.trackingTokenStart = true;

  // hooks are optional; a null/undefined `hooks` simply means "no callbacks"
  const onTopLevelForm = state.hooks ? state.hooks.onTopLevelForm : undefined;
  if (onTopLevelForm && state.parenStack.length === 0) {
    onTopLevelForm(state, opener.lineNo, lineNo + 1);
  }
}
/**
 * Handle a close-paren that does not match the innermost open paren (or
 * appears with nothing open): always aborts reading.
 *
 * @param {object} state - reader state.
 * @throws {object} the reader error built for ERROR_UNMATCHED_CLOSE_PAREN.
 */
function onUnmatchedCloseParen(state) {
  const unmatched = error(state, ERROR_UNMATCHED_CLOSE_PAREN);
  throw unmatched;
}
/**
 * Handle `)`, `]` or `}`. Ignored inside strings and comments; in code it is
 * routed to the matched or unmatched handler.
 *
 * @param {object} state - reader state.
 */
function onCloseParen(state) {
  if (!isInCode(state)) {
    return;
  }

  const handle = isValidCloseParen(state.parenStack, state.ch)
    ? onMatchedCloseParen
    : onUnmatchedCloseParen;
  handle(state);
}
/**
 * Handle a TAB character.
 *
 * In code a tab is whitespace, so - exactly like a space - it ends the
 * current token and re-arms token tracking. Without this, `a<TAB>b` would be
 * recorded as a single token. Inside strings and comments it is ignored.
 *
 * @param {object} state - reader state.
 */
function onTab(state) {
  if (isInCode(state)) {
    state.trackingTokenStart = true;
  }
}
/**
 * Handle `;`. In code it starts a comment, which is itself recorded as a
 * token; inside a string or an existing comment it is ignored.
 *
 * @param {object} state - reader state.
 */
function onSemicolon(state) {
  if (!isInCode(state)) {
    return;
  }
  onTokenStart(state);
  state.isInComment = true;
}
/**
 * Handle an unescaped `"`.
 *
 * - Inside a string: closes it and re-arms token tracking.
 * - Inside a comment: ignored. A quote in a `;` comment is just comment
 *   text; treating it as a string opener would make every following line
 *   part of a string.
 * - In code: records a token, opens a string, and caches the position for a
 *   possible unclosed-quote error.
 *
 * @param {object} state - reader state.
 */
function onQuote(state) {
  if (state.isInStr) {
    state.isInStr = false;
    state.trackingTokenStart = true;
    return;
  }

  if (state.isInComment) {
    return;
  }

  onTokenStart(state);
  state.isInStr = true;
  cacheErrorPos(state, ERROR_UNCLOSED_QUOTE);
}
/**
 * Handle an unescaped `\`: the next character will be treated as escaped.
 *
 * Only in code does the backslash also start a token (a character literal
 * such as `\c`). Inside a string or a comment it is plain text and must not
 * be recorded as a token.
 *
 * @param {object} state - reader state.
 */
function onBackslash(state) {
  state.isEscaping = true;
  if (!state.isInStr && !state.isInComment) {
    onTokenStart(state);
  }
}
/**
 * Consume the character that follows a backslash: it gets no special
 * handling, and escaping ends with it.
 *
 * @param {object} state - reader state.
 */
function afterBackslash(state) {
  state.isEscaping = false;
}
/**
 * Handle a space. In code it separates tokens, so token tracking is
 * re-armed; inside strings and comments it is ignored.
 *
 * @param {object} state - reader state.
 */
function onSpace(state) {
  if (!isInCode(state)) {
    return;
  }
  state.trackingTokenStart = true;
}
/**
 * @param {object} state - reader state.
 * @returns {boolean} true when the cursor is in "code space", i.e. neither
 *   inside a string nor inside a comment.
 */
function isInCode(state) {
  const outsideCode = state.isInComment || state.isInStr;
  return !outsideCode;
}
//------------------------------------------------------------------------------
// Character dispatch
//------------------------------------------------------------------------------
/**
 * Dispatch the current character (`state.ch`) to its handler.
 *
 * An escaped character is consumed first and gets no further handling.
 * Any character without a dedicated handler starts a token if token
 * tracking is armed.
 *
 * @param {object} state - reader state.
 */
function onChar(state) {
  const { ch } = state;

  if (state.isEscaping) {
    afterBackslash(state);
    return;
  }
  if (isOpenParen(ch)) {
    onOpenParen(state);
    return;
  }
  if (isCloseParen(ch)) {
    onCloseParen(state);
    return;
  }

  switch (ch) {
    case DOUBLE_QUOTE:
      onQuote(state);
      break;
    case SEMICOLON:
      onSemicolon(state);
      break;
    case BACKSLASH:
      onBackslash(state);
      break;
    case TAB:
      onTab(state);
      break;
    case SPACE:
      onSpace(state);
      break;
    default:
      if (state.trackingTokenStart) {
        onTokenStart(state);
      }
  }
}
//------------------------------------------------------------------------------
// Indentation functions
//------------------------------------------------------------------------------
/**
 * Called when the indentation point of the current line has been reached;
 * stops looking for it.
 *
 * @param {object} state - reader state.
 */
function onIndent(state) {
  state.trackingIndent = false;
}
/**
 * While the indentation point of the line is still being looked for, fire
 * `onIndent` at the first character that is neither a space nor a tab.
 *
 * @param {object} state - reader state.
 */
function checkIndent(state) {
  if (!state.trackingIndent) {
    return;
  }
  const isLeadingWhitespace = state.ch === SPACE || state.ch === TAB;
  if (!isLeadingWhitespace) {
    onIndent(state);
  }
}
//------------------------------------------------------------------------------
// High-level processing functions
//------------------------------------------------------------------------------
/**
 * Process one input character: make it the current character, then run the
 * indentation check followed by the character dispatch.
 *
 * @param {object} state - reader state.
 * @param {string} ch - the character at the current cursor position.
 */
function readChar(state, ch) {
  state.ch = ch;

  // indentation is checked first, then the character itself is handled
  checkIndent(state);
  onChar(state);
}
/**
 * Read line `lineNo` of `state.lines` character by character, keeping
 * `state.x` in sync with the column being processed.
 *
 * Note: `state.lineNo` is not updated here.
 *
 * @param {object} state - reader state.
 * @param {number} lineNo - index into `state.lines`.
 */
function readLine(state, lineNo) {
  initLine(state);

  let column = 0;
  while (column < state.lines[lineNo].length) {
    state.x = column;
    readChar(state, state.lines[lineNo][column]);
    column += 1;
  }
}
/**
 * End-of-input checks. Marks the state as successful unless a string or a
 * paren is still open.
 *
 * @param {object} state - reader state.
 * @throws {object} a reader error (unclosed quote is checked before
 *   unclosed paren).
 */
function finalizeState(state) {
  const { isInStr, parenStack } = state;

  if (isInStr) {
    throw error(state, ERROR_UNCLOSED_QUOTE);
  }
  if (parenStack.length > 0) {
    throw error(state, ERROR_UNCLOSED_PAREN);
  }

  state.success = true;
}
/**
 * Record a failure on the state.
 *
 * Reader errors (objects built by `error()`, tagged with `readerError`) are
 * stored as-is, minus the internal tag. Anything else - including non-Error
 * values such as strings or `null` thrown by a hook - is wrapped as an
 * "unhandled" error whose message is the stack trace when available, or the
 * stringified value otherwise.
 *
 * @param {object} state - reader state; `success` and `error` are written.
 * @param {*} e - the caught value.
 */
function processError(state, e) {
  state.success = false;

  if (e && e.readerError) {
    delete e.readerError;
    state.error = e;
    return;
  }

  // `e` may be null/undefined or a primitive; never assume it has a stack
  const message = e && e.stack ? e.stack : String(e);
  state.error = { name: ERROR_UNHANDLED, message };
}
/**
 * Read `text` and return the final reader state.
 *
 * Errors raised while reading are caught and recorded on the state, so the
 * caller should check `state.success` / `state.error`.
 *
 * @param {string} text - source text to read.
 * @param {object} [hooks] - optional reader callbacks.
 * @returns {object} the reader state after processing.
 */
export function readText(text, hooks) {
  const state = getInitialState(text, hooks);

  try {
    let lineNo = 0;
    while (lineNo < state.lines.length) {
      state.lineNo = lineNo;
      readLine(state, lineNo);
      lineNo += 1;
    }
    finalizeState(state);
  } catch (caught) {
    processError(state, caught);
  }

  return state;
}