-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
26 changed files
with
381 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,21 @@ | ||
<architecture system="mawp" author="meow"> | ||
|
||
<boot> | ||
<importmap> | ||
|
||
</importmap> | ||
</boot> | ||
|
||
<display> | ||
<screen name="1"> | ||
<application components="simply-actors"> | ||
<stage> | ||
<event-menu/> | ||
<zoom-menu/> | ||
<toast title="Welcome to mawp" message="Right click in the center of the screen" ttl="15"/> | ||
</stage> | ||
</application> | ||
</screen> | ||
</display> | ||
|
||
</architecture> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// Expose a read-only global constant `XXX`.
// The value is frozen as well: `writable: false` only blocks re-assigning
// `globalThis.XXX`; without the freeze, `XXX.prefix` could still be changed.
Object.defineProperty(globalThis, 'XXX', {
  value: Object.freeze({
    prefix: 'xxx',
  }),
  writable: false, // Prevent re-assignment of the constant
  enumerable: false, // Keep it out of enumeration (for...in, Object.keys)
  configurable: false, // Prevent deletion or reconfiguration of the constant
});
|
||
import XmlParser from './modules/xml-parser/XmlParser.js'; | ||
import AttributeTokenizer from './modules/xml-parser/AttributeTokenizer.js'; | ||
|
||
|
||
|
||
|
||
// Example usage: tokenize a small shell-like snippet and print the tokens.
const tokenizer = new AttributeTokenizer();
const input = `if (x > 5) then echo "Value is greater than 5" | grep 'found';
send bork | ggg -presto --verbose 3000;
`;
const tokens = tokenizer.tokenize(input);

console.log(tokens);

// load architecture
// import XmlParser from 'xml-parser';
const parser = new XmlParser();
const response = await fetch('./architecture.xml');
// fetch() only rejects on network failure; without this check an HTTP error
// page (404, 500, ...) would be handed to the XML parser as if it were the
// architecture file.
if (!response.ok) {
  throw new Error(`Failed to load ./architecture.xml: ${response.status} ${response.statusText}`);
}
const architecture = await response.text();
const parsedObject = parser.parse(architecture);
console.log(parsedObject);
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
/**
 * Rule-based tokenizer for shell-like attribute expressions.
 *
 * Rules are tried in registration order against the remaining input; the
 * first rule whose expression matches produces a token (unless the rule is
 * marked `ignore`) and the scan continues after the matched text.
 */
export default class AttributeTokenizer {
  #rules = [];
  #debug;

  /**
   * @param {object} [options]
   * @param {boolean} [options.debug=false] - When true, every match is logged
   *   through `console.info`.
   */
  constructor({ debug = false } = {}) {
    this.#debug = debug;
    this
      // `\b` stops keywords from matching the start of longer words
      // ("input" is an identifier, not `in` + "put"; "done" is not `do` + "ne").
      .addRule({ type: 'KEYWORD' }, /^(?<value>if|then|else|for|while|done|do|case|esac|in|return)\b/, {})
      .addRule({ type: 'IDENTIFIER' }, /^(?<value>[a-z]+)/, {})
      .addRule({ type: 'ARGUMENT' }, /^--(?<value>[a-z]+)/, {})
      .addRule({ type: 'FLAG' }, /^-(?<value>[a-z]+)/, {})
      .addRule({ type: 'NUMBER' }, /^(?<value>\d+(\.\d+)?)/, { tr: (v) => Number(v) })
      .addRule({ type: 'STRING' }, /^(?<value>'((?:[^'\\]|\\.)*?)'|"((?:[^"\\]|\\.)*?)")/, {})
      // PIPE has to be registered before OPERATOR: OPERATOR also matches `|`,
      // so in the opposite order the PIPE rule can never fire.
      .addRule({ type: 'PIPE' }, /^(?<value>\|)/, {})
      // NOTE: OPERATOR also claims `<` and `>`, so the bracket rules below
      // only ever see `(`, `[`, `{` and `)`, `]`, `}`.
      .addRule({ type: 'OPERATOR' }, /^(?<value>[+\-*/%<>=!&|^])/, {})
      .addRule({ type: 'OPEN_BRACKET', enter: true }, /^(?<value>[<(\[{])/, {})
      .addRule({ type: 'CLOSE_BRACKET', exit: true }, /^(?<value>[>)\]}])/, {})
      .addRule({ type: 'CONTROL' }, /^(?<value>;)/, {})
      .addRule({ type: 'SPACE' }, /^(?<value>\s+)/, { ignore: true });
  }

  /**
   * Register a tokenization rule. Rules are evaluated in insertion order.
   *
   * @param {object} base - Fields copied onto every token the rule produces
   *   (at least `type`).
   * @param {RegExp} expression - Pattern tested against the remaining input;
   *   it should be anchored with `^`. Named groups become token fields.
   * @param {object} [options]
   * @param {boolean} [options.ignore] - Consume the match without emitting a token.
   * @param {string} [options.value] - Name of the token field to copy into `token.value`.
   * @param {function} [options.tr] - Transformation applied to `token.value`.
   * @returns {this} The tokenizer, for chaining.
   */
  addRule(base, expression, options = {}) {
    this.#rules.push({ base, expression, options });
    return this;
  }

  /**
   * Translate a string index into a 1-based `[line, column]` pair.
   *
   * @param {string} str - The full input.
   * @param {number} index - Zero-based index into `str`.
   * @returns {number[]} `[line, column]` of the character at `index`.
   */
  getPosition(str, index) {
    // Only the text *before* `index` is inspected, so a newline sitting at
    // `index` is reported at the end of its own line instead of at column 0
    // of the following one.
    const preceding = str.slice(0, index).split('\n');
    return [preceding.length, preceding[preceding.length - 1].length + 1];
  }

  /**
   * Split the input into tokens using the registered rules.
   *
   * @param {string} xmlStr - Text to tokenize.
   * @returns {object[]} Tokens: the rule's base fields, a `pos` string
   *   (`line:column`) and the trimmed named groups of the match.
   * @throws {Error} When no rule matches at the current position, or when a
   *   rule matches the empty string (which would never advance).
   */
  tokenize(xmlStr) {
    const tokens = [];
    let currentIndex = 0;

    while (currentIndex < xmlStr.length) {
      // The remaining input is the same for every rule; slice it once.
      const remaining = xmlStr.slice(currentIndex);
      let matched = false;

      for (const { base, expression, options } of this.#rules) {
        // Guard against stateful (/g or /y) expressions added via addRule.
        expression.lastIndex = 0;
        const match = expression.exec(remaining);
        if (!match) continue;

        if (match[0].length === 0) {
          throw new Error(`Zero length match (infinite loop) in ${base.type} at index ${currentIndex} (${this.getPosition(xmlStr, currentIndex).join(':')}) - check your rules!`);
        }

        if (this.#debug) {
          console.info(`parsing: ${remaining.slice(0, 128).replace(/\n/g, '\\n')}..`);
          console.info(`match: ${match[0]}`, match);
        }

        matched = true;

        if (!options.ignore) {
          const pos = this.getPosition(xmlStr, currentIndex).join(':');
          // `groups` is undefined for patterns without named groups, and a
          // group that did not participate in the match is undefined too.
          const groups = Object.fromEntries(
            Object.entries(match.groups ?? {}).map(([key, text]) => [key, text?.trim()]),
          );
          const token = { ...base, pos, ...groups };
          if (options.value) token.value = token[options.value];
          if (token.value && options.tr) token.value = options.tr(token.value);
          tokens.push(token);
        }

        currentIndex += match[0].length; // Move the index forward
        break; // Restart rule evaluation at the new index
      }

      if (!matched) {
        // No rule matched: the input contains something the rules do not cover.
        throw new Error(`Unexpected token "${remaining.slice(0, 8).replace(/\n/g, '\\n')}..." at index ${currentIndex} (${this.getPosition(xmlStr, currentIndex).join(':')})`);
      }
    }

    return tokens;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# The Big Attribute Parser | ||
|
||
```JavaScript | ||
|
||
#parseAttributes2(attributesString) { | ||
const attributes = {}; | ||
const length = attributesString.length; | ||
let i = 0; | ||
|
||
while (i < length) { | ||
// Skip whitespace | ||
while (i < length && /\s/.test(attributesString[i])) i++; | ||
|
||
// Extract the attribute name | ||
const nameStart = i; | ||
while (i < length && /\w/.test(attributesString[i])) i++; | ||
const name = attributesString.slice(nameStart, i).trim(); | ||
|
||
// Skip whitespace and expect an equals sign | ||
while (i < length && /\s/.test(attributesString[i])) i++; | ||
if (attributesString[i] !== '=') { | ||
throw new Error(`Expected '=' after attribute name: ${name}`); | ||
} | ||
i++; // Move past '=' | ||
|
||
// Skip whitespace | ||
while (i < length && /\s/.test(attributesString[i])) i++; | ||
|
||
// Initialize empty value | ||
let value = ''; | ||
const quote = attributesString[i]; // This will be either '"' or "'" | ||
if (quote === '"' || quote === "'") { | ||
i++; // Move past the opening quote | ||
let isEscaped = false; | ||
|
||
// Process the value until the closing quote | ||
while (i < length) { | ||
const currentChar = attributesString[i]; | ||
if (currentChar === '\\' && !isEscaped) { | ||
isEscaped = true; // Mark the escape character | ||
} else if (currentChar === quote && !isEscaped) { | ||
break; // End of attribute value | ||
} else { | ||
value += currentChar; // Append to the value | ||
isEscaped = false; // Reset escape marker | ||
} | ||
i++; // Move to the next character | ||
} | ||
// Move past the closing quote | ||
if (attributesString[i] === quote) i++; | ||
} else { | ||
// If no quotes, consider it an error or handle accordingly if you expect bare values | ||
throw new Error(`Expected quote for attribute value for ${name}`); | ||
} | ||
|
||
// Store the attribute | ||
attributes[name] = value; | ||
} | ||
|
||
return attributes; | ||
} | ||
|
||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
## AttributeTokenizer | ||
|
||
### Key Features of `AttributeTokenizer` | ||
1. **Rule-Based Tokenization**: | ||
- The tokenizer uses a rules-based approach by storing a collection of rules that map token types to regex patterns. This makes it easy to add, modify, or remove token types in the future. | ||
- Each rule consists of a regular expression, a base descriptor (the token `type` plus any extra fields copied onto every token the rule produces), and options for further processing (such as a value transformation or a flag to ignore the match). | ||
|
||
2. **Named Capture Groups**: | ||
- Using named capture groups within regex patterns allows for easy extraction of token values during matching. | ||
|
||
3. **Position Tracking**: | ||
- The `getPosition` method provides a means to track the character and line position of tokens in the input string, which is useful for debugging when an error occurs. | ||
|
||
4. **Error Handling**: | ||
- The implementation includes error handling that reports unmatched tokens or zero-length matches, preventing infinite loops in the tokenizer. | ||
|
||
5. **Whitespace Handling**: | ||
- The tokenizer can ignore tokens that match the whitespace pattern by utilizing the `ignore` option. | ||
|
||
6. **Token Transformation**: | ||
- The ability to transform token values using a provided function in the options (e.g., converting a string number to a numerical value). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
/**
 * Builds a tree (AST) from a flat list of XML tokens.
 */
export default class XmlLexer {
  constructor() {}

  /**
   * Turn a token list into a `Document` tree.
   *
   * Handled token types: `OPEN_TAG`, `CLOSE_TAG`, `SELFCLOSE_TAG`, `TEXT`
   * and `COMMENT`; tokens of any other type are skipped.
   *
   * @param {Iterable<object>} tokens - Tokens with a `type` field; tag tokens
   *   carry `tagName` (and `attributes`), text and comment tokens carry `value`.
   * @returns {object} Root node `{ type: 'Document', children: [...] }`.
   * @throws {Error} On a closing tag that does not match the innermost open
   *   element, or when elements are still open at the end of the input.
   */
  createAst(tokens) {
    const ast = {
      type: 'Document',
      children: [],
    };

    // Open elements, innermost last. The Document root is always at index 0.
    const stack = [ast];

    for (const token of tokens) {
      const currentParent = stack[stack.length - 1];

      switch (token.type) {
        case 'OPEN_TAG': {
          const element = this.#createElement(token);
          currentParent.children.push(element);
          stack.push(element); // Subsequent nodes become children of this element
          break;
        }

        case 'CLOSE_TAG':
          // The root is never closable: with only the Document left on the
          // stack any closing tag is unexpected. (Testing for an empty stack
          // would never trigger and could let the root itself be popped.)
          if (stack.length === 1 || currentParent.name !== token.tagName) {
            throw new Error(`Unexpected closing tag: </${token.tagName}>`);
          }
          stack.pop(); // Move back to the parent
          break;

        case 'SELFCLOSE_TAG':
          // Self-closing tags do not change the current parent
          currentParent.children.push(this.#createElement(token));
          break;

        case 'TEXT':
          if (token.value) { // Ensure there's actual text
            currentParent.children.push({
              type: 'Text',
              value: token.value,
            });
          }
          break;

        case 'COMMENT':
          currentParent.children.push({
            type: 'Comment',
            value: token.value,
          });
          break;

        default:
          break;
      }
    }

    if (stack.length !== 1) {
      throw new Error(`Unmatched opening tags left in the stack.`);
    }

    return ast;
  }

  /**
   * Create an `Element` node from a tag token.
   * A missing or empty `attributes` field is normalised to `{}` so every
   * element exposes its attributes as an object.
   */
  #createElement(token) {
    return {
      type: 'Element',
      name: token.tagName,
      attributes: token.attributes || {},
      children: [],
    };
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import XmlLexer from './XmlLexer.js'; | ||
import XmlTokenizer from './XmlTokenizer.js'; | ||
|
||
/**
 * Convenience facade: tokenizes an XML string with `XmlTokenizer` and builds
 * the tree with `XmlLexer`.
 */
export default class XmlParser {
  #xmlTokenizer;
  #xmlLexer;
  #debug;

  /**
   * @param {object} [options]
   * @param {boolean} [options.debug=false] - When true, the resulting AST is
   *   printed as JSON on every `parse` call.
   */
  constructor({ debug = false } = {}) {
    this.#xmlTokenizer = new XmlTokenizer();
    this.#xmlLexer = new XmlLexer();
    this.#debug = debug;
  }

  /**
   * Parse an XML string.
   *
   * @param {string} xmlStr - XML source text.
   * @returns {object} The AST returned by `XmlLexer#createAst`.
   */
  parse(xmlStr) {
    const tokens = this.#xmlTokenizer.tokenize(xmlStr);
    const ast = this.#xmlLexer.createAst(tokens);
    // Dumping the whole tree is diagnostic output; keep it opt-in so callers
    // decide what gets logged.
    if (this.#debug) {
      console.log(JSON.stringify(ast, null, 2));
    }
    return ast;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
/**
 * Rule-based tokenizer for a simplified XML dialect: elements (open, close,
 * self-closing), comments and text. Rules are tried in registration order
 * against the remaining input.
 */
export default class XmlTokenizer {
  #rules = [];

  constructor() {
    // Tag names may contain letters, digits, `_`, `-`, `.` and `:` but may not
    // start with a digit, `.` or `:`. The attribute part skips over quoted
    // values as a unit, so `/` and `>` inside quotes (URLs, paths, comparison
    // text) do not end the tag; an unquoted `/` is only accepted when it is
    // not the start of `/>`.
    this
      .addRule({ type: 'WHITESPACE' }, /^\s+/, { ignore: true })
      .addRule({ type: 'SELFCLOSE_TAG' }, /^<(?<tagName>[a-zA-Z_-][\w.:-]*)(?<attributes>(?:[^>"'\/]|"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'|\/(?!>))*)\/>/)
      .addRule({ type: 'OPEN_TAG' }, /^<(?<tagName>[a-zA-Z_-][\w.:-]*)(?<attributes>(?:[^>"'\/]|"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'|\/(?!>))*)>/)
      .addRule({ type: 'COMMENT' }, /^<!--(?<comment>[\s\S]*?)-->/, { value: 'comment' })
      .addRule({ type: 'CLOSE_TAG' }, /^<\/(?<tagName>[a-zA-Z_-][\w.:-]*)\s*>/)
      .addRule({ type: 'TEXT' }, /^(?<text>[^<]+)/, { value: 'text' });
  }

  /**
   * Register a tokenization rule. Rules are evaluated in insertion order.
   *
   * @param {object} base - Fields copied onto every token the rule produces
   *   (at least `type`).
   * @param {RegExp} expression - Pattern tested against the remaining input;
   *   it should be anchored with `^`. Named groups become token fields.
   * @param {object} [options]
   * @param {boolean} [options.ignore] - Consume the match without emitting a token.
   * @param {string} [options.value] - Name of the token field to copy into `token.value`.
   * @returns {this} The tokenizer, for chaining.
   */
  addRule(base, expression, options = {}) {
    this.#rules.push({ base, expression, options });
    return this;
  }

  /**
   * Translate a string index into a 1-based `[line, column]` pair.
   *
   * @param {string} str - The full input.
   * @param {number} index - Zero-based index into `str`.
   * @returns {number[]} `[line, column]` of the character at `index`.
   */
  getPosition(str, index) {
    // Only the text *before* `index` is inspected, so a newline sitting at
    // `index` is reported at the end of its own line instead of at column 0
    // of the following one.
    const preceding = str.slice(0, index).split('\n');
    return [preceding.length, preceding[preceding.length - 1].length + 1];
  }

  /**
   * Split an XML string into tokens.
   *
   * @param {string} xmlStr - XML source text.
   * @returns {object[]} Tokens: the rule's base fields plus the trimmed named
   *   groups of the match. Tag tokens carry `tagName` and, for open and
   *   self-closing tags, an `attributes` object; text and comment tokens
   *   carry `value`.
   * @throws {Error} When no rule matches at the current position, or when a
   *   rule matches the empty string (which would never advance).
   */
  tokenize(xmlStr) {
    const tokens = [];
    let currentIndex = 0;

    while (currentIndex < xmlStr.length) {
      // The remaining input is the same for every rule; slice it once.
      const remaining = xmlStr.slice(currentIndex);
      let matched = false;

      for (const { base, expression, options } of this.#rules) {
        // Guard against stateful (/g or /y) expressions added via addRule.
        expression.lastIndex = 0;
        const match = expression.exec(remaining);
        if (!match) continue;

        if (match[0].length === 0) {
          throw new Error(`Zero length match in ${base.type} at index ${currentIndex} (${this.getPosition(xmlStr, currentIndex).join(':')})`);
        }

        matched = true;

        if (!options.ignore) {
          // `groups` is undefined for patterns without named groups, and a
          // group that did not participate in the match is undefined too.
          const groups = Object.fromEntries(
            Object.entries(match.groups ?? {}).map(([key, text]) => [key, text?.trim()]),
          );
          const token = { ...base, ...groups };
          // Parse even an empty attribute string so `attributes` is always an
          // object on tag tokens, never '' for attribute-less tags.
          if (typeof token.attributes === 'string') {
            token.attributes = this.parseAttributes(token.attributes);
          }
          if (options.value) token.value = token[options.value];
          tokens.push(token);
        }

        currentIndex += match[0].length; // Move the index forward
        break; // Restart rule evaluation at the new index
      }

      if (!matched) {
        // If no rule matched, we have an invalid XML string section
        throw new Error(`Unexpected token at index ${currentIndex} (${this.getPosition(xmlStr, currentIndex).join(':')})`);
      }
    }

    return tokens;
  }

  /**
   * Parse the attribute part of a tag into an object.
   *
   * Values must be quoted with `"` or `'`; `\"` and `\'` inside a value are
   * unescaped. Text that does not form a `name="value"` pair is skipped.
   *
   * @param {string} attributesString - Raw text between tag name and `>`.
   * @returns {object} Map of attribute name to value.
   */
  parseAttributes(attributesString) {
    const attributes = {};
    // A name must start at the beginning of the string or after whitespace
    // and may contain `-`, `.` and `:`; otherwise `data-id="7"` would be
    // read as `id="7"`. Whitespace around `=` is tolerated.
    const attrPattern = /(?:^|\s)([\w:.-]+)\s*=\s*("[^"\\]*(?:\\.[^"\\]*)*"|'[^'\\]*(?:\\.[^'\\]*)*')/g;
    let match;
    while ((match = attrPattern.exec(attributesString)) !== null) {
      const name = match[1];
      const value = match[2].slice(1, -1).replace(/\\(["'])/g, '$1'); // Remove quotes and unescape
      attributes[name] = value;
    }
    return attributes;
  }
}
Oops, something went wrong.