From 9e09d0d4c7d6634099882900f2cde30d23a12cb5 Mon Sep 17 00:00:00 2001 From: James M Snell Date: Fri, 25 Feb 2022 12:57:07 -0800 Subject: [PATCH] fixup! url: add experimental URLPattern --- lib/internal/urlpattern.js | 127 ++++++++++++------------------------- 1 file changed, 41 insertions(+), 86 deletions(-) diff --git a/lib/internal/urlpattern.js b/lib/internal/urlpattern.js index aed086f33b18ba..2f3ebcb1e8b29a 100644 --- a/lib/internal/urlpattern.js +++ b/lib/internal/urlpattern.js @@ -33,6 +33,7 @@ const { const { customInspectSymbol: kInspect, + toUSVString, } = require('internal/util'); const { inspect } = require('internal/util/inspect'); @@ -91,7 +92,7 @@ const kASCII = /^[\x00-\x7F]*$/; * */ -function parseConstructorString(input) { +function parseConstructorString(input, baseUrl) { const parser = { input, tokenList: tokenize(input, 'lenient'), @@ -106,11 +107,8 @@ function parseConstructorString(input) { }; function changeState(state, skip) { - if (parser.state !== 'init' && - parser.state !== 'authority' && - parser.state !== 'done') { + if (parser.state !== 'init' && parser.state !== 'authority' && parser.state !== 'done') parser.result[parser.state] = makeComponentString(); - } parser.state = state; parser.tokenIndex += skip; parser.componentStart = parser.tokenIndex; @@ -140,9 +138,7 @@ function parseConstructorString(input) { function isNonSpecialPatternChar(index, value) { const token = getSafeToken(index); if (token.value !== value) return false; - return token.type === 'char' || - token.type === 'escaped-char' || - token.type === 'invalid-char'; + return token.type === 'char' || token.type === 'escaped-char' || token.type === 'invalid-char'; } function isProtocolSuffix() { @@ -214,11 +210,8 @@ function parseConstructorString(input) { } function computeProtocolMatchesSpecialSchemeFlag() { - const protocol = makeComponentString(); - if (protocolMatchesSpecialScheme( - compileAndCanonicalize(protocol, 'protocol'))) { - parser.protocolMatchesSpecialScheme = true; - } + parser.protocolMatchesSpecialScheme = + protocolMatchesSpecialScheme(compileAndCanonicalize(makeComponentString(), 'protocol')); } while (parser.tokenIndex < parser.tokenList.length) { @@ -486,8 +479,7 @@ function tokenize(input = '', policy = 'strict') { function addToken(type, nextPosition, valuePosition, valueLength) { const index = tokenizer.index; - const value = - tokenizer.input.slice(valuePosition, valuePosition + valueLength); + const value = tokenizer.input.slice(valuePosition, valuePosition + valueLength); ArrayPrototypePush(tokenizer.tokenList, { type, index, @@ -547,10 +539,7 @@ function tokenize(input = '', policy = 'strict') { } const escapedIndex = tokenizer.nextIndex; getNextCodepoint(); - addTokenWithDefaultLength( - 'escaped-char', - tokenizer.nextIndex, - escapedIndex); + addTokenWithDefaultLength('escaped-char', tokenizer.nextIndex, escapedIndex); continue; } case '{': { @@ -664,10 +653,8 @@ function tryConsumeToken(parser, type) { function tryConsumeRegExpOrWildcardToken(nameToken, parser) { const token = tryConsumeToken(parser, 'regexp'); - if (nameToken == null && token == null) { - return tryConsumeToken(parser, 'asterisk'); - } - return token; + return nameToken == null && token == null ? + tryConsumeToken(parser, 'asterisk') : token; } function tryConsumeModifierToken(parser) { @@ -724,7 +711,6 @@ function runEncoding(parser, value) { // TODO(@jasnell): Throw proper error. Should be an assert throw new TypeError('Unknown type'); } catch (err) { -console.log(err); // TODO(@jasnell): Throw proper error throw new TypeError(`Invalid ${parser.type}`); } @@ -764,9 +750,7 @@ function addPart( case '+': modifier = 'one-or-more'; } } - if (nameToken === undefined && - regexpOrWildcardToken === undefined && - modifier === 'none') { + if (nameToken === undefined && regexpOrWildcardToken === undefined && modifier === 'none') { parser.pendingFixedValue += prefix; return; } @@ -825,9 +809,8 @@ function consumeText(parser) { let result = ''; while (true) { let token = tryConsumeToken(parser, 'char'); - if (token === undefined) { + if (token === undefined) token = tryConsumeToken(parser, 'escaped-char'); - } if (token === undefined) break; result += token.value; } @@ -851,9 +834,7 @@ function parsePatternString(input, type, options) { const parser = { index: 0, type, - segmentWildcard: - `[^${StringPrototypeReplaceAll( - delimiterCodePoint, kEscapeRegexp, '$1')}]+?`, + segmentWildcard: `[^${StringPrototypeReplaceAll(delimiterCodePoint, kEscapeRegexp, '$1')}]+?`, tokenList: tokenize(input, 'strict'), partList: [], pendingFixedValue: '', @@ -863,27 +844,22 @@ function parsePatternString(input, type, options) { while(parser.index < parser.tokenList.length) { const charToken = tryConsumeToken(parser, 'char'); let nameToken = tryConsumeToken(parser, 'name'); - let regexpOrWildcardToken = - tryConsumeRegExpOrWildcardToken(nameToken, parser); + let regexpOrWildcardToken = tryConsumeRegExpOrWildcardToken(nameToken, parser); if (nameToken != null || regexpOrWildcardToken != null) { let prefix = ''; - if (charToken != null) { + if (charToken != null) prefix = charToken.value; - } if (prefix !== '' && prefix !== prefixCodePoint) { parser.pendingFixedValue += prefix; prefix = ''; } maybeAddPartFromPendingFixedValue(parser); const modifierToken = tryConsumeModifierToken(parser); - addPart(parser, prefix, nameToken, regexpOrWildcardToken, - '', modifierToken); + addPart(parser, prefix, nameToken, regexpOrWildcardToken, '', modifierToken); continue; } - const fixedToken = - charToken != null ? - charToken : tryConsumeToken(parser, 'escaped-char'); + const fixedToken = charToken != null ? charToken : tryConsumeToken(parser, 'escaped-char'); if (fixedToken != null) { parser.pendingFixedValue += fixedToken.value; @@ -894,13 +870,11 @@ function parsePatternString(input, type, options) { if (openToken != null) { prefix = consumeText(parser); nameToken = tryConsumeToken(parser, 'name'); - regexpOrWildcardToken = - tryConsumeRegExpOrWildcardToken(nameToken, parser); + regexpOrWildcardToken = tryConsumeRegExpOrWildcardToken(nameToken, parser); let suffix = consumeText(parser); consumeRequiredToken(parser, 'close'); const modifierToken = tryConsumeModifierToken(parser); - addPart(parser, prefix, nameToken, regexpOrWildcardToken, - suffix, modifierToken); + addPart(parser, prefix, nameToken, regexpOrWildcardToken, suffix, modifierToken); continue; } @@ -920,8 +894,7 @@ function generateRegexAndNameList(partList, options) { for (let n = 0; n < partList.length; n++) { const part = partList[n]; if (part.type === 'fixed-text') { - const escapedValue = - StringPrototypeReplaceAll(part.value, kEscapeRegexp, '$1'); + const escapedValue = StringPrototypeReplaceAll(part.value, kEscapeRegexp, '$1'); regexString += part.modifier === 'none' ? escapedValue : `(?:${escapedValue})${kModifiers[part.modifier] || ''}`; @@ -931,8 +904,7 @@ function generateRegexAndNameList(partList, options) { ArrayPrototypePush(nameList, part.name); let regexValue = part.value; if (part.type === 'segment-wildcard') { - regexValue = `([^${StringPrototypeReplaceAll( - delimiterCodePoint, kEscapeRegexp, '$1')}]+?)` + regexValue = `([^${StringPrototypeReplaceAll(delimiterCodePoint, kEscapeRegexp, '$1')}]+?)` } else if (part.type === 'full-wildcard') { regexValue = '.*'; } @@ -945,15 +917,12 @@ function generateRegexAndNameList(partList, options) { const pfx = StringPrototypeReplaceAll(part.prefix, kEscapeRegexp, '$1'); const sfx = StringPrototypeReplaceAll(part.suffix, kEscapeRegexp, '$1'); if (part.modifier === 'none' || part.modifier === 'optional') { - regexString += - `(?:${pfx}(${regexValue})${sfx})${kModifiers[part.modifier] || ''}`; + regexString += `(?:${pfx}(${regexValue})${sfx})${kModifiers[part.modifier] || ''}`; continue; } // TODO(@jasnell): assert part.modifier is 'zero-or-more' or 'one-or-more' // assert part.prefix and part.suffix are not empty string - regexString += - `(?:${pfx}((?:${regexValue})(?:${sfx}${pfx}(?:{regexValue}))*)` + - `${sfx})`; + regexString += `(?:${pfx}((?:${regexValue})(?:${sfx}${pfx}(?:{regexValue}))*)${sfx})`; if (part.modifier === 'zero-or-more') regexString += '?'; } regexString += '$'; @@ -997,8 +966,7 @@ function generatePatternString(partList, options) { result += escapePatternString(part.value); continue; } - result += `{${escapePatternString(part.value)}}` + - kModifiers[part.modifier] || ''; + result += `{${escapePatternString(part.value)}}${kModifiers[part.modifier] || ''}`; continue; } const needsGrouping = @@ -1009,14 +977,12 @@ function generatePatternString(partList, options) { if (needsGrouping) result += '{'; result += escapePatternString(part.prefix); if (customName) result += `:${part.name}`; - if (part.type === 'regexp') { + if (part.type === 'regexp') result += `(${part.value})`; - } else if (part.type === 'segment-wildcard' && !customName) { - result += `([^${StringPrototypeReplaceAll( - delimiterCodePoint, kEscapeRegexp, '$1')}]+?)` - } else if (part.type === 'full-wildcard') { + else if (part.type === 'segment-wildcard' && !customName) + result += `([^${StringPrototypeReplaceAll(delimiterCodePoint, kEscapeRegexp, '$1')}]+?)` + else if (part.type === 'full-wildcard') result += customName ? '(.*)' : '*'; - } result += escapePatternString(part.suffix); if (needsGrouping) result += '}'; result += kModifiers[part.modifier] || ''; @@ -1025,9 +991,8 @@ function generatePatternString(partList, options) { } function compileAndCanonicalize(component, type, options = {}) { - if (component == null) { + if (component == null) component = '*'; - } const partList = parsePatternString(component, type, options); const { @@ -1123,22 +1088,14 @@ function match(urlPattern, input, baseURLString) { hash = url.hash.slice(1); // Trim the leading # } - const protocolExecResult = - RegExpPrototypeExec(urlPattern.protocol.regex, protocol); - const usernameExecResult = - RegExpPrototypeExec(urlPattern.username.regex, username); - const passwordExecResult = - RegExpPrototypeExec(urlPattern.password.regex, password); - const hostnameExecResult = - RegExpPrototypeExec(urlPattern.hostname.regex, hostname); - const portExecResult = - RegExpPrototypeExec(urlPattern.port.regex, port); - const pathnameExecResult = - RegExpPrototypeExec(urlPattern.pathname.regex, pathname); - const searchExecResult = - RegExpPrototypeExec(urlPattern.search.regex, search); - const hashExecResult = - RegExpPrototypeExec(urlPattern.hash.regex, hash); + const protocolExecResult = RegExpPrototypeExec(urlPattern.protocol.regex, protocol); + const usernameExecResult = RegExpPrototypeExec(urlPattern.username.regex, username); + const passwordExecResult = RegExpPrototypeExec(urlPattern.password.regex, password); + const hostnameExecResult = RegExpPrototypeExec(urlPattern.hostname.regex, hostname); + const portExecResult = RegExpPrototypeExec(urlPattern.port.regex, port); + const pathnameExecResult = RegExpPrototypeExec(urlPattern.pathname.regex, pathname); + const searchExecResult = RegExpPrototypeExec(urlPattern.search.regex, search); + const hashExecResult = RegExpPrototypeExec(urlPattern.hash.regex, hash); if (protocolExecResult == null && usernameExecResult == null && passwordExecResult == null && @@ -1183,19 +1140,17 @@ class URLPattern { */ constructor(input = {}, baseURL = undefined) { if (typeof input === 'string') { - input = parseConstructorString(input, baseURL); + input = parseConstructorString(toUsvString(input), baseURL); } else { - if (baseURL !== undefined) { + validateObject(input, 'input'); + if (baseURL !== undefined) throw new ERR_INVALID_ARG_VALUE('baseURL', baseURL); - } } - validateObject(input, 'input'); input = processURLPatternInit(input, 'pattern'); const defaultPort = kSpecialSchemes[input.protocol]; - if (defaultPort !== undefined && `${input.port}` === `${defaultPort}`) { + if (defaultPort !== undefined && `${input.port}` === `${defaultPort}`) input.port = ''; - } const protocol = compileAndCanonicalize(input.protocol, 'protocol');