Skip to content

Commit

Permalink
Optimize the any-character parser again.
Browse files Browse the repository at this point in the history
  • Loading branch information
renggli committed Jan 2, 2025
1 parent e909594 commit 517c63b
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 9 deletions.
3 changes: 3 additions & 0 deletions lib/src/parser/predicate/character.dart
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import 'package:meta/meta.dart';

import '../../../parser.dart';

/// Abstract parser for character classes.
Expand All @@ -18,6 +20,7 @@ abstract class CharacterParser extends Parser<String> {
: SingleCharacterParser(predicate, message);

/// Internal constructor that subclasses chain to through `super.internal()`.
@internal
CharacterParser.internal(this.predicate, this.message);

/// Predicate indicating whether a character can be consumed.
Expand Down
32 changes: 31 additions & 1 deletion lib/src/parser/predicate/single_character.dart
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import 'package:meta/meta.dart';

import '../../core/context.dart';
import '../../core/result.dart';
import '../character/predicate.dart';
import '../character/predicates/constant.dart';
import 'character.dart';
import 'unicode_character.dart';

Expand All @@ -12,7 +14,14 @@ import 'unicode_character.dart';
/// cannot be expressed as a single 16-bit value), [UnicodeCharacterParser]
/// should be used instead.
class SingleCharacterParser extends CharacterParser {
SingleCharacterParser(super.predicate, super.message) : super.internal();
factory SingleCharacterParser(CharacterPredicate predicate, String message) =>
const ConstantCharPredicate(true) == predicate
? AnySingleCharacterParser.internal(predicate, message)
: SingleCharacterParser.internal(predicate, message);

@internal
SingleCharacterParser.internal(super.predicate, super.message)
: super.internal();

@override
Result<String> parseOn(Context context) {
Expand All @@ -36,3 +45,24 @@ class SingleCharacterParser extends CharacterParser {
@override
SingleCharacterParser copy() => SingleCharacterParser(predicate, message);
}

/// Specialization of [SingleCharacterParser] for a predicate that is assumed
/// to accept every character.
///
/// Neither parse method consults `predicate`: as long as a code unit is
/// available at the current position it is consumed, otherwise parsing fails.
class AnySingleCharacterParser extends SingleCharacterParser {
  @internal
  AnySingleCharacterParser.internal(super.predicate, super.message)
      : super.internal();

  @override
  Result<String> parseOn(Context context) {
    final input = context.buffer;
    final start = context.position;
    // Only the end of the input can make this parser fail.
    if (start >= input.length) {
      return context.failure(message);
    }
    return context.success(input[start], start + 1);
  }

  @override
  int fastParseOn(String buffer, int position) {
    // Returns the position after the consumed code unit, or -1 at the end of
    // the input.
    if (position < buffer.length) {
      return position + 1;
    }
    return -1;
  }
}
68 changes: 60 additions & 8 deletions lib/src/parser/predicate/unicode_character.dart
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import 'package:meta/meta.dart';

import '../../core/context.dart';
import '../../core/result.dart';
import '../../shared/annotations.dart';
import '../character/predicate.dart';
import '../character/predicates/constant.dart';
import 'character.dart';
import 'single_character.dart';

Expand All @@ -11,24 +15,31 @@ import 'single_character.dart';
/// single 16-bit value) comes at an extra cost, to avoid this consider using
/// [SingleCharacterParser] instead.
class UnicodeCharacterParser extends CharacterParser {
UnicodeCharacterParser(super.predicate, super.message) : super.internal();
factory UnicodeCharacterParser(
CharacterPredicate predicate, String message) =>
const ConstantCharPredicate(true) == predicate
? AnyUnicodeCharacterParser.internal(predicate, message)
: UnicodeCharacterParser.internal(predicate, message);

@internal
UnicodeCharacterParser.internal(super.predicate, super.message)
: super.internal();

@override
Result<String> parseOn(Context context) {
final buffer = context.buffer;
final position = context.position;
var position = context.position;
if (position < buffer.length) {
var codeUnit = buffer.codeUnitAt(position);
var nextPosition = position + 1;
if (_isLeadSurrogate(codeUnit) && nextPosition < buffer.length) {
final nextCodeUnit = buffer.codeUnitAt(nextPosition);
var codeUnit = buffer.codeUnitAt(position++);
if (_isLeadSurrogate(codeUnit) && position < buffer.length) {
final nextCodeUnit = buffer.codeUnitAt(position);
if (_isTrailSurrogate(nextCodeUnit)) {
codeUnit = _combineSurrogatePair(codeUnit, nextCodeUnit);
nextPosition++;
position++;
}
}
if (predicate.test(codeUnit)) {
return context.success(String.fromCharCode(codeUnit), nextPosition);
return context.success(String.fromCharCode(codeUnit), position);
}
}
return context.failure(message);
Expand Down Expand Up @@ -56,6 +67,47 @@ class UnicodeCharacterParser extends CharacterParser {
UnicodeCharacterParser copy() => UnicodeCharacterParser(predicate, message);
}

/// Specialization of [UnicodeCharacterParser] for a predicate that is assumed
/// to accept every character.
///
/// Neither parse method consults `predicate`. A lead surrogate immediately
/// followed by a trail surrogate is consumed as one character; any other code
/// unit is consumed on its own.
class AnyUnicodeCharacterParser extends UnicodeCharacterParser {
  @internal
  AnyUnicodeCharacterParser.internal(super.predicate, super.message)
      : super.internal();

  @override
  Result<String> parseOn(Context context) {
    final input = context.buffer;
    final start = context.position;
    if (start >= input.length) {
      return context.failure(message);
    }
    final first = input.codeUnitAt(start);
    final next = start + 1;
    if (_isLeadSurrogate(first) && next < input.length) {
      final second = input.codeUnitAt(next);
      if (_isTrailSurrogate(second)) {
        // Both halves of the pair are consumed and merged into one value.
        final combined = _combineSurrogatePair(first, second);
        return context.success(String.fromCharCode(combined), next + 1);
      }
    }
    // Not a complete surrogate pair: consume the single code unit as is.
    return context.success(String.fromCharCode(first), next);
  }

  @override
  int fastParseOn(String buffer, int position) {
    if (position >= buffer.length) {
      return -1;
    }
    final next = position + 1;
    // Advance by two code units only for a complete surrogate pair.
    final isPair = _isLeadSurrogate(buffer.codeUnitAt(position)) &&
        next < buffer.length &&
        _isTrailSurrogate(buffer.codeUnitAt(next));
    return isPair ? next + 1 : next;
  }
}

// The following tests are adapted from the Dart SDK:
// https://github.com/dart-lang/sdk/blob/1207250b0d5687f9016cf115068addf6593dba58/sdk/lib/core/string.dart#L932-L955

Expand Down

0 comments on commit 517c63b

Please sign in to comment.