Skip to content
This repository has been archived by the owner on Nov 1, 2024. It is now read-only.

Commit

Permalink
Put entities trie in compile-time const
Browse files Browse the repository at this point in the history
This avoids building the tree at runtime, and lets the tree (multiple MB) be shared between isolates.
  • Loading branch information
moffatman committed Aug 10, 2024
1 parent be90498 commit 91b93b5
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 18 deletions.
21 changes: 3 additions & 18 deletions lib/src/tokenizer.dart
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,9 @@ import '../parser.dart' show HtmlParser;
import 'constants.dart';
import 'html_input_stream.dart';
import 'token.dart';
import 'trie.dart';
import 'utils.dart';

// Index entity names in a character-by-character trie, for faster lookups

/// A single node of the trie used to look up named entities.
///
/// A node carries no payload of its own; it only links to the nodes reachable
/// by consuming one more character.
class _EntityTrieNode {
  /// Outgoing edges of this node, keyed by the string labelling each edge.
  final children = <String, _EntityTrieNode>{};
}

/// Root of the trie that contains every key of [entities].
///
/// Following one child per character of an entity name, starting from this
/// node, walks the path that was inserted for that name.
final _EntityTrieNode _entitiesTrieRoot = _buildEntitiesTrie();

/// Inserts every key of [entities] into a fresh trie and returns its root.
_EntityTrieNode _buildEntitiesTrie() {
  final trieRoot = _EntityTrieNode();
  for (final name in entities.keys) {
    var cursor = trieRoot;
    // Splitting on the empty pattern yields one single-code-unit string per
    // position, i.e. the same edge labels as indexing the name with `[i]`.
    for (final label in name.split('')) {
      cursor = cursor.children.putIfAbsent(label, () => _EntityTrieNode());
    }
  }
  return trieRoot;
}

// TODO(jmesserly): lots of ways to make this faster:
// - use switch instead of contains, indexOf
Expand Down Expand Up @@ -297,11 +282,11 @@ class HtmlTokenizer implements Iterator<Token> {
//
// Consume characters and compare them to a substring of the
// entity names in the list until the substring no longer matches.
var node = _entitiesTrieRoot.children[charStack.last];
dynamic node = entitiesTrieRoot[charStack.last?.codeUnitAt(0)];

while (node != null && charStack.last != eof) {
charStack.add(stream.char());
node = node.children[charStack.last];
node = (node as Map)[charStack.last?.codeUnitAt(0)];
}

// At this point we have a string that starts with some characters
Expand Down
Loading

0 comments on commit 91b93b5

Please sign in to comment.