Skip to content

Commit

Permalink
Optimized the ~ (any preceding sibling) selector
Browse files Browse the repository at this point in the history
Changed the order of evaluation so that preceding siblings are tested starting from the first child, letting a match return earlier - for positive hits this significantly reduces the number of sub-queries.

Also memoize the results, which speeds up negative matches.

Fixes #1956
  • Loading branch information
jhy committed May 29, 2023
1 parent e4f4b86 commit 10ef981
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 46 deletions.
3 changes: 3 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ Release 1.16.2 [PENDING]
`W3CDom#namespaceAware(false)`.
<https://github.com/jhy/jsoup/pull/1848>

* Improvement: speed optimized the `~` (any preceding sibling) and `:nth-of-type` selectors.
<https://github.com/jhy/jsoup/issues/1956>

* Bugfix: `form` elements and empty elements (such as `img`) did not have their attributes de-duplicated.
<https://github.com/jhy/jsoup/pull/1950>

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jsoup/nodes/Comment.java
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ private static boolean isXmlDeclarationData(String data) {
String fragment = "<" + declContent + ">";
// use the HTML parser not XML, so we don't get into a recursive XML Declaration on contrived data
Document doc = Parser.htmlParser().settings(ParseSettings.preserveCase).parseInput(fragment, baseUri());
if (doc.body().children().size() > 0) {
if (doc.body().childrenSize() > 0) {
Element el = doc.body().child(0);
decl = new XmlDeclaration(NodeUtils.parser(doc).settings().normalizeTag(el.tagName()), data.startsWith("!"));
decl.attributes().addAll(el.attributes());
Expand Down
84 changes: 46 additions & 38 deletions src/main/java/org/jsoup/select/Evaluator.java
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ public static final class IsLastChild extends Evaluator {
@Override
public boolean matches(Element root, Element element) {
final Element p = element.parent();
return p != null && !(p instanceof Document) && element.elementSiblingIndex() == p.children().size()-1;
return p != null && !(p instanceof Document) && element.elementSiblingIndex() == p.childrenSize()-1;
}

@Override
Expand Down Expand Up @@ -508,7 +508,7 @@ public IsNthLastChild(int a, int b) {
protected int calculatePosition(Element root, Element element) {
if (element.parent() == null)
return 0;
return element.parent().children().size() - element.elementSiblingIndex();
return element.parent().childrenSize()- element.elementSiblingIndex();
}

@Override
Expand All @@ -522,50 +522,55 @@ protected String getPseudoClass() {
*
*/
public static class IsNthOfType extends CssNthEvaluator {
public IsNthOfType(int a, int b) {
super(a,b);
}
public IsNthOfType(int a, int b) {
super(a, b);
}

protected int calculatePosition(Element root, Element element) {
int pos = 0;
if (element.parent() == null)
protected int calculatePosition(Element root, Element element) {
Element parent = element.parent();
if (parent == null)
return 0;
Elements family = element.parent().children();
for (Element el : family) {
if (el.tag().equals(element.tag())) pos++;
if (el == element) break;

int pos = 0;
final int size = parent.childNodeSize();
for (int i = 0; i < size; i++) {
Node node = parent.childNode(i);
if (node.normalName().equals(element.normalName())) pos++;
if (node == element) break;
}
return pos;
}
return pos;
}

@Override
protected String getPseudoClass() {
return "nth-of-type";
}
@Override
protected String getPseudoClass() {
return "nth-of-type";
}
}

public static class IsNthLastOfType extends CssNthEvaluator {

public IsNthLastOfType(int a, int b) {
super(a, b);
}
public IsNthLastOfType(int a, int b) {
super(a, b);
}

@Override
protected int calculatePosition(Element root, Element element) {
int pos = 0;
if (element.parent() == null)
@Override
protected int calculatePosition(Element root, Element element) {
Element parent = element.parent();
if (parent == null)
return 0;
Elements family = element.parent().children();
for (int i = element.elementSiblingIndex(); i < family.size(); i++) {
if (family.get(i).tag().equals(element.tag())) pos++;
}
return pos;
}

@Override
protected String getPseudoClass() {
return "nth-last-of-type";
}
int pos = 0;
int size = parent.childrenSize();
for (int i = element.elementSiblingIndex(); i < size; i++) {
if (parent.child(i).normalName().equals(element.normalName())) pos++;
}
return pos;
}

@Override
protected String getPseudoClass() {
return "nth-last-of-type";
}
}

/**
Expand Down Expand Up @@ -620,9 +625,12 @@ public boolean matches(Element root, Element element) {
if (p==null || p instanceof Document) return false;

int pos = 0;
Elements family = p.children();
for (Element el : family) {
if (el.tag().equals(element.tag())) pos++;
int size = p.childrenSize();
for (int i = 0; i < size; i++) {
if (p.child(i).normalName().equals(element.normalName()))
pos++;
if (pos > 1)
break;
}
return pos == 1;
}
Expand Down
22 changes: 15 additions & 7 deletions src/main/java/org/jsoup/select/StructuralEvaluator.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;

import java.util.IdentityHashMap;

/**
* Base structural evaluator.
*/
Expand Down Expand Up @@ -108,22 +110,28 @@ public String toString() {
}

static class PreviousSibling extends StructuralEvaluator {
private final IdentityHashMap<Element, Boolean> memo = new IdentityHashMap<>(); // memoize results

public PreviousSibling(Evaluator evaluator) {
this.evaluator = evaluator;
}

@Override
public boolean matches(Element root, Element element) {
if (root == element)
final Element parent = element.parent();
if (root == element || parent == null)
return false;

Element prev = element.previousElementSibling();

while (prev != null) {
if (evaluator.matches(root, prev))
final int size = element.elementSiblingIndex();
for (int i = 0; i < size; i++) {
final Element el = parent.child(i);
Boolean matches = memo.get(el);
if (matches == null) {
matches = evaluator.matches(root, el);
memo.put(el, matches);
}
if (matches)
return true;

prev = prev.previousElementSibling();
}
return false;
}
Expand Down

0 comments on commit 10ef981

Please sign in to comment.