package org.jsoup.select;

import org.jsoup.helper.Validate;
import org.jsoup.nodes.Comment;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.DocumentType;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.PseudoTextElement;
import org.jsoup.nodes.TextNode;
import org.jsoup.nodes.XmlDeclaration;
import org.jsoup.parser.ParseSettings;

import java.util.List;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import static org.jsoup.internal.Normalizer.lowerCase;
import static org.jsoup.internal.Normalizer.normalize;
import static org.jsoup.internal.StringUtil.normaliseWhitespace;

/**
 * Evaluates that an element matches the selector.
 */
public abstract class Evaluator {
    protected Evaluator() {
    }

    /**
     Provides a Predicate for this Evaluator, matching the test Element.
     * @param root the root Element, for match evaluation
     * @return a predicate that accepts an Element to test for matches with this Evaluator
     * @since 1.17.1
     */
    public Predicate<Element> asPredicate(Element root) {
        return element -> matches(root, element);
    }

    /**
     * Test if the element meets the evaluator's requirements.
     *
     * @param root    Root of the matching subtree
     * @param element tested element
     * @return Returns <tt>true</tt> if the requirements are met or
     * <tt>false</tt> otherwise
     */
    public abstract boolean matches(Element root, Element element);

    /**
     Reset any internal state in this Evaluator before executing a new Collector evaluation.
     */
    protected void reset() {
    }

    /**
     A relative evaluator cost function. During evaluation, Evaluators are sorted by ascending cost as an optimization.
     * @return the relative cost of this Evaluator
     */
    protected int cost() {
        return 5; // a nominal default cost
    }

    /**
     * Evaluator for tag name
     */
    public static final class Tag extends Evaluator {
        private final String tagName;

        public Tag(String tagName) {
            this.tagName = tagName;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return (element.nameIs(tagName));
        }

        @Override protected int cost() {
            return 1;
        }

        @Override
        public String toString() {
            return String.format("%s", tagName);
        }
    }

    /**
     * Evaluator for tag name that ends with
     */
    public static final class TagEndsWith extends Evaluator {
        private final String tagName;

        public TagEndsWith(String tagName) {
            this.tagName = tagName;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return (element.normalName().endsWith(tagName));
        }

        @Override
        public String toString() {
            return String.format("%s", tagName);
        }
    }

    /**
     * Evaluator for element id
     */
    public static final class Id extends Evaluator {
        private final String id;

        public Id(String id) {
            this.id = id;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return (id.equals(element.id()));
        }

        @Override protected int cost() {
            return 2;
        }

        @Override
        public String toString() {
            return String.format("#%s", id);
        }
    }

    /**
     * Evaluator for element class
     */
    public static final class Class extends Evaluator {
        private final String className;

        public Class(String className) {
            this.className = className;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return (element.hasClass(className));
        }

        @Override protected int cost() {
            return 6; // does whitespace scanning
        }

        @Override
        public String toString() {
            return String.format(".%s", className);
        }
    }

    /**
     * Evaluator for attribute name matching
     */
    public static final class Attribute extends Evaluator {
        private final String key;

        public Attribute(String key) {
            this.key = key;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.hasAttr(key);
        }

        @Override protected int cost() {
            return 2;
        }

        @Override
        public String toString() {
            return String.format("[%s]", key);
        }
    }

    /**
     * Evaluator for attribute name prefix matching
     */
    public static final class AttributeStarting extends Evaluator {
        private final String keyPrefix;

        public AttributeStarting(String keyPrefix) {
            Validate.notNull(keyPrefix); // OK to be empty - will find elements with any attributes
            this.keyPrefix = lowerCase(keyPrefix);
        }

        @Override
        public boolean matches(Element root, Element element) {
            List<org.jsoup.nodes.Attribute> values = element.attributes().asList();
            for (org.jsoup.nodes.Attribute attribute : values) {
                // keyPrefix was lower-cased in the constructor, so compare against the lower-cased key
                if (lowerCase(attribute.getKey()).startsWith(keyPrefix))
                    return true;
            }
            return false;
        }

        @Override protected int cost() {
            return 6;
        }

        @Override
        public String toString() {
            return String.format("[^%s]", keyPrefix);
        }
    }

    /**
     * Evaluator for attribute name/value matching
     */
    public static final class AttributeWithValue extends AttributeKeyPair {
        public AttributeWithValue(String key, String value) {
            super(key, value);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.hasAttr(key) && value.equalsIgnoreCase(element.attr(key).trim());
        }

        @Override protected int cost() {
            return 3;
        }

        @Override
        public String toString() {
            return String.format("[%s=%s]", key, value);
        }
    }

    /**
     * Evaluator for attribute name != value matching
     */
    public static final class AttributeWithValueNot extends AttributeKeyPair {
        public AttributeWithValueNot(String key, String value) {
            super(key, value);
        }

        @Override
        public boolean matches(Element root, Element element) {
            // no hasAttr check: only the value returned by attr(key) is compared
            return !value.equalsIgnoreCase(element.attr(key));
        }

        @Override protected int cost() {
            return 3;
        }

        @Override
        public String toString() {
            return String.format("[%s!=%s]", key, value);
        }
    }

    /**
     * Evaluator for attribute name/value matching (value prefix)
     */
    public static final class AttributeWithValueStarting extends AttributeKeyPair {
        public AttributeWithValueStarting(String key, String value) {
            super(key, value, false);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.hasAttr(key) && lowerCase(element.attr(key)).startsWith(value); // value is lower case already
        }

        @Override protected int cost() {
            return 4;
        }

        @Override
        public String toString() {
            return String.format("[%s^=%s]", key, value);
        }
    }

    /**
     * Evaluator for attribute name/value matching (value ending)
     */
    public static final class AttributeWithValueEnding extends AttributeKeyPair {
        public AttributeWithValueEnding(String key, String value) {
            super(key, value, false);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.hasAttr(key) && lowerCase(element.attr(key)).endsWith(value); // value is lower case
        }

        @Override protected int cost() {
            return 4;
        }

        @Override
        public String toString() {
            return String.format("[%s$=%s]", key, value);
        }
    }

    /**
     * Evaluator for attribute name/value matching (value containing)
     */
    public static final class AttributeWithValueContaining extends AttributeKeyPair {
        public AttributeWithValueContaining(String key, String value) {
            super(key, value);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.hasAttr(key) && lowerCase(element.attr(key)).contains(value); // value is lower case
        }

        @Override protected int cost() {
            return 6;
        }

        @Override
        public String toString() {
            return String.format("[%s*=%s]", key, value);
        }
    }

    /**
     * Evaluator for attribute name/value matching (value regex matching)
     */
    public static final class AttributeWithValueMatching extends Evaluator {
        final String key;
        final Pattern pattern;

        public AttributeWithValueMatching(String key, Pattern pattern) {
            this.key = normalize(key);
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            // find(), not matches(): the pattern may match any part of the attribute value
            return element.hasAttr(key) && pattern.matcher(element.attr(key)).find();
        }

        @Override protected int cost() {
            return 8;
        }

        @Override
        public String toString() {
            return String.format("[%s~=%s]", key, pattern.toString());
        }
    }

    /**
     * Abstract evaluator for attribute name/value matching
     */
    public abstract static class AttributeKeyPair extends Evaluator {
        final String key;
        final String value;

        /**
         * Creates a key/value evaluator whose value is normalized with {@code normalize(value)}.
         * @param key the attribute key; must not be empty
         * @param value the attribute value to match, optionally wrapped in single or double quotes; must not be empty
         */
        public AttributeKeyPair(String key, String value) {
            this(key, value, true);
        }

        /**
         * Creates a key/value evaluator. A value wrapped in matching single or double quotes has those quotes removed
         * before it is normalized.
         * @param key the attribute key; must not be empty
         * @param value the attribute value to match, optionally quoted; must not be empty
         * @param trimValue if true the value is passed through {@code normalize(value)}; otherwise through
         * {@code normalize(value, isStringLiteral)}
         * @throws IllegalArgumentException (via {@link Validate}) if the key or value is empty, or if the value is a
         * lone quote character
         */
        public AttributeKeyPair(String key, String value, boolean trimValue) {
            Validate.notEmpty(key);
            Validate.notEmpty(value);

            this.key = normalize(key);
            boolean isStringLiteral = value.startsWith("'") && value.endsWith("'")
                || value.startsWith("\"") && value.endsWith("\"");
            if (isStringLiteral) {
                // a lone ' or " both starts and ends with a quote; substring(1, 0) would otherwise throw
                // StringIndexOutOfBoundsException, so reject it explicitly as an invalid argument
                Validate.isTrue(value.length() > 1, "Quoted value must have content");
                value = value.substring(1, value.length() - 1);
            }

            // NOTE(review): the two-arg normalize presumably preserves whitespace of quoted literals - confirm in Normalizer
            this.value = trimValue ? normalize(value) : normalize(value, isStringLiteral);
        }
    }

    /**
     * Evaluator for any / all element matching
     */
    public static final class AllElements extends Evaluator {

        @Override
        public boolean matches(Element root, Element element) {
            return true;
        }

        @Override protected int cost() {
            return 10;
        }

        @Override
        public String toString() {
            return "*";
        }
    }

    /**
     * Evaluator for matching by sibling index number (e {@literal <} idx)
     */
    public static final class IndexLessThan extends IndexEvaluator {
        public IndexLessThan(int index) {
            super(index);
        }

        @Override
        public boolean matches(Element root, Element element) {
            // the root element itself never matches
            return root != element && element.elementSiblingIndex() < index;
        }

        @Override
        public String toString() {
            return String.format(":lt(%d)", index);
        }
    }

    /**
     * Evaluator for matching by sibling index number (e {@literal >} idx)
     */
    public static final class IndexGreaterThan extends IndexEvaluator {
        public IndexGreaterThan(int index) {
            super(index);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.elementSiblingIndex() > index;
        }

        @Override
        public String toString() {
            return String.format(":gt(%d)", index);
        }
    }

    /**
     * Evaluator for matching by sibling index number (e = idx)
     */
    public static final class IndexEquals extends IndexEvaluator {
        public IndexEquals(int index) {
            super(index);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.elementSiblingIndex() == index;
        }

        @Override
        public String toString() {
            return String.format(":eq(%d)", index);
        }
    }

    /**
     * Evaluator for matching the last sibling (css :last-child)
     */
    public static final class IsLastChild extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            final Element p = element.parent();
            // elements without a parent, or directly under the Document, never match
            return p != null && !(p instanceof Document) && element == p.lastElementChild();
        }

        @Override
        public String toString() {
            return ":last-child";
        }
    }

    /**
     * Evaluator for css :first-of-type; an {@link IsNthOfType} fixed at position 1.
     */
    public static final class IsFirstOfType extends IsNthOfType {
        public IsFirstOfType() {
            super(0, 1);
        }

        @Override
        public String toString() {
            return ":first-of-type";
        }
    }

    /**
     * Evaluator for css :last-of-type; an {@link IsNthLastOfType} fixed at position 1.
     */
    public static final class IsLastOfType extends IsNthLastOfType {
        public IsLastOfType() {
            super(0, 1);
        }

        @Override
        public String toString() {
            return ":last-of-type";
        }
    }

    /**
     * Abstract evaluator for css {@code an+b} style positional pseudo classes. Subclasses supply the position of the
     * element; this class tests that position against the {@code a} and {@code b} coefficients.
     */
    public static abstract class CssNthEvaluator extends Evaluator {
        protected final int a, b;

        /**
         * @param a the step coefficient (the {@code a} in {@code an+b})
         * @param b the offset coefficient (the {@code b} in {@code an+b})
         */
        public CssNthEvaluator(int a, int b) {
            this.a = a;
            this.b = b;
        }

        /**
         * Creates an evaluator with a step of zero, i.e. matching only position {@code b}.
         * @param b the position to match
         */
        public CssNthEvaluator(int b) {
            this(0, b);
        }

        @Override
        public boolean matches(Element root, Element element) {
            final Element p = element.parent();
            if (p == null || (p instanceof Document)) return false;

            final int pos = calculatePosition(root, element);
            if (a == 0) return pos == b;

            // pos == a*n + b for some integer n >= 0: the quotient must be non-negative and exact
            return (pos - b) * a >= 0 && (pos - b) % a == 0;
        }

        @Override
        public String toString() {
            if (a == 0)
                return String.format(":%s(%d)", getPseudoClass(), b);
            if (b == 0)
                return String.format(":%s(%dn)", getPseudoClass(), a);
            return String.format(":%s(%dn%+d)", getPseudoClass(), a, b);
        }

        /**
         * @return the pseudo class name (without the leading colon) used in {@link #toString()}
         */
        protected abstract String getPseudoClass();

        /**
         * @param root Root of the matching subtree
         * @param element the element being tested
         * @return the position of the element that is tested against {@code an+b}
         */
        protected abstract int calculatePosition(Element root, Element element);
    }

    /**
     * css-compatible Evaluator for :eq (css :nth-child)
     *
     * @see IndexEquals
     */
    public static final class IsNthChild extends CssNthEvaluator {
        public IsNthChild(int a, int b) {
            super(a, b);
        }

        @Override
        protected int calculatePosition(Element root, Element element) {
            // css positions are 1-based; the sibling index is offset by one
            return element.elementSiblingIndex() + 1;
        }

        @Override
        protected String getPseudoClass() {
            return "nth-child";
        }
    }

    /**
     * css pseudo class :nth-last-child
     *
     * @see IndexEquals
     */
    public static final class IsNthLastChild extends CssNthEvaluator {
        public IsNthLastChild(int a, int b) {
            super(a, b);
        }

        @Override
        protected int calculatePosition(Element root, Element element) {
            if (element.parent() == null)
                return 0;
            // parent's element-child count minus this element's sibling index
            return element.parent().childrenSize() - element.elementSiblingIndex();
        }

        @Override
        protected String getPseudoClass() {
            return "nth-last-child";
        }
    }

    /**
     * css pseudo class nth-of-type
     */
    public static class IsNthOfType extends CssNthEvaluator {
        public IsNthOfType(int a, int b) {
            super(a, b);
        }

        @Override
        protected int calculatePosition(Element root, Element element) {
            Element parent = element.parent();
            if (parent == null)
                return 0;

            // count child nodes with the same normal name, up to and including the element itself
            int pos = 0;
            final int size = parent.childNodeSize();
            for (int i = 0; i < size; i++) {
                Node node = parent.childNode(i);
                if (node.normalName().equals(element.normalName())) pos++;
                if (node == element) break;
            }
            return pos;
        }

        @Override
        protected String getPseudoClass() {
            return "nth-of-type";
        }
    }

    /**
     * css pseudo class nth-last-of-type
     */
    public static class IsNthLastOfType extends CssNthEvaluator {

        public IsNthLastOfType(int a, int b) {
            super(a, b);
        }

        @Override
        protected int calculatePosition(Element root, Element element) {
            Element parent = element.parent();
            if (parent == null)
                return 0;

            // count the element and each following element sibling that shares its normal name
            int pos = 0;
            Element next = element;
            while (next != null) {
                if (next.normalName().equals(element.normalName()))
                    pos++;
                next = next.nextElementSibling();
            }
            return pos;
        }

        @Override
        protected String getPseudoClass() {
            return "nth-last-of-type";
        }
    }

    /**
     * Evaluator for matching the first sibling (css :first-child)
     */
    public static final class IsFirstChild extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            final Element p = element.parent();
            return p != null && !(p instanceof Document) && element == p.firstElementChild();
        }

        @Override
        public String toString() {
            return ":first-child";
        }
    }

    /**
     * css3 pseudo-class :root
     * @see <a href="https://www.w3.org/TR/selectors-3/#root-pseudo">:root selector</a>
     */
    public static final class IsRoot extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            // when evaluating from a Document, the root is its first element child
            final Element r = root instanceof Document ? root.firstElementChild() : root;
            return element == r;
        }

        @Override protected int cost() {
            return 1;
        }

        @Override
        public String toString() {
            return ":root";
        }
    }

    /**
     * Evaluator for css :only-child: matches an element that has no sibling elements.
     */
    public static final class IsOnlyChild extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            final Element p = element.parent();
            return p != null && !(p instanceof Document) && element.siblingElements().isEmpty();
        }

        @Override
        public String toString() {
            return ":only-child";
        }
    }

    /**
     * Evaluator for css :only-of-type: matches an element that is the only element child of its parent with that
     * normal name.
     */
    public static final class IsOnlyOfType extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            final Element p = element.parent();
            if (p == null || p instanceof Document) return false;

            int pos = 0;
            Element next = p.firstElementChild();
            while (next != null) {
                if (next.normalName().equals(element.normalName()))
                    pos++;
                if (pos > 1)
                    break; // a second element of this type was found; no need to scan further
                next = next.nextElementSibling();
            }
            return pos == 1;
        }

        @Override
        public String toString() {
            return ":only-of-type";
        }
    }

    /**
     * Evaluator for css :empty: matches an element whose child nodes are all blank text nodes, comments, XML
     * declarations, or document types.
     */
    public static final class IsEmpty extends Evaluator {
        @Override
        public boolean matches(Element root, Element element) {
            List<Node> family = element.childNodes();
            for (Node n : family) {
                if (n instanceof TextNode) {
                    // keep scanning past blank text: a later sibling may still make the element non-empty
                    if (!((TextNode) n).isBlank())
                        return false;
                } else if (!(n instanceof Comment || n instanceof XmlDeclaration || n instanceof DocumentType)) {
                    return false;
                }
            }
            return true;
        }

        @Override
        public String toString() {
            return ":empty";
        }
    }

    /**
     * Abstract evaluator for sibling index matching
     *
     * @author ant
     */
    public abstract static class IndexEvaluator extends Evaluator {
        final int index;

        public IndexEvaluator(int index) {
            this.index = index;
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) text
     */
    public static final class ContainsText extends Evaluator {
        private final String searchText;

        public ContainsText(String searchText) {
            this.searchText = lowerCase(normaliseWhitespace(searchText));
        }

        @Override
        public boolean matches(Element root, Element element) {
            return lowerCase(element.text()).contains(searchText);
        }

        @Override protected int cost() {
            return 10;
        }

        @Override
        public String toString() {
            return String.format(":contains(%s)", searchText);
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) wholeText. Neither the input nor the element text is
     * normalized. <code>:containsWholeText()</code>
     * @since 1.15.1.
     */
    public static final class ContainsWholeText extends Evaluator {
        private final String searchText;

        public ContainsWholeText(String searchText) {
            this.searchText = searchText;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.wholeText().contains(searchText);
        }

        @Override protected int cost() {
            return 10;
        }

        @Override
        public String toString() {
            return String.format(":containsWholeText(%s)", searchText);
        }
    }

    /**
     * Evaluator for matching Element (but <b>not</b> its descendants) wholeText. Neither the input nor the element text is
     * normalized. <code>:containsWholeOwnText()</code>
     * @since 1.15.1.
     */
    public static final class ContainsWholeOwnText extends Evaluator {
        private final String searchText;

        public ContainsWholeOwnText(String searchText) {
            this.searchText = searchText;
        }

        @Override
        public boolean matches(Element root, Element element) {
            return element.wholeOwnText().contains(searchText);
        }

        @Override
        public String toString() {
            return String.format(":containsWholeOwnText(%s)", searchText);
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) data
     */
    public static final class ContainsData extends Evaluator {
        private final String searchText;

        public ContainsData(String searchText) {
            this.searchText = lowerCase(searchText);
        }

        @Override
        public boolean matches(Element root, Element element) {
            return lowerCase(element.data()).contains(searchText); // not whitespace normalized
        }

        @Override
        public String toString() {
            return String.format(":containsData(%s)", searchText);
        }
    }

    /**
     * Evaluator for matching Element's own text
     */
    public static final class ContainsOwnText extends Evaluator {
        private final String searchText;

        public ContainsOwnText(String searchText) {
            this.searchText = lowerCase(normaliseWhitespace(searchText));
        }

        @Override
        public boolean matches(Element root, Element element) {
            return lowerCase(element.ownText()).contains(searchText);
        }

        @Override
        public String toString() {
            return String.format(":containsOwn(%s)", searchText);
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) text with regex
     */
    public static final class Matches extends Evaluator {
        private final Pattern pattern;

        public Matches(Pattern pattern) {
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            Matcher m = pattern.matcher(element.text());
            return m.find();
        }

        @Override protected int cost() {
            return 8;
        }

        @Override
        public String toString() {
            return String.format(":matches(%s)", pattern);
        }
    }

    /**
     * Evaluator for matching Element's own text with regex
     */
    public static final class MatchesOwn extends Evaluator {
        private final Pattern pattern;

        public MatchesOwn(Pattern pattern) {
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            Matcher m = pattern.matcher(element.ownText());
            return m.find();
        }

        @Override protected int cost() {
            return 7;
        }

        @Override
        public String toString() {
            return String.format(":matchesOwn(%s)", pattern);
        }
    }

    /**
     * Evaluator for matching Element (and its descendants) whole text with regex.
     * @since 1.15.1.
     */
    public static final class MatchesWholeText extends Evaluator {
        private final Pattern pattern;

        public MatchesWholeText(Pattern pattern) {
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            Matcher m = pattern.matcher(element.wholeText());
            return m.find();
        }

        @Override protected int cost() {
            return 8;
        }

        @Override
        public String toString() {
            return String.format(":matchesWholeText(%s)", pattern);
        }
    }

    /**
     * Evaluator for matching Element's own whole text with regex.
     * @since 1.15.1.
     */
    public static final class MatchesWholeOwnText extends Evaluator {
        private final Pattern pattern;

        public MatchesWholeOwnText(Pattern pattern) {
            this.pattern = pattern;
        }

        @Override
        public boolean matches(Element root, Element element) {
            Matcher m = pattern.matcher(element.wholeOwnText());
            return m.find();
        }

        @Override protected int cost() {
            return 7;
        }

        @Override
        public String toString() {
            return String.format(":matchesWholeOwnText(%s)", pattern);
        }
    }

    /**
     * Evaluator for :matchText. Matches only {@link PseudoTextElement}s. As a side effect of testing any other
     * element, each of that element's text nodes is replaced in the DOM by a new PseudoTextElement (built from the
     * element's tag name and namespace, base URI, and attributes) which then receives the text node as its child.
     */
    public static final class MatchText extends Evaluator {

        @Override
        public boolean matches(Element root, Element element) {
            if (element instanceof PseudoTextElement)
                return true;

            List<TextNode> textNodes = element.textNodes();
            for (TextNode textNode : textNodes) {
                PseudoTextElement pel = new PseudoTextElement(
                    org.jsoup.parser.Tag.valueOf(element.tagName(), element.tag().namespace(), ParseSettings.preserveCase), element.baseUri(), element.attributes());
                // swap the wrapper into the text node's place, then move the text node inside it
                textNode.replaceWith(pel);
                pel.appendChild(textNode);
            }
            return false;
        }

        @Override protected int cost() {
            return -1; // forces first evaluation, which prepares the DOM for later evaluator matches
        }

        @Override
        public String toString() {
            return ":matchText";
        }
    }
}