• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.jsoup.parser;
2 
3 import org.jsoup.Jsoup;
4 import org.jsoup.nodes.Attribute;
5 import org.jsoup.nodes.Attributes;
6 import org.jsoup.nodes.Document;
7 import org.jsoup.nodes.Element;
8 import org.jsoup.select.Elements;
9 import org.junit.jupiter.api.Test;
10 
11 import java.util.List;
12 
13 import static org.junit.jupiter.api.Assertions.*;
14 
15 /**
16  Test suite for attribute parser.
17 
18  @author Jonathan Hedley, jonathan@hedley.net */
19 public class AttributeParseTest {
20 
parsesRoughAttributeString()21     @Test public void parsesRoughAttributeString() {
22         String html = "<a id=\"123\" class=\"baz = 'bar'\" style = 'border: 2px'qux zim foo = 12 mux=18 />";
23         // should be: <id=123>, <class=baz = 'bar'>, <qux=>, <zim=>, <foo=12>, <mux.=18>
24 
25         Element el = Jsoup.parse(html).getElementsByTag("a").get(0);
26         Attributes attr = el.attributes();
27         assertEquals(7, attr.size());
28         assertEquals("123", attr.get("id"));
29         assertEquals("baz = 'bar'", attr.get("class"));
30         assertEquals("border: 2px", attr.get("style"));
31         assertEquals("", attr.get("qux"));
32         assertEquals("", attr.get("zim"));
33         assertEquals("12", attr.get("foo"));
34         assertEquals("18", attr.get("mux"));
35     }
36 
handlesNewLinesAndReturns()37     @Test public void handlesNewLinesAndReturns() {
38         String html = "<a\r\nfoo='bar\r\nqux'\r\nbar\r\n=\r\ntwo>One</a>";
39         Element el = Jsoup.parse(html).select("a").first();
40         assertEquals(2, el.attributes().size());
41         assertEquals("bar\r\nqux", el.attr("foo")); // currently preserves newlines in quoted attributes. todo confirm if should.
42         assertEquals("two", el.attr("bar"));
43     }
44 
parsesEmptyString()45     @Test public void parsesEmptyString() {
46         String html = "<a />";
47         Element el = Jsoup.parse(html).getElementsByTag("a").get(0);
48         Attributes attr = el.attributes();
49         assertEquals(0, attr.size());
50     }
51 
canStartWithEq()52     @Test public void canStartWithEq() {
53         String html = "<a =empty />";
54         // TODO this is the weirdest thing in the spec - why not consider this an attribute with an empty name, not where name is '='?
55         // am I reading it wrong? https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
56         Element el = Jsoup.parse(html).getElementsByTag("a").get(0);
57         Attributes attr = el.attributes();
58         assertEquals(1, attr.size());
59         assertTrue(attr.hasKey("=empty"));
60         assertEquals("", attr.get("=empty"));
61     }
62 
strictAttributeUnescapes()63     @Test public void strictAttributeUnescapes() {
64         String html = "<a id=1 href='?foo=bar&mid&lt=true'>One</a> <a id=2 href='?foo=bar&lt;qux&lg=1'>Two</a>";
65         Elements els = Jsoup.parse(html).select("a");
66         assertEquals("?foo=bar&mid&lt=true", els.first().attr("href"));
67         assertEquals("?foo=bar<qux&lg=1", els.last().attr("href"));
68     }
69 
moreAttributeUnescapes()70     @Test public void moreAttributeUnescapes() {
71         String html = "<a href='&wr_id=123&mid-size=true&ok=&wr'>Check</a>";
72         Elements els = Jsoup.parse(html).select("a");
73         assertEquals("&wr_id=123&mid-size=true&ok=&wr", els.first().attr("href"));
74     }
75 
parsesBooleanAttributes()76     @Test public void parsesBooleanAttributes() {
77         String html = "<a normal=\"123\" boolean empty=\"\"></a>";
78         Element el = Jsoup.parse(html).select("a").first();
79 
80         assertEquals("123", el.attr("normal"));
81         assertEquals("", el.attr("boolean"));
82         assertEquals("", el.attr("empty"));
83 
84         List<Attribute> attributes = el.attributes().asList();
85         assertEquals(3, attributes.size(), "There should be 3 attribute present");
86 
87         assertEquals(html, el.outerHtml()); // vets boolean syntax
88     }
89 
dropsSlashFromAttributeName()90     @Test public void dropsSlashFromAttributeName() {
91         String html = "<img /onerror='doMyJob'/>";
92         Document doc = Jsoup.parse(html);
93         assertFalse(doc.select("img[onerror]").isEmpty(), "SelfClosingStartTag ignores last character");
94         assertEquals("<img onerror=\"doMyJob\">", doc.body().html());
95 
96         doc = Jsoup.parse(html, "", Parser.xmlParser());
97         assertEquals("<img onerror=\"doMyJob\" />", doc.html());
98     }
99 }
100