package org.jsoup.parser;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Test;

import java.util.List;

import static org.junit.jupiter.api.Assertions.*;

/**
 Test suite for attribute parser. Each test feeds a small HTML snippet through {@link Jsoup#parse(String)} (or the
 XML parser) and checks the attributes found on the resulting element.

 @author Jonathan Hedley, jonathan@hedley.net */
public class AttributeParseTest {

    /**
     A single start tag mixing double-quoted, single-quoted, unquoted and valueless attributes, with irregular
     spacing around {@code =} and no whitespace between the closing quote of {@code style} and {@code qux}.
     */
    @Test public void parsesRoughAttributeString() {
        String html = "<a id=\"123\" class=\"baz = 'bar'\" style = 'border: 2px'qux zim foo = 12 mux=18 />";
        // should be: <id=123>, <class=baz = 'bar'>, <style=border: 2px>, <qux=>, <zim=>, <foo=12>, <mux=18>

        Element el = Jsoup.parse(html).getElementsByTag("a").get(0);
        Attributes attr = el.attributes();
        assertEquals(7, attr.size());
        assertEquals("123", attr.get("id"));
        assertEquals("baz = 'bar'", attr.get("class"));
        assertEquals("border: 2px", attr.get("style"));
        assertEquals("", attr.get("qux"));
        assertEquals("", attr.get("zim"));
        assertEquals("12", attr.get("foo"));
        assertEquals("18", attr.get("mux"));
    }

    /**
     CR/LF pairs used as the separator between attributes, around {@code =}, and inside a quoted value.
     */
    @Test public void handlesNewLinesAndReturns() {
        String html = "<a\r\nfoo='bar\r\nqux'\r\nbar\r\n=\r\ntwo>One</a>";
        Element el = Jsoup.parse(html).select("a").first();
        assertNotNull(el, "expected an <a> element in the parsed document"); // fail clearly instead of with an NPE
        assertEquals(2, el.attributes().size());
        assertEquals("bar\r\nqux", el.attr("foo")); // currently preserves newlines in quoted attributes. todo confirm if should.
        assertEquals("two", el.attr("bar"));
    }

    /**
     A tag with no attributes at all yields an empty attribute set.
     */
    @Test public void parsesEmptyString() {
        String html = "<a />";
        Element el = Jsoup.parse(html).getElementsByTag("a").get(0);
        Attributes attr = el.attributes();
        assertEquals(0, attr.size());
    }

    /**
     A leading {@code =} is kept as part of the attribute name: the tag ends up with one attribute keyed
     {@code "=empty"} whose value is the empty string.
     */
    @Test public void canStartWithEq() {
        String html = "<a =empty />";
        // TODO this is the weirdest thing in the spec - why not consider this an attribute with an empty name, not where name is '='?
        // am I reading it wrong? https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
        Element el = Jsoup.parse(html).getElementsByTag("a").get(0);
        Attributes attr = el.attributes();
        assertEquals(1, attr.size());
        assertTrue(attr.hasKey("=empty"));
        assertEquals("", attr.get("=empty"));
    }

    /**
     Checks that the two {@code href} values come back exactly as written.
     */
    @Test public void strictAttributeUnescapes() {
        // NOTE(review): as written, neither href contains a character reference, so this only verifies that raw
        // '&', '<' and '=' pass through quoted attribute values verbatim. Given the test name, entity forms may
        // have been lost from these literals at some point - confirm against the upstream test before relying on it.
        String html = "<a id=1 href='?foo=bar&mid<=true'>One</a> <a id=2 href='?foo=bar<qux&lg=1'>Two</a>";
        Elements els = Jsoup.parse(html).select("a");
        assertEquals("?foo=bar&mid<=true", els.first().attr("href"));
        assertEquals("?foo=bar<qux&lg=1", els.last().attr("href"));
    }

    /**
     Ampersand-prefixed query parameters ({@code &wr_id}, {@code &mid-size}, {@code &ok}, {@code &wr}) must be
     returned unchanged from the attribute value.
     */
    @Test public void moreAttributeUnescapes() {
        String html = "<a href='&wr_id=123&mid-size=true&ok=&wr'>Check</a>";
        Elements els = Jsoup.parse(html).select("a");
        assertEquals("&wr_id=123&mid-size=true&ok=&wr", els.first().attr("href"));
    }

    /**
     A valueless (boolean) attribute and an explicitly empty attribute both read back as {@code ""}, and the
     element serializes back to the exact input, keeping the two syntaxes distinct.
     */
    @Test public void parsesBooleanAttributes() {
        String html = "<a normal=\"123\" boolean empty=\"\"></a>";
        Element el = Jsoup.parse(html).select("a").first();
        assertNotNull(el, "expected an <a> element in the parsed document"); // fail clearly instead of with an NPE

        assertEquals("123", el.attr("normal"));
        assertEquals("", el.attr("boolean"));
        assertEquals("", el.attr("empty"));

        List<Attribute> attributes = el.attributes().asList();
        assertEquals(3, attributes.size(), "There should be 3 attribute present");

        assertEquals(html, el.outerHtml()); // vets boolean syntax
    }

    /**
     A stray {@code /} directly before an attribute name is dropped rather than becoming part of the name; checked
     through both the default HTML parser and the XML parser.
     */
    @Test public void dropsSlashFromAttributeName() {
        String html = "<img /onerror='doMyJob'/>";
        Document doc = Jsoup.parse(html);
        assertFalse(doc.select("img[onerror]").isEmpty(), "SelfClosingStartTag ignores last character");
        assertEquals("<img onerror=\"doMyJob\">", doc.body().html());

        doc = Jsoup.parse(html, "", Parser.xmlParser());
        assertEquals("<img onerror=\"doMyJob\" />", doc.html());
    }
}