1 package org.jsoup.nodes; 2 3 import org.jsoup.Jsoup; 4 import org.junit.jupiter.api.Test; 5 6 import java.util.NoSuchElementException; 7 8 import static org.junit.jupiter.api.Assertions.*; 9 10 class NodeIteratorTest { 11 String html = "<div id=1><p>One<p>Two</div><div id=2><p>Three<p>Four</div>"; 12 canIterateNodes()13 @Test void canIterateNodes() { 14 Document doc = Jsoup.parse(html); 15 NodeIterator<Node> it = NodeIterator.from(doc); 16 assertIterates(it, "#root;html;head;body;div#1;p;One;p;Two;div#2;p;Three;p;Four;"); 17 // todo - need to review that the Document object #root holds the html element as child. Why not have document root == html element? 18 assertFalse(it.hasNext()); 19 20 boolean threw = false; 21 try { 22 it.next(); 23 } catch (NoSuchElementException e) { 24 threw = true; 25 } 26 assertTrue(threw); 27 } 28 hasNextIsPure()29 @Test void hasNextIsPure() { 30 Document doc = Jsoup.parse(html); 31 NodeIterator<Node> it = NodeIterator.from(doc); 32 assertTrue(it.hasNext()); 33 assertTrue(it.hasNext()); 34 assertIterates(it, "#root;html;head;body;div#1;p;One;p;Two;div#2;p;Three;p;Four;"); 35 assertFalse(it.hasNext()); 36 } 37 iterateSubTree()38 @Test void iterateSubTree() { 39 Document doc = Jsoup.parse(html); 40 41 Element div1 = doc.expectFirst("div#1"); 42 NodeIterator<Node> it = NodeIterator.from(div1); 43 assertIterates(it, "div#1;p;One;p;Two;"); 44 assertFalse(it.hasNext()); 45 46 Element div2 = doc.expectFirst("div#2"); 47 NodeIterator<Node> it2 = NodeIterator.from(div2); 48 assertIterates(it2, "div#2;p;Three;p;Four;"); 49 assertFalse(it2.hasNext()); 50 } 51 canRestart()52 @Test void canRestart() { 53 Document doc = Jsoup.parse(html); 54 55 NodeIterator<Node> it = NodeIterator.from(doc); 56 assertIterates(it, "#root;html;head;body;div#1;p;One;p;Two;div#2;p;Three;p;Four;"); 57 58 it.restart(doc.expectFirst("div#2")); 59 assertIterates(it, "div#2;p;Three;p;Four;"); 60 } 61 canIterateJustOneSibling()62 @Test void canIterateJustOneSibling() { 63 Document doc = Jsoup.parse(html); 64 Element p2 = doc.expectFirst("p:contains(Two)"); 65 assertEquals("Two", p2.text()); 66 67 NodeIterator<Node> it = NodeIterator.from(p2); 68 assertIterates(it, "p;Two;"); 69 70 NodeIterator<Element> elIt = new NodeIterator<>(p2, Element.class); 71 Element found = elIt.next(); 72 assertSame(p2, found); 73 assertFalse(elIt.hasNext()); 74 } 75 canIterateFirstEmptySibling()76 @Test void canIterateFirstEmptySibling() { 77 Document doc = Jsoup.parse("<div><p id=1></p><p id=2>.</p><p id=3>..</p>"); 78 Element p1 = doc.expectFirst("p#1"); 79 assertEquals("", p1.ownText()); 80 81 NodeIterator<Node> it = NodeIterator.from(p1); 82 assertTrue(it.hasNext()); 83 Node node = it.next(); 84 assertSame(p1, node); 85 assertFalse(it.hasNext()); 86 } 87 canRemoveViaIterator()88 @Test void canRemoveViaIterator() { 89 String html = "<div id=out1><div id=1><p>One<p>Two</div><div id=2><p>Three<p>Four</div></div><div id=out2>Out2"; 90 Document doc = Jsoup.parse(html); 91 92 NodeIterator<Node> it = NodeIterator.from(doc); 93 StringBuilder seen = new StringBuilder(); 94 while (it.hasNext()) { 95 Node node = it.next(); 96 if (node.attr("id").equals("1")) 97 it.remove(); 98 trackSeen(node, seen); 99 } 100 assertEquals("#root;html;head;body;div#out1;div#1;div#2;p;Three;p;Four;div#out2;Out2;", seen.toString()); 101 assertContents(doc, "#root;html;head;body;div#out1;div#2;p;Three;p;Four;div#out2;Out2;"); 102 103 it = NodeIterator.from(doc); 104 seen = new StringBuilder(); 105 while (it.hasNext()) { 106 Node node = it.next(); 107 if (node.attr("id").equals("2")) 108 it.remove(); 109 trackSeen(node, seen); 110 } 111 assertEquals("#root;html;head;body;div#out1;div#2;div#out2;Out2;", seen.toString()); 112 assertContents(doc, "#root;html;head;body;div#out1;div#out2;Out2;"); 113 } 114 canRemoveViaNode()115 @Test void canRemoveViaNode() { 116 String html = "<div id=out1><div id=1><p>One<p>Two</div><div id=2><p>Three<p>Four</div></div><div id=out2>Out2"; 117 Document doc = Jsoup.parse(html); 118 119 NodeIterator<Node> it = NodeIterator.from(doc); 120 StringBuilder seen = new StringBuilder(); 121 while (it.hasNext()) { 122 Node node = it.next(); 123 if (node.attr("id").equals("1")) 124 node.remove(); 125 trackSeen(node, seen); 126 } 127 assertEquals("#root;html;head;body;div#out1;div#1;div#2;p;Three;p;Four;div#out2;Out2;", seen.toString()); 128 assertContents(doc, "#root;html;head;body;div#out1;div#2;p;Three;p;Four;div#out2;Out2;"); 129 130 it = NodeIterator.from(doc); 131 seen = new StringBuilder(); 132 while (it.hasNext()) { 133 Node node = it.next(); 134 if (node.attr("id").equals("2")) 135 node.remove(); 136 trackSeen(node, seen); 137 } 138 assertEquals("#root;html;head;body;div#out1;div#2;div#out2;Out2;", seen.toString()); 139 assertContents(doc, "#root;html;head;body;div#out1;div#out2;Out2;"); 140 } 141 canReplace()142 @Test void canReplace() { 143 String html = "<div id=out1><div id=1><p>One<p>Two</div><div id=2><p>Three<p>Four</div></div><div id=out2>Out2"; 144 Document doc = Jsoup.parse(html); 145 146 NodeIterator<Node> it = NodeIterator.from(doc); 147 StringBuilder seen = new StringBuilder(); 148 while (it.hasNext()) { 149 Node node = it.next(); 150 trackSeen(node, seen); 151 if (node.attr("id").equals("1")) { 152 node.replaceWith(new Element("span").text("Foo")); 153 } 154 } 155 assertEquals("#root;html;head;body;div#out1;div#1;span;Foo;div#2;p;Three;p;Four;div#out2;Out2;", seen.toString()); 156 // ^^ we don't see <p>One, do see the replaced in <span>, and the subsequent nodes 157 assertContents(doc, "#root;html;head;body;div#out1;span;Foo;div#2;p;Three;p;Four;div#out2;Out2;"); 158 159 it = NodeIterator.from(doc); 160 seen = new StringBuilder(); 161 while (it.hasNext()) { 162 Node node = it.next(); 163 trackSeen(node, seen); 164 if (node.attr("id").equals("2")) { 165 node.replaceWith(new Element("span").text("Bar")); 166 } 167 } 168 assertEquals("#root;html;head;body;div#out1;span;Foo;div#2;span;Bar;div#out2;Out2;", seen.toString()); 169 assertContents(doc, "#root;html;head;body;div#out1;span;Foo;span;Bar;div#out2;Out2;"); 170 } 171 canWrap()172 @Test void canWrap() { 173 Document doc = Jsoup.parse(html); 174 NodeIterator<Node> it = NodeIterator.from(doc); 175 boolean sawInner = false; 176 while (it.hasNext()) { 177 Node node = it.next(); 178 if (node.attr("id").equals("1")) { 179 node.wrap("<div id=outer>"); 180 } 181 if (node instanceof TextNode && ((TextNode) node).text().equals("One")) 182 sawInner = true; 183 } 184 assertContents(doc, "#root;html;head;body;div#outer;div#1;p;One;p;Two;div#2;p;Three;p;Four;"); 185 assertTrue(sawInner); 186 } 187 canFilterForElements()188 @Test void canFilterForElements() { 189 Document doc = Jsoup.parse(html); 190 NodeIterator<Element> it = new NodeIterator<>(doc, Element.class); 191 192 StringBuilder seen = new StringBuilder(); 193 while (it.hasNext()) { 194 Element el = it.next(); 195 assertNotNull(el); 196 trackSeen(el, seen); 197 } 198 199 assertEquals("#root;html;head;body;div#1;p;p;div#2;p;p;", seen.toString()); 200 } 201 canFilterForTextNodes()202 @Test void canFilterForTextNodes() { 203 Document doc = Jsoup.parse(html); 204 NodeIterator<TextNode> it = new NodeIterator<>(doc, TextNode.class); 205 206 StringBuilder seen = new StringBuilder(); 207 while (it.hasNext()) { 208 TextNode text = it.next(); 209 assertNotNull(text); 210 trackSeen(text, seen); 211 } 212 213 assertEquals("One;Two;Three;Four;", seen.toString()); 214 assertContents(doc, "#root;html;head;body;div#1;p;One;p;Two;div#2;p;Three;p;Four;"); 215 } 216 canModifyFilteredElements()217 @Test void canModifyFilteredElements() { 218 Document doc = Jsoup.parse(html); 219 NodeIterator<Element> it = new NodeIterator<>(doc, Element.class); 220 221 StringBuilder seen = new StringBuilder(); 222 while (it.hasNext()) { 223 Element el = it.next(); 224 if (!el.ownText().isEmpty()) 225 el.text(el.ownText() + "++"); 226 trackSeen(el, seen); 227 } 228 229 assertEquals("#root;html;head;body;div#1;p;p;div#2;p;p;", seen.toString()); 230 assertContents(doc, "#root;html;head;body;div#1;p;One++;p;Two++;div#2;p;Three++;p;Four++;"); 231 } 232 assertIterates(NodeIterator<T> it, String expected)233 static <T extends Node> void assertIterates(NodeIterator<T> it, String expected) { 234 Node previous = null; 235 StringBuilder actual = new StringBuilder(); 236 while (it.hasNext()) { 237 Node node = it.next(); 238 assertNotNull(node); 239 assertNotSame(previous, node); 240 241 trackSeen(node, actual); 242 previous = node; 243 } 244 assertEquals(expected, actual.toString()); 245 } 246 assertContents(Element el, String expected)247 static void assertContents(Element el, String expected) { 248 NodeIterator<Node> it = NodeIterator.from(el); 249 assertIterates(it, expected); 250 } 251 trackSeen(Node node, StringBuilder actual)252 static void trackSeen(Node node, StringBuilder actual) { 253 if (node instanceof Element) { 254 Element el = (Element) node; 255 actual.append(el.tagName()); 256 if (el.hasAttr("id")) 257 actual.append("#").append(el.id()); 258 } 259 else if (node instanceof TextNode) 260 actual.append(((TextNode) node).text()); 261 else 262 actual.append(node.nodeName()); 263 actual.append(";"); 264 } 265 266 }