• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.jsoup.nodes;
2 
3 import org.jsoup.Jsoup;
4 import org.junit.jupiter.api.Test;
5 
6 import java.util.NoSuchElementException;
7 
8 import static org.junit.jupiter.api.Assertions.*;
9 
10 class NodeIteratorTest {
11     String html = "<div id=1><p>One<p>Two</div><div id=2><p>Three<p>Four</div>";
12 
canIterateNodes()13     @Test void canIterateNodes() {
14         Document doc = Jsoup.parse(html);
15         NodeIterator<Node> it = NodeIterator.from(doc);
16         assertIterates(it, "#root;html;head;body;div#1;p;One;p;Two;div#2;p;Three;p;Four;");
17         // todo - need to review that the Document object #root holds the html element as child. Why not have document root == html element?
18         assertFalse(it.hasNext());
19 
20         boolean threw = false;
21         try {
22             it.next();
23         } catch (NoSuchElementException e) {
24             threw = true;
25         }
26         assertTrue(threw);
27     }
28 
hasNextIsPure()29     @Test void hasNextIsPure() {
30         Document doc = Jsoup.parse(html);
31         NodeIterator<Node> it = NodeIterator.from(doc);
32         assertTrue(it.hasNext());
33         assertTrue(it.hasNext());
34         assertIterates(it, "#root;html;head;body;div#1;p;One;p;Two;div#2;p;Three;p;Four;");
35         assertFalse(it.hasNext());
36     }
37 
iterateSubTree()38     @Test void iterateSubTree() {
39         Document doc = Jsoup.parse(html);
40 
41         Element div1 = doc.expectFirst("div#1");
42         NodeIterator<Node> it = NodeIterator.from(div1);
43         assertIterates(it, "div#1;p;One;p;Two;");
44         assertFalse(it.hasNext());
45 
46         Element div2 = doc.expectFirst("div#2");
47         NodeIterator<Node> it2 = NodeIterator.from(div2);
48         assertIterates(it2, "div#2;p;Three;p;Four;");
49         assertFalse(it2.hasNext());
50     }
51 
canRestart()52     @Test void canRestart() {
53         Document doc = Jsoup.parse(html);
54 
55         NodeIterator<Node> it = NodeIterator.from(doc);
56         assertIterates(it, "#root;html;head;body;div#1;p;One;p;Two;div#2;p;Three;p;Four;");
57 
58         it.restart(doc.expectFirst("div#2"));
59         assertIterates(it, "div#2;p;Three;p;Four;");
60     }
61 
canIterateJustOneSibling()62     @Test void canIterateJustOneSibling() {
63         Document doc = Jsoup.parse(html);
64         Element p2 = doc.expectFirst("p:contains(Two)");
65         assertEquals("Two", p2.text());
66 
67         NodeIterator<Node> it = NodeIterator.from(p2);
68         assertIterates(it, "p;Two;");
69 
70         NodeIterator<Element> elIt = new NodeIterator<>(p2, Element.class);
71         Element found = elIt.next();
72         assertSame(p2, found);
73         assertFalse(elIt.hasNext());
74     }
75 
canIterateFirstEmptySibling()76     @Test void canIterateFirstEmptySibling() {
77         Document doc = Jsoup.parse("<div><p id=1></p><p id=2>.</p><p id=3>..</p>");
78         Element p1 = doc.expectFirst("p#1");
79         assertEquals("", p1.ownText());
80 
81         NodeIterator<Node> it = NodeIterator.from(p1);
82         assertTrue(it.hasNext());
83         Node node = it.next();
84         assertSame(p1, node);
85         assertFalse(it.hasNext());
86     }
87 
canRemoveViaIterator()88     @Test void canRemoveViaIterator() {
89         String html = "<div id=out1><div id=1><p>One<p>Two</div><div id=2><p>Three<p>Four</div></div><div id=out2>Out2";
90         Document doc = Jsoup.parse(html);
91 
92         NodeIterator<Node> it = NodeIterator.from(doc);
93         StringBuilder seen = new StringBuilder();
94         while (it.hasNext()) {
95             Node node = it.next();
96             if (node.attr("id").equals("1"))
97                 it.remove();
98             trackSeen(node, seen);
99         }
100         assertEquals("#root;html;head;body;div#out1;div#1;div#2;p;Three;p;Four;div#out2;Out2;", seen.toString());
101         assertContents(doc, "#root;html;head;body;div#out1;div#2;p;Three;p;Four;div#out2;Out2;");
102 
103         it = NodeIterator.from(doc);
104         seen = new StringBuilder();
105         while (it.hasNext()) {
106             Node node = it.next();
107             if (node.attr("id").equals("2"))
108                 it.remove();
109             trackSeen(node, seen);
110         }
111         assertEquals("#root;html;head;body;div#out1;div#2;div#out2;Out2;", seen.toString());
112         assertContents(doc, "#root;html;head;body;div#out1;div#out2;Out2;");
113     }
114 
canRemoveViaNode()115     @Test void canRemoveViaNode() {
116         String html = "<div id=out1><div id=1><p>One<p>Two</div><div id=2><p>Three<p>Four</div></div><div id=out2>Out2";
117         Document doc = Jsoup.parse(html);
118 
119         NodeIterator<Node> it = NodeIterator.from(doc);
120         StringBuilder seen = new StringBuilder();
121         while (it.hasNext()) {
122             Node node = it.next();
123             if (node.attr("id").equals("1"))
124                 node.remove();
125             trackSeen(node, seen);
126         }
127         assertEquals("#root;html;head;body;div#out1;div#1;div#2;p;Three;p;Four;div#out2;Out2;", seen.toString());
128         assertContents(doc, "#root;html;head;body;div#out1;div#2;p;Three;p;Four;div#out2;Out2;");
129 
130         it = NodeIterator.from(doc);
131         seen = new StringBuilder();
132         while (it.hasNext()) {
133             Node node = it.next();
134             if (node.attr("id").equals("2"))
135                 node.remove();
136             trackSeen(node, seen);
137         }
138         assertEquals("#root;html;head;body;div#out1;div#2;div#out2;Out2;", seen.toString());
139         assertContents(doc, "#root;html;head;body;div#out1;div#out2;Out2;");
140     }
141 
canReplace()142     @Test void canReplace() {
143         String html = "<div id=out1><div id=1><p>One<p>Two</div><div id=2><p>Three<p>Four</div></div><div id=out2>Out2";
144         Document doc = Jsoup.parse(html);
145 
146         NodeIterator<Node> it = NodeIterator.from(doc);
147         StringBuilder seen = new StringBuilder();
148         while (it.hasNext()) {
149             Node node = it.next();
150             trackSeen(node, seen);
151             if (node.attr("id").equals("1")) {
152                 node.replaceWith(new Element("span").text("Foo"));
153             }
154         }
155         assertEquals("#root;html;head;body;div#out1;div#1;span;Foo;div#2;p;Three;p;Four;div#out2;Out2;", seen.toString());
156         // ^^ we don't see <p>One, do see the replaced in <span>, and the subsequent nodes
157         assertContents(doc, "#root;html;head;body;div#out1;span;Foo;div#2;p;Three;p;Four;div#out2;Out2;");
158 
159         it = NodeIterator.from(doc);
160         seen = new StringBuilder();
161         while (it.hasNext()) {
162             Node node = it.next();
163             trackSeen(node, seen);
164             if (node.attr("id").equals("2")) {
165                 node.replaceWith(new Element("span").text("Bar"));
166             }
167         }
168         assertEquals("#root;html;head;body;div#out1;span;Foo;div#2;span;Bar;div#out2;Out2;", seen.toString());
169         assertContents(doc, "#root;html;head;body;div#out1;span;Foo;span;Bar;div#out2;Out2;");
170     }
171 
canWrap()172     @Test void canWrap() {
173         Document doc = Jsoup.parse(html);
174         NodeIterator<Node> it = NodeIterator.from(doc);
175         boolean sawInner = false;
176         while (it.hasNext()) {
177             Node node = it.next();
178             if (node.attr("id").equals("1")) {
179                 node.wrap("<div id=outer>");
180             }
181             if (node instanceof TextNode && ((TextNode) node).text().equals("One"))
182                 sawInner = true;
183         }
184         assertContents(doc, "#root;html;head;body;div#outer;div#1;p;One;p;Two;div#2;p;Three;p;Four;");
185         assertTrue(sawInner);
186     }
187 
canFilterForElements()188     @Test void canFilterForElements() {
189         Document doc = Jsoup.parse(html);
190         NodeIterator<Element> it = new NodeIterator<>(doc, Element.class);
191 
192         StringBuilder seen = new StringBuilder();
193         while (it.hasNext()) {
194             Element el = it.next();
195             assertNotNull(el);
196             trackSeen(el, seen);
197         }
198 
199         assertEquals("#root;html;head;body;div#1;p;p;div#2;p;p;", seen.toString());
200     }
201 
canFilterForTextNodes()202     @Test void canFilterForTextNodes() {
203         Document doc = Jsoup.parse(html);
204         NodeIterator<TextNode> it = new NodeIterator<>(doc, TextNode.class);
205 
206         StringBuilder seen = new StringBuilder();
207         while (it.hasNext()) {
208             TextNode text = it.next();
209             assertNotNull(text);
210             trackSeen(text, seen);
211         }
212 
213         assertEquals("One;Two;Three;Four;", seen.toString());
214         assertContents(doc, "#root;html;head;body;div#1;p;One;p;Two;div#2;p;Three;p;Four;");
215     }
216 
canModifyFilteredElements()217     @Test void canModifyFilteredElements() {
218         Document doc = Jsoup.parse(html);
219         NodeIterator<Element> it = new NodeIterator<>(doc, Element.class);
220 
221         StringBuilder seen = new StringBuilder();
222         while (it.hasNext()) {
223             Element el = it.next();
224             if (!el.ownText().isEmpty())
225                 el.text(el.ownText() + "++");
226             trackSeen(el, seen);
227         }
228 
229         assertEquals("#root;html;head;body;div#1;p;p;div#2;p;p;", seen.toString());
230         assertContents(doc, "#root;html;head;body;div#1;p;One++;p;Two++;div#2;p;Three++;p;Four++;");
231     }
232 
assertIterates(NodeIterator<T> it, String expected)233     static <T extends Node> void assertIterates(NodeIterator<T> it, String expected) {
234         Node previous = null;
235         StringBuilder actual = new StringBuilder();
236         while (it.hasNext()) {
237             Node node = it.next();
238             assertNotNull(node);
239             assertNotSame(previous, node);
240 
241             trackSeen(node, actual);
242             previous = node;
243         }
244         assertEquals(expected, actual.toString());
245     }
246 
assertContents(Element el, String expected)247     static void assertContents(Element el, String expected) {
248         NodeIterator<Node> it = NodeIterator.from(el);
249         assertIterates(it, expected);
250     }
251 
trackSeen(Node node, StringBuilder actual)252     static void trackSeen(Node node, StringBuilder actual) {
253         if (node instanceof Element) {
254             Element el = (Element) node;
255             actual.append(el.tagName());
256             if (el.hasAttr("id"))
257                 actual.append("#").append(el.id());
258         }
259         else if (node instanceof TextNode)
260             actual.append(((TextNode) node).text());
261         else
262             actual.append(node.nodeName());
263         actual.append(";");
264     }
265 
266 }