• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.jsoup.integration;
2 
3 import org.jsoup.Connection;
4 import org.jsoup.Jsoup;
5 import org.jsoup.helper.DataUtil;
6 import org.jsoup.integration.servlets.FileServlet;
7 import org.jsoup.integration.servlets.SlowRider;
8 import org.jsoup.nodes.Document;
9 import org.jsoup.nodes.Element;
10 import org.junit.jupiter.api.Test;
11 
12 import java.io.BufferedInputStream;
13 import java.io.IOException;
14 import java.net.SocketTimeoutException;
15 import java.nio.ByteBuffer;
16 import java.nio.charset.StandardCharsets;
17 import java.util.concurrent.atomic.AtomicBoolean;
18 
19 import static org.junit.jupiter.api.Assertions.*;
20 
21 /**
22  * Failsafe integration tests for Connect methods. These take a bit longer to run, so included as Integ, not Unit, tests.
23  */
24 public class ConnectIT {
25     // Slow Rider tests.
26     @Test
canInterruptBodyStringRead()27     public void canInterruptBodyStringRead() throws InterruptedException {
28         // todo - implement in interruptable channels, so it's immediate
29         final String[] body = new String[1];
30         Thread runner = new Thread(() -> {
31             try {
32                 Connection.Response res = Jsoup.connect(SlowRider.Url)
33                     .timeout(15 * 1000)
34                     .execute();
35                 body[0] = res.body();
36             } catch (IOException e) {
37                 throw new RuntimeException(e);
38             }
39 
40         });
41 
42         runner.start();
43         Thread.sleep(1000 * 3);
44         runner.interrupt();
45         assertTrue(runner.isInterrupted());
46         runner.join();
47 
48         assertTrue(body[0].length() > 0);
49         assertTrue(body[0].contains("<p>Are you still there?"));
50     }
51 
52     @Test
canInterruptDocumentRead()53     public void canInterruptDocumentRead() throws InterruptedException {
54         // todo - implement in interruptable channels, so it's immediate
55         final String[] body = new String[1];
56         Thread runner = new Thread(() -> {
57             try {
58                 Connection.Response res = Jsoup.connect(SlowRider.Url)
59                     .timeout(15 * 1000)
60                     .execute();
61                 body[0] = res.parse().text();
62             } catch (IOException e) {
63                 throw new RuntimeException(e);
64             }
65 
66         });
67 
68         runner.start();
69         Thread.sleep(1000 * 3);
70         runner.interrupt();
71         assertTrue(runner.isInterrupted());
72         runner.join();
73 
74         assertEquals(0, body[0].length()); // doesn't read a failed doc
75     }
76 
canInterruptThenJoinASpawnedThread()77     @Test public void canInterruptThenJoinASpawnedThread() throws InterruptedException {
78         // https://github.com/jhy/jsoup/issues/1991
79         AtomicBoolean ioException = new AtomicBoolean();
80         Thread runner = new Thread(() -> {
81             try {
82                 while (!Thread.currentThread().isInterrupted()) {
83                     Document doc  = Jsoup.connect(SlowRider.Url)
84                         .timeout(30000)
85                         .get();
86                 }
87             } catch (IOException e) {
88                 ioException.set(true); // don't expect to catch, because the outer sleep will complete before this timeout
89             }
90         });
91 
92         runner.start();
93         Thread.sleep(2 * 1000);
94         runner.interrupt();
95         runner.join();
96         assertFalse(ioException.get());
97     }
98 
99     @Test
totalTimeout()100     public void totalTimeout() throws IOException {
101         int timeout = 3 * 1000;
102         long start = System.currentTimeMillis();
103         boolean threw = false;
104         try {
105             Jsoup.connect(SlowRider.Url).timeout(timeout).get();
106         } catch (SocketTimeoutException e) {
107             long end = System.currentTimeMillis();
108             long took = end - start;
109             assertTrue(took > timeout, ("Time taken was " + took));
110             assertTrue(took < timeout * 1.8, ("Time taken was " + took));
111             threw = true;
112         }
113 
114         assertTrue(threw);
115     }
116 
117     @Test
slowReadOk()118     public void slowReadOk() throws IOException {
119         // make sure that a slow read that is under the request timeout is still OK
120         Document doc = Jsoup.connect(SlowRider.Url)
121             .data(SlowRider.MaxTimeParam, "2000") // the request completes in 2 seconds
122             .get();
123 
124         Element h1 = doc.selectFirst("h1");
125         assertEquals("outatime", h1.text());
126     }
127 
128     @Test
infiniteReadSupported()129     public void infiniteReadSupported() throws IOException {
130         Document doc = Jsoup.connect(SlowRider.Url)
131             .timeout(0)
132             .data(SlowRider.MaxTimeParam, "2000")
133             .get();
134 
135         Element h1 = doc.selectFirst("h1");
136         assertEquals("outatime", h1.text());
137     }
138 
139     @Test
remainingAfterFirstRead()140     public void remainingAfterFirstRead() throws IOException {
141         int bufferSize = 5 * 1024;
142         int capSize = 100 * 1024;
143 
144         String url = FileServlet.urlTo("/htmltests/large.html"); // 280 K
145 
146         try (BufferedInputStream stream = Jsoup.connect(url).maxBodySize(capSize)
147             .execute().bodyStream()) {
148 
149             // simulates parse which does a limited read first
150             stream.mark(bufferSize);
151             ByteBuffer firstBytes = DataUtil.readToByteBuffer(stream, bufferSize);
152 
153             byte[] array = firstBytes.array();
154             String firstText = new String(array, StandardCharsets.UTF_8);
155             assertTrue(firstText.startsWith("<html><head><title>Large"));
156             assertEquals(bufferSize, array.length);
157 
158             boolean fullyRead = stream.read() == -1;
159             assertFalse(fullyRead);
160 
161             // reset and read again
162             stream.reset();
163             ByteBuffer fullRead = DataUtil.readToByteBuffer(stream, 0);
164             byte[] fullArray = fullRead.array();
165 
166             // bodyStream is not capped to body size - only for jsoup consumed stream
167             assertTrue(fullArray.length > capSize);
168 
169             assertEquals(280735, fullArray.length);
170             String fullText = new String(fullArray, StandardCharsets.UTF_8);
171             assertTrue(fullText.startsWith(firstText));
172         }
173     }
174 
175     @Test
noLimitAfterFirstRead()176     public void noLimitAfterFirstRead() throws IOException {
177         int bufferSize = 5 * 1024;
178 
179         String url = FileServlet.urlTo("/htmltests/large.html"); // 280 K
180         try (BufferedInputStream stream = Jsoup.connect(url).execute().bodyStream()) {
181             // simulates parse which does a limited read first
182             stream.mark(bufferSize);
183             ByteBuffer firstBytes = DataUtil.readToByteBuffer(stream, bufferSize);
184             byte[] array = firstBytes.array();
185             String firstText = new String(array, StandardCharsets.UTF_8);
186             assertTrue(firstText.startsWith("<html><head><title>Large"));
187             assertEquals(bufferSize, array.length);
188 
189             // reset and read fully
190             stream.reset();
191             ByteBuffer fullRead = DataUtil.readToByteBuffer(stream, 0);
192             byte[] fullArray = fullRead.array();
193             assertEquals(280735, fullArray.length);
194             String fullText = new String(fullArray, StandardCharsets.UTF_8);
195             assertTrue(fullText.startsWith(firstText));
196         }
197     }
198 
bodyStreamConstrainedViaBufferUp()199     @Test public void bodyStreamConstrainedViaBufferUp() throws IOException {
200         int cap = 5 * 1024;
201         String url = FileServlet.urlTo("/htmltests/large.html"); // 280 K
202         try (BufferedInputStream stream = Jsoup
203             .connect(url)
204             .maxBodySize(cap)
205             .execute()
206             .bufferUp()
207             .bodyStream()) {
208 
209             ByteBuffer cappedRead = DataUtil.readToByteBuffer(stream, 0);
210             byte[] cappedArray = cappedRead.array();
211             assertEquals(cap, cappedArray.length);
212         }
213     }
214 }
215