1 package org.jsoup.integration; 2 3 import org.jsoup.Connection; 4 import org.jsoup.Jsoup; 5 import org.jsoup.helper.DataUtil; 6 import org.jsoup.integration.servlets.FileServlet; 7 import org.jsoup.integration.servlets.SlowRider; 8 import org.jsoup.nodes.Document; 9 import org.jsoup.nodes.Element; 10 import org.junit.jupiter.api.Test; 11 12 import java.io.BufferedInputStream; 13 import java.io.IOException; 14 import java.net.SocketTimeoutException; 15 import java.nio.ByteBuffer; 16 import java.nio.charset.StandardCharsets; 17 import java.util.concurrent.atomic.AtomicBoolean; 18 19 import static org.junit.jupiter.api.Assertions.*; 20 21 /** 22 * Failsafe integration tests for Connect methods. These take a bit longer to run, so included as Integ, not Unit, tests. 23 */ 24 public class ConnectIT { 25 // Slow Rider tests. 26 @Test canInterruptBodyStringRead()27 public void canInterruptBodyStringRead() throws InterruptedException { 28 // todo - implement in interruptable channels, so it's immediate 29 final String[] body = new String[1]; 30 Thread runner = new Thread(() -> { 31 try { 32 Connection.Response res = Jsoup.connect(SlowRider.Url) 33 .timeout(15 * 1000) 34 .execute(); 35 body[0] = res.body(); 36 } catch (IOException e) { 37 throw new RuntimeException(e); 38 } 39 40 }); 41 42 runner.start(); 43 Thread.sleep(1000 * 3); 44 runner.interrupt(); 45 assertTrue(runner.isInterrupted()); 46 runner.join(); 47 48 assertTrue(body[0].length() > 0); 49 assertTrue(body[0].contains("<p>Are you still there?")); 50 } 51 52 @Test canInterruptDocumentRead()53 public void canInterruptDocumentRead() throws InterruptedException { 54 // todo - implement in interruptable channels, so it's immediate 55 final String[] body = new String[1]; 56 Thread runner = new Thread(() -> { 57 try { 58 Connection.Response res = Jsoup.connect(SlowRider.Url) 59 .timeout(15 * 1000) 60 .execute(); 61 body[0] = res.parse().text(); 62 } catch (IOException e) { 63 throw new RuntimeException(e); 64 } 65 66 }); 67 68 runner.start(); 69 Thread.sleep(1000 * 3); 70 runner.interrupt(); 71 assertTrue(runner.isInterrupted()); 72 runner.join(); 73 74 assertEquals(0, body[0].length()); // doesn't read a failed doc 75 } 76 canInterruptThenJoinASpawnedThread()77 @Test public void canInterruptThenJoinASpawnedThread() throws InterruptedException { 78 // https://github.com/jhy/jsoup/issues/1991 79 AtomicBoolean ioException = new AtomicBoolean(); 80 Thread runner = new Thread(() -> { 81 try { 82 while (!Thread.currentThread().isInterrupted()) { 83 Document doc = Jsoup.connect(SlowRider.Url) 84 .timeout(30000) 85 .get(); 86 } 87 } catch (IOException e) { 88 ioException.set(true); // don't expect to catch, because the outer sleep will complete before this timeout 89 } 90 }); 91 92 runner.start(); 93 Thread.sleep(2 * 1000); 94 runner.interrupt(); 95 runner.join(); 96 assertFalse(ioException.get()); 97 } 98 99 @Test totalTimeout()100 public void totalTimeout() throws IOException { 101 int timeout = 3 * 1000; 102 long start = System.currentTimeMillis(); 103 boolean threw = false; 104 try { 105 Jsoup.connect(SlowRider.Url).timeout(timeout).get(); 106 } catch (SocketTimeoutException e) { 107 long end = System.currentTimeMillis(); 108 long took = end - start; 109 assertTrue(took > timeout, ("Time taken was " + took)); 110 assertTrue(took < timeout * 1.8, ("Time taken was " + took)); 111 threw = true; 112 } 113 114 assertTrue(threw); 115 } 116 117 @Test slowReadOk()118 public void slowReadOk() throws IOException { 119 // make sure that a slow read that is under the request timeout is still OK 120 Document doc = Jsoup.connect(SlowRider.Url) 121 .data(SlowRider.MaxTimeParam, "2000") // the request completes in 2 seconds 122 .get(); 123 124 Element h1 = doc.selectFirst("h1"); 125 assertEquals("outatime", h1.text()); 126 } 127 128 @Test infiniteReadSupported()129 public void infiniteReadSupported() throws IOException { 130 Document doc = Jsoup.connect(SlowRider.Url) 131 .timeout(0) 132 .data(SlowRider.MaxTimeParam, "2000") 133 .get(); 134 135 Element h1 = doc.selectFirst("h1"); 136 assertEquals("outatime", h1.text()); 137 } 138 139 @Test remainingAfterFirstRead()140 public void remainingAfterFirstRead() throws IOException { 141 int bufferSize = 5 * 1024; 142 int capSize = 100 * 1024; 143 144 String url = FileServlet.urlTo("/htmltests/large.html"); // 280 K 145 146 try (BufferedInputStream stream = Jsoup.connect(url).maxBodySize(capSize) 147 .execute().bodyStream()) { 148 149 // simulates parse which does a limited read first 150 stream.mark(bufferSize); 151 ByteBuffer firstBytes = DataUtil.readToByteBuffer(stream, bufferSize); 152 153 byte[] array = firstBytes.array(); 154 String firstText = new String(array, StandardCharsets.UTF_8); 155 assertTrue(firstText.startsWith("<html><head><title>Large")); 156 assertEquals(bufferSize, array.length); 157 158 boolean fullyRead = stream.read() == -1; 159 assertFalse(fullyRead); 160 161 // reset and read again 162 stream.reset(); 163 ByteBuffer fullRead = DataUtil.readToByteBuffer(stream, 0); 164 byte[] fullArray = fullRead.array(); 165 166 // bodyStream is not capped to body size - only for jsoup consumed stream 167 assertTrue(fullArray.length > capSize); 168 169 assertEquals(280735, fullArray.length); 170 String fullText = new String(fullArray, StandardCharsets.UTF_8); 171 assertTrue(fullText.startsWith(firstText)); 172 } 173 } 174 175 @Test noLimitAfterFirstRead()176 public void noLimitAfterFirstRead() throws IOException { 177 int bufferSize = 5 * 1024; 178 179 String url = FileServlet.urlTo("/htmltests/large.html"); // 280 K 180 try (BufferedInputStream stream = Jsoup.connect(url).execute().bodyStream()) { 181 // simulates parse which does a limited read first 182 stream.mark(bufferSize); 183 ByteBuffer firstBytes = DataUtil.readToByteBuffer(stream, bufferSize); 184 byte[] array = firstBytes.array(); 185 String firstText = new String(array, StandardCharsets.UTF_8); 186 assertTrue(firstText.startsWith("<html><head><title>Large")); 187 assertEquals(bufferSize, array.length); 188 189 // reset and read fully 190 stream.reset(); 191 ByteBuffer fullRead = DataUtil.readToByteBuffer(stream, 0); 192 byte[] fullArray = fullRead.array(); 193 assertEquals(280735, fullArray.length); 194 String fullText = new String(fullArray, StandardCharsets.UTF_8); 195 assertTrue(fullText.startsWith(firstText)); 196 } 197 } 198 bodyStreamConstrainedViaBufferUp()199 @Test public void bodyStreamConstrainedViaBufferUp() throws IOException { 200 int cap = 5 * 1024; 201 String url = FileServlet.urlTo("/htmltests/large.html"); // 280 K 202 try (BufferedInputStream stream = Jsoup 203 .connect(url) 204 .maxBodySize(cap) 205 .execute() 206 .bufferUp() 207 .bodyStream()) { 208 209 ByteBuffer cappedRead = DataUtil.readToByteBuffer(stream, 0); 210 byte[] cappedArray = cappedRead.array(); 211 assertEquals(cap, cappedArray.length); 212 } 213 } 214 } 215