• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 package org.jsoup.helper;
2 
3 import org.jsoup.Connection;
4 import org.jsoup.HttpStatusException;
5 import org.jsoup.UncheckedIOException;
6 import org.jsoup.UnsupportedMimeTypeException;
7 import org.jsoup.internal.ControllableInputStream;
8 import org.jsoup.internal.SharedConstants;
9 import org.jsoup.internal.StringUtil;
10 import org.jsoup.nodes.Document;
11 import org.jsoup.parser.Parser;
12 import org.jsoup.parser.TokenQueue;
13 import org.jspecify.annotations.Nullable;
14 
15 import javax.net.ssl.HttpsURLConnection;
16 import javax.net.ssl.SSLSocketFactory;
17 import java.io.BufferedInputStream;
18 import java.io.BufferedWriter;
19 import java.io.ByteArrayInputStream;
20 import java.io.IOException;
21 import java.io.InputStream;
22 import java.io.OutputStream;
23 import java.io.OutputStreamWriter;
24 import java.net.CookieManager;
25 import java.net.CookieStore;
26 import java.net.HttpURLConnection;
27 import java.net.InetSocketAddress;
28 import java.net.MalformedURLException;
29 import java.net.Proxy;
30 import java.net.URL;
31 import java.net.URLEncoder;
32 import java.nio.Buffer;
33 import java.nio.ByteBuffer;
34 import java.nio.charset.Charset;
35 import java.nio.charset.IllegalCharsetNameException;
36 import java.util.ArrayList;
37 import java.util.Collection;
38 import java.util.Collections;
39 import java.util.LinkedHashMap;
40 import java.util.List;
41 import java.util.Map;
42 import java.util.regex.Pattern;
43 import java.util.zip.GZIPInputStream;
44 import java.util.zip.Inflater;
45 import java.util.zip.InflaterInputStream;
46 
47 import static org.jsoup.Connection.Method.HEAD;
48 import static org.jsoup.helper.DataUtil.UTF_8;
49 import static org.jsoup.internal.Normalizer.lowerCase;
50 
51 /**
52  * Implementation of {@link Connection}.
53  * @see org.jsoup.Jsoup#connect(String)
54  */
55 @SuppressWarnings("CharsetObjectCanBeUsed")
56 public class HttpConnection implements Connection {
57     public static final String CONTENT_ENCODING = "Content-Encoding";
58     /**
59      * Many users would get caught by not setting a user-agent and therefore getting different responses on their desktop
60      * vs in jsoup, which would otherwise default to {@code Java}. So by default, use a desktop UA.
61      */
62     public static final String DEFAULT_UA =
63         "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36";
64     private static final String USER_AGENT = "User-Agent";
65     public static final String CONTENT_TYPE = "Content-Type";
66     public static final String MULTIPART_FORM_DATA = "multipart/form-data";
67     public static final String FORM_URL_ENCODED = "application/x-www-form-urlencoded";
68     private static final int HTTP_TEMP_REDIR = 307; // http/1.1 temporary redirect, not in Java's set.
69     private static final String DefaultUploadType = "application/octet-stream";
70     private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");
71 
72     /**
73      Create a new Connection, with the request URL specified.
74      @param url the URL to fetch from
75      @return a new Connection object
76      */
connect(String url)77     public static Connection connect(String url) {
78         Connection con = new HttpConnection();
79         con.url(url);
80         return con;
81     }
82 
83     /**
84      Create a new Connection, with the request URL specified.
85      @param url the URL to fetch from
86      @return a new Connection object
87      */
connect(URL url)88     public static Connection connect(URL url) {
89         Connection con = new HttpConnection();
90         con.url(url);
91         return con;
92     }
93 
94     /**
95      Create a new, empty HttpConnection.
96      */
HttpConnection()97     public HttpConnection() {
98         req = new Request();
99     }
100 
101     /**
102      Create a new Request by deep-copying an existing Request. Note that the data and body of the original are not
103      copied. All other settings (proxy, parser, cookies, etc) are copied.
104      @param copy the request to copy
105      */
HttpConnection(Request copy)106     HttpConnection(Request copy) {
107         req = new Request(copy);
108     }
109 
encodeMimeName(String val)110     private static String encodeMimeName(String val) {
111         return val.replace("\"", "%22");
112     }
113 
114     private HttpConnection.Request req;
115     private Connection.@Nullable Response res;
116 
117     @Override
newRequest()118     public Connection newRequest() {
119         // copy the prototype request for the different settings, cookie manager, etc
120         return new HttpConnection(req);
121     }
122 
123     /** Create a new Connection that just wraps the provided Request and Response */
HttpConnection(Request req, Response res)124     private HttpConnection(Request req, Response res) {
125         this.req = req;
126         this.res = res;
127     }
128 
129     @Override
url(URL url)130     public Connection url(URL url) {
131         req.url(url);
132         return this;
133     }
134 
135     @Override
url(String url)136     public Connection url(String url) {
137         Validate.notEmptyParam(url, "url");
138         try {
139             req.url(new URL(url));
140         } catch (MalformedURLException e) {
141             throw new IllegalArgumentException(String.format("The supplied URL, '%s', is malformed. Make sure it is an absolute URL, and starts with 'http://' or 'https://'. See https://jsoup.org/cookbook/extracting-data/working-with-urls", url), e);
142         }
143         return this;
144     }
145 
146     @Override
proxy(@ullable Proxy proxy)147     public Connection proxy(@Nullable Proxy proxy) {
148         req.proxy(proxy);
149         return this;
150     }
151 
152     @Override
proxy(String host, int port)153     public Connection proxy(String host, int port) {
154         req.proxy(host, port);
155         return this;
156     }
157 
158     @Override
userAgent(String userAgent)159     public Connection userAgent(String userAgent) {
160         Validate.notNullParam(userAgent, "userAgent");
161         req.header(USER_AGENT, userAgent);
162         return this;
163     }
164 
165     @Override
timeout(int millis)166     public Connection timeout(int millis) {
167         req.timeout(millis);
168         return this;
169     }
170 
171     @Override
maxBodySize(int bytes)172     public Connection maxBodySize(int bytes) {
173         req.maxBodySize(bytes);
174         return this;
175     }
176 
177     @Override
followRedirects(boolean followRedirects)178     public Connection followRedirects(boolean followRedirects) {
179         req.followRedirects(followRedirects);
180         return this;
181     }
182 
183     @Override
referrer(String referrer)184     public Connection referrer(String referrer) {
185         Validate.notNullParam(referrer, "referrer");
186         req.header("Referer", referrer);
187         return this;
188     }
189 
190     @Override
method(Method method)191     public Connection method(Method method) {
192         req.method(method);
193         return this;
194     }
195 
196     @Override
ignoreHttpErrors(boolean ignoreHttpErrors)197     public Connection ignoreHttpErrors(boolean ignoreHttpErrors) {
198 		req.ignoreHttpErrors(ignoreHttpErrors);
199 		return this;
200 	}
201 
202     @Override
ignoreContentType(boolean ignoreContentType)203     public Connection ignoreContentType(boolean ignoreContentType) {
204         req.ignoreContentType(ignoreContentType);
205         return this;
206     }
207 
208     @Override
data(String key, String value)209     public Connection data(String key, String value) {
210         req.data(KeyVal.create(key, value));
211         return this;
212     }
213 
214     @Override
sslSocketFactory(SSLSocketFactory sslSocketFactory)215     public Connection sslSocketFactory(SSLSocketFactory sslSocketFactory) {
216 	    req.sslSocketFactory(sslSocketFactory);
217 	    return this;
218     }
219 
220     @Override
data(String key, String filename, InputStream inputStream)221     public Connection data(String key, String filename, InputStream inputStream) {
222         req.data(KeyVal.create(key, filename, inputStream));
223         return this;
224     }
225 
226     @Override
data(String key, String filename, InputStream inputStream, String contentType)227     public Connection data(String key, String filename, InputStream inputStream, String contentType) {
228         req.data(KeyVal.create(key, filename, inputStream).contentType(contentType));
229         return this;
230     }
231 
232     @Override
data(Map<String, String> data)233     public Connection data(Map<String, String> data) {
234         Validate.notNullParam(data, "data");
235         for (Map.Entry<String, String> entry : data.entrySet()) {
236             req.data(KeyVal.create(entry.getKey(), entry.getValue()));
237         }
238         return this;
239     }
240 
241     @Override
data(String... keyvals)242     public Connection data(String... keyvals) {
243         Validate.notNullParam(keyvals, "keyvals");
244         Validate.isTrue(keyvals.length %2 == 0, "Must supply an even number of key value pairs");
245         for (int i = 0; i < keyvals.length; i += 2) {
246             String key = keyvals[i];
247             String value = keyvals[i+1];
248             Validate.notEmpty(key, "Data key must not be empty");
249             Validate.notNull(value, "Data value must not be null");
250             req.data(KeyVal.create(key, value));
251         }
252         return this;
253     }
254 
255     @Override
data(Collection<Connection.KeyVal> data)256     public Connection data(Collection<Connection.KeyVal> data) {
257         Validate.notNullParam(data, "data");
258         for (Connection.KeyVal entry: data) {
259             req.data(entry);
260         }
261         return this;
262     }
263 
264     @Override
data(String key)265     public Connection.KeyVal data(String key) {
266         Validate.notEmptyParam(key, "key");
267         for (Connection.KeyVal keyVal : request().data()) {
268             if (keyVal.key().equals(key))
269                 return keyVal;
270         }
271         return null;
272     }
273 
274     @Override
requestBody(String body)275     public Connection requestBody(String body) {
276         req.requestBody(body);
277         return this;
278     }
279 
280     @Override
header(String name, String value)281     public Connection header(String name, String value) {
282         req.header(name, value);
283         return this;
284     }
285 
286     @Override
headers(Map<String,String> headers)287     public Connection headers(Map<String,String> headers) {
288         Validate.notNullParam(headers, "headers");
289         for (Map.Entry<String,String> entry : headers.entrySet()) {
290             req.header(entry.getKey(),entry.getValue());
291         }
292         return this;
293     }
294 
295     @Override
cookie(String name, String value)296     public Connection cookie(String name, String value) {
297         req.cookie(name, value);
298         return this;
299     }
300 
301     @Override
cookies(Map<String, String> cookies)302     public Connection cookies(Map<String, String> cookies) {
303         Validate.notNullParam(cookies, "cookies");
304         for (Map.Entry<String, String> entry : cookies.entrySet()) {
305             req.cookie(entry.getKey(), entry.getValue());
306         }
307         return this;
308     }
309 
310     @Override
cookieStore(CookieStore cookieStore)311     public Connection cookieStore(CookieStore cookieStore) {
312         // create a new cookie manager using the new store
313         req.cookieManager = new CookieManager(cookieStore, null);
314         return this;
315     }
316 
317     @Override
cookieStore()318     public CookieStore cookieStore() {
319         return req.cookieManager.getCookieStore();
320     }
321 
322     @Override
parser(Parser parser)323     public Connection parser(Parser parser) {
324         req.parser(parser);
325         return this;
326     }
327 
328     @Override
get()329     public Document get() throws IOException {
330         req.method(Method.GET);
331         execute();
332         Validate.notNull(res);
333         return res.parse();
334     }
335 
336     @Override
post()337     public Document post() throws IOException {
338         req.method(Method.POST);
339         execute();
340         Validate.notNull(res);
341         return res.parse();
342     }
343 
344     @Override
execute()345     public Connection.Response execute() throws IOException {
346         res = Response.execute(req);
347         return res;
348     }
349 
350     @Override
request()351     public Connection.Request request() {
352         return req;
353     }
354 
355     @Override
request(Connection.Request request)356     public Connection request(Connection.Request request) {
357         req = (HttpConnection.Request) request; // will throw a class-cast exception if the user has extended some but not all of Connection; that's desired
358         return this;
359     }
360 
361     @Override
response()362     public Connection.Response response() {
363         if (res == null) {
364             throw new IllegalArgumentException("You must execute the request before getting a response.");
365         }
366         return res;
367     }
368 
369     @Override
response(Connection.Response response)370     public Connection response(Connection.Response response) {
371         res = response;
372         return this;
373     }
374 
375     @Override
postDataCharset(String charset)376     public Connection postDataCharset(String charset) {
377         req.postDataCharset(charset);
378         return this;
379     }
380 
auth(RequestAuthenticator authenticator)381     @Override public Connection auth(RequestAuthenticator authenticator) {
382         req.auth(authenticator);
383         return this;
384     }
385 
386     @SuppressWarnings("unchecked")
387     private static abstract class Base<T extends Connection.Base<T>> implements Connection.Base<T> {
388         private static final URL UnsetUrl; // only used if you created a new Request()
389         static {
390             try {
391                 UnsetUrl = new URL("http://undefined/");
392             } catch (MalformedURLException e) {
393                 throw new IllegalStateException(e);
394             }
395         }
396 
397         URL url = UnsetUrl;
398         Method method = Method.GET;
399         Map<String, List<String>> headers;
400         Map<String, String> cookies;
401 
Base()402         private Base() {
403             headers = new LinkedHashMap<>();
404             cookies = new LinkedHashMap<>();
405         }
406 
Base(Base<T> copy)407         private Base(Base<T> copy) {
408             url = copy.url; // unmodifiable object
409             method = copy.method;
410             headers = new LinkedHashMap<>();
411             for (Map.Entry<String, List<String>> entry : copy.headers.entrySet()) {
412                 headers.put(entry.getKey(), new ArrayList<>(entry.getValue()));
413             }
414             cookies = new LinkedHashMap<>(); cookies.putAll(copy.cookies); // just holds strings
415         }
416 
417         @Override
url()418         public URL url() {
419             if (url == UnsetUrl)
420                 throw new IllegalArgumentException("URL not set. Make sure to call #url(...) before executing the request.");
421             return url;
422         }
423 
424         @Override
url(URL url)425         public T url(URL url) {
426             Validate.notNullParam(url, "url");
427             this.url = new UrlBuilder(url).build();
428             return (T) this;
429         }
430 
431         @Override
method()432         public Method method() {
433             return method;
434         }
435 
436         @Override
method(Method method)437         public T method(Method method) {
438             Validate.notNullParam(method, "method");
439             this.method = method;
440             return (T) this;
441         }
442 
443         @Override
header(String name)444         public String header(String name) {
445             Validate.notNullParam(name, "name");
446             List<String> vals = getHeadersCaseInsensitive(name);
447             if (vals.size() > 0) {
448                 // https://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2
449                 return StringUtil.join(vals, ", ");
450             }
451 
452             return null;
453         }
454 
455         @Override
addHeader(String name, @Nullable String value)456         public T addHeader(String name, @Nullable String value) {
457             Validate.notEmptyParam(name, "name");
458             //noinspection ConstantConditions
459             value = value == null ? "" : value;
460 
461             List<String> values = headers(name);
462             if (values.isEmpty()) {
463                 values = new ArrayList<>();
464                 headers.put(name, values);
465             }
466             values.add(value);
467 
468             return (T) this;
469         }
470 
471         @Override
headers(String name)472         public List<String> headers(String name) {
473             Validate.notEmptyParam(name, "name");
474             return getHeadersCaseInsensitive(name);
475         }
476 
477         @Override
header(String name, String value)478         public T header(String name, String value) {
479             Validate.notEmptyParam(name, "name");
480             removeHeader(name); // ensures we don't get an "accept-encoding" and a "Accept-Encoding"
481             addHeader(name, value);
482             return (T) this;
483         }
484 
485         @Override
hasHeader(String name)486         public boolean hasHeader(String name) {
487             Validate.notEmptyParam(name, "name");
488             return !getHeadersCaseInsensitive(name).isEmpty();
489         }
490 
491         /**
492          * Test if the request has a header with this value (case insensitive).
493          */
494         @Override
hasHeaderWithValue(String name, String value)495         public boolean hasHeaderWithValue(String name, String value) {
496             Validate.notEmpty(name);
497             Validate.notEmpty(value);
498             List<String> values = headers(name);
499             for (String candidate : values) {
500                 if (value.equalsIgnoreCase(candidate))
501                     return true;
502             }
503             return false;
504         }
505 
506         @Override
removeHeader(String name)507         public T removeHeader(String name) {
508             Validate.notEmptyParam(name, "name");
509             Map.Entry<String, List<String>> entry = scanHeaders(name); // remove is case-insensitive too
510             if (entry != null)
511                 headers.remove(entry.getKey()); // ensures correct case
512             return (T) this;
513         }
514 
515         @Override
headers()516         public Map<String, String> headers() {
517             LinkedHashMap<String, String> map = new LinkedHashMap<>(headers.size());
518             for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
519                 String header = entry.getKey();
520                 List<String> values = entry.getValue();
521                 if (values.size() > 0)
522                     map.put(header, values.get(0));
523             }
524             return map;
525         }
526 
527         @Override
multiHeaders()528         public Map<String, List<String>> multiHeaders() {
529             return headers;
530         }
531 
getHeadersCaseInsensitive(String name)532         private List<String> getHeadersCaseInsensitive(String name) {
533             Validate.notNull(name);
534 
535             for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
536                 if (name.equalsIgnoreCase(entry.getKey()))
537                     return entry.getValue();
538             }
539 
540             return Collections.emptyList();
541         }
542 
scanHeaders(String name)543         private Map.@Nullable Entry<String, List<String>> scanHeaders(String name) {
544             String lc = lowerCase(name);
545             for (Map.Entry<String, List<String>> entry : headers.entrySet()) {
546                 if (lowerCase(entry.getKey()).equals(lc))
547                     return entry;
548             }
549             return null;
550         }
551 
552         @Override
cookie(String name)553         public String cookie(String name) {
554             Validate.notEmptyParam(name, "name");
555             return cookies.get(name);
556         }
557 
558         @Override
cookie(String name, String value)559         public T cookie(String name, String value) {
560             Validate.notEmptyParam(name, "name");
561             Validate.notNullParam(value, "value");
562             cookies.put(name, value);
563             return (T) this;
564         }
565 
566         @Override
hasCookie(String name)567         public boolean hasCookie(String name) {
568             Validate.notEmptyParam(name, "name");
569             return cookies.containsKey(name);
570         }
571 
572         @Override
removeCookie(String name)573         public T removeCookie(String name) {
574             Validate.notEmptyParam(name, "name");
575             cookies.remove(name);
576             return (T) this;
577         }
578 
579         @Override
cookies()580         public Map<String, String> cookies() {
581             return cookies;
582         }
583     }
584 
585     public static class Request extends HttpConnection.Base<Connection.Request> implements Connection.Request {
586         static {
587             System.setProperty("sun.net.http.allowRestrictedHeaders", "true");
588             // make sure that we can send Sec-Fetch-Site headers etc.
589         }
590 
591         private @Nullable Proxy proxy;
592         private int timeoutMilliseconds;
593         private int maxBodySizeBytes;
594         private boolean followRedirects;
595         private final Collection<Connection.KeyVal> data;
596         private @Nullable String body = null;
597         private boolean ignoreHttpErrors = false;
598         private boolean ignoreContentType = false;
599         private Parser parser;
600         private boolean parserDefined = false; // called parser(...) vs initialized in ctor
601         private String postDataCharset = DataUtil.defaultCharsetName;
602         private @Nullable SSLSocketFactory sslSocketFactory;
603         private CookieManager cookieManager;
604         private @Nullable RequestAuthenticator authenticator;
605         private volatile boolean executing = false;
606 
Request()607         Request() {
608             super();
609             timeoutMilliseconds = 30000; // 30 seconds
610             maxBodySizeBytes = 1024 * 1024 * 2; // 2MB
611             followRedirects = true;
612             data = new ArrayList<>();
613             method = Method.GET;
614             addHeader("Accept-Encoding", "gzip");
615             addHeader(USER_AGENT, DEFAULT_UA);
616             parser = Parser.htmlParser();
617             cookieManager = new CookieManager(); // creates a default InMemoryCookieStore
618         }
619 
Request(Request copy)620         Request(Request copy) {
621             super(copy);
622             proxy = copy.proxy;
623             postDataCharset = copy.postDataCharset;
624             timeoutMilliseconds = copy.timeoutMilliseconds;
625             maxBodySizeBytes = copy.maxBodySizeBytes;
626             followRedirects = copy.followRedirects;
627             data = new ArrayList<>(); // data not copied
628             //body not copied
629             ignoreHttpErrors = copy.ignoreHttpErrors;
630             ignoreContentType = copy.ignoreContentType;
631             parser = copy.parser.newInstance(); // parsers and their tree-builders maintain state, so need a fresh copy
632             parserDefined = copy.parserDefined;
633             sslSocketFactory = copy.sslSocketFactory; // these are all synchronized so safe to share
634             cookieManager = copy.cookieManager;
635             authenticator = copy.authenticator;
636             executing = false;
637         }
638 
639         @Override
proxy()640         public Proxy proxy() {
641             return proxy;
642         }
643 
644         @Override
proxy(@ullable Proxy proxy)645         public Request proxy(@Nullable Proxy proxy) {
646             this.proxy = proxy;
647             return this;
648         }
649 
650         @Override
proxy(String host, int port)651         public Request proxy(String host, int port) {
652             this.proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved(host, port));
653             return this;
654         }
655 
656         @Override
timeout()657         public int timeout() {
658             return timeoutMilliseconds;
659         }
660 
661         @Override
timeout(int millis)662         public Request timeout(int millis) {
663             Validate.isTrue(millis >= 0, "Timeout milliseconds must be 0 (infinite) or greater");
664             timeoutMilliseconds = millis;
665             return this;
666         }
667 
668         @Override
maxBodySize()669         public int maxBodySize() {
670             return maxBodySizeBytes;
671         }
672 
673         @Override
maxBodySize(int bytes)674         public Connection.Request maxBodySize(int bytes) {
675             Validate.isTrue(bytes >= 0, "maxSize must be 0 (unlimited) or larger");
676             maxBodySizeBytes = bytes;
677             return this;
678         }
679 
680         @Override
followRedirects()681         public boolean followRedirects() {
682             return followRedirects;
683         }
684 
685         @Override
followRedirects(boolean followRedirects)686         public Connection.Request followRedirects(boolean followRedirects) {
687             this.followRedirects = followRedirects;
688             return this;
689         }
690 
691         @Override
ignoreHttpErrors()692         public boolean ignoreHttpErrors() {
693             return ignoreHttpErrors;
694         }
695 
696         @Override
sslSocketFactory()697         public SSLSocketFactory sslSocketFactory() {
698             return sslSocketFactory;
699         }
700 
701         @Override
sslSocketFactory(SSLSocketFactory sslSocketFactory)702         public void sslSocketFactory(SSLSocketFactory sslSocketFactory) {
703             this.sslSocketFactory = sslSocketFactory;
704         }
705 
706         @Override
ignoreHttpErrors(boolean ignoreHttpErrors)707         public Connection.Request ignoreHttpErrors(boolean ignoreHttpErrors) {
708             this.ignoreHttpErrors = ignoreHttpErrors;
709             return this;
710         }
711 
712         @Override
ignoreContentType()713         public boolean ignoreContentType() {
714             return ignoreContentType;
715         }
716 
717         @Override
ignoreContentType(boolean ignoreContentType)718         public Connection.Request ignoreContentType(boolean ignoreContentType) {
719             this.ignoreContentType = ignoreContentType;
720             return this;
721         }
722 
723         @Override
data(Connection.KeyVal keyval)724         public Request data(Connection.KeyVal keyval) {
725             Validate.notNullParam(keyval, "keyval");
726             data.add(keyval);
727             return this;
728         }
729 
730         @Override
data()731         public Collection<Connection.KeyVal> data() {
732             return data;
733         }
734 
735         @Override
requestBody(@ullable String body)736         public Connection.Request requestBody(@Nullable String body) {
737             this.body = body;
738             return this;
739         }
740 
741         @Override
requestBody()742         public String requestBody() {
743             return body;
744         }
745 
746         @Override
parser(Parser parser)747         public Request parser(Parser parser) {
748             this.parser = parser;
749             parserDefined = true;
750             return this;
751         }
752 
753         @Override
parser()754         public Parser parser() {
755             return parser;
756         }
757 
758         @Override
postDataCharset(String charset)759         public Connection.Request postDataCharset(String charset) {
760             Validate.notNullParam(charset, "charset");
761             if (!Charset.isSupported(charset)) throw new IllegalCharsetNameException(charset);
762             this.postDataCharset = charset;
763             return this;
764         }
765 
766         @Override
postDataCharset()767         public String postDataCharset() {
768             return postDataCharset;
769         }
770 
cookieManager()771         CookieManager cookieManager() {
772             return cookieManager;
773         }
774 
auth(@ullable RequestAuthenticator authenticator)775         @Override public Connection.Request auth(@Nullable RequestAuthenticator authenticator) {
776             this.authenticator = authenticator;
777             return this;
778         }
779 
auth()780         @Override @Nullable public RequestAuthenticator auth() {
781             return authenticator;
782         }
783     }
784 
785     public static class Response extends HttpConnection.Base<Connection.Response> implements Connection.Response {
786         private static final int MAX_REDIRECTS = 20;
787         private static final String LOCATION = "Location";
788         private final int statusCode;
789         private final String statusMessage;
790         private @Nullable ByteBuffer byteData;
791         private @Nullable ControllableInputStream bodyStream;
792         private @Nullable HttpURLConnection conn;
793         private @Nullable String charset;
794         private @Nullable final String contentType;
795         private boolean executed = false;
796         private boolean inputStreamRead = false;
797         private int numRedirects = 0;
798         private final HttpConnection.Request req;
799 
800         /*
801          * Matches XML content types (like text/xml, image/svg+xml, application/xhtml+xml;charset=UTF8, etc)
802          */
803         private static final Pattern xmlContentTypeRxp = Pattern.compile("(\\w+)/\\w*\\+?xml.*");
804 
805         /**
806          <b>Internal only! </b>Creates a dummy HttpConnection.Response, useful for testing. All actual responses
807          are created from the HttpURLConnection and fields defined.
808          */
Response()809         Response() {
810             super();
811             statusCode = 400;
812             statusMessage = "Request not made";
813             req = new Request();
814             contentType = null;
815         }
816 
execute(HttpConnection.Request req)817         static Response execute(HttpConnection.Request req) throws IOException {
818             return execute(req, null);
819         }
820 
execute(HttpConnection.Request req, @Nullable Response previousResponse)821         static Response execute(HttpConnection.Request req, @Nullable Response previousResponse) throws IOException {
822             synchronized (req) {
823                 Validate.isFalse(req.executing, "Multiple threads were detected trying to execute the same request concurrently. Make sure to use Connection#newRequest() and do not share an executing request between threads.");
824                 req.executing = true;
825             }
826             Validate.notNullParam(req, "req");
827             URL url = req.url();
828             Validate.notNull(url, "URL must be specified to connect");
829             String protocol = url.getProtocol();
830             if (!protocol.equals("http") && !protocol.equals("https"))
831                 throw new MalformedURLException("Only http & https protocols supported");
832             final boolean methodHasBody = req.method().hasBody();
833             final boolean hasRequestBody = req.requestBody() != null;
834             if (!methodHasBody)
835                 Validate.isFalse(hasRequestBody, "Cannot set a request body for HTTP method " + req.method());
836 
837             // set up the request for execution
838             String mimeBoundary = null;
839             if (req.data().size() > 0 && (!methodHasBody || hasRequestBody))
840                 serialiseRequestUrl(req);
841             else if (methodHasBody)
842                 mimeBoundary = setOutputContentType(req);
843 
844             long startTime = System.nanoTime();
845             HttpURLConnection conn = createConnection(req);
846             Response res = null;
847             try {
848                 conn.connect();
849                 if (conn.getDoOutput()) {
850                     OutputStream out = conn.getOutputStream();
851                     try { writePost(req, out, mimeBoundary); }
852                     catch (IOException e) { conn.disconnect(); throw e; }
853                     finally { out.close(); }
854                 }
855 
856                 int status = conn.getResponseCode();
857                 res = new Response(conn, req, previousResponse);
858 
859                 // redirect if there's a location header (from 3xx, or 201 etc)
860                 if (res.hasHeader(LOCATION) && req.followRedirects()) {
861                     if (status != HTTP_TEMP_REDIR) {
862                         req.method(Method.GET); // always redirect with a get. any data param from original req are dropped.
863                         req.data().clear();
864                         req.requestBody(null);
865                         req.removeHeader(CONTENT_TYPE);
866                     }
867 
868                     String location = res.header(LOCATION);
869                     Validate.notNull(location);
870                     if (location.startsWith("http:/") && location.charAt(6) != '/') // fix broken Location: http:/temp/AAG_New/en/index.php
871                         location = location.substring(6);
872                     URL redir = StringUtil.resolve(req.url(), location);
873                     req.url(redir);
874 
875                     req.executing = false;
876                     return execute(req, res);
877                 }
878                 if ((status < 200 || status >= 400) && !req.ignoreHttpErrors())
879                         throw new HttpStatusException("HTTP error fetching URL", status, req.url().toString());
880 
881                 // check that we can handle the returned content type; if not, abort before fetching it
882                 String contentType = res.contentType();
883                 if (contentType != null
884                         && !req.ignoreContentType()
885                         && !contentType.startsWith("text/")
886                         && !xmlContentTypeRxp.matcher(contentType).matches()
887                         )
888                     throw new UnsupportedMimeTypeException("Unhandled content type. Must be text/*, */xml, or */*+xml",
889                             contentType, req.url().toString());
890 
891                 // switch to the XML parser if content type is xml and not parser not explicitly set
892                 if (contentType != null && xmlContentTypeRxp.matcher(contentType).matches()) {
893                     if (!req.parserDefined) req.parser(Parser.xmlParser());
894                 }
895 
896                 res.charset = DataUtil.getCharsetFromContentType(res.contentType); // may be null, readInputStream deals with it
897                 if (conn.getContentLength() != 0 && req.method() != HEAD) { // -1 means unknown, chunked. sun throws an IO exception on 500 response with no content when trying to read body
898                     InputStream stream = conn.getErrorStream() != null ? conn.getErrorStream() : conn.getInputStream();
899                     if (res.hasHeaderWithValue(CONTENT_ENCODING, "gzip"))
900                         stream = new GZIPInputStream(stream);
901                     else if (res.hasHeaderWithValue(CONTENT_ENCODING, "deflate"))
902                         stream = new InflaterInputStream(stream, new Inflater(true));
903 
904                     res.bodyStream = ControllableInputStream.wrap(
905                         stream, SharedConstants.DefaultBufferSize, req.maxBodySize())
906                         .timeout(startTime, req.timeout());
907                 } else {
908                     res.byteData = DataUtil.emptyByteBuffer();
909                 }
910             } catch (IOException e) {
911                 if (res != null) res.safeClose(); // will be non-null if got to conn
912                 throw e;
913             } finally {
914                 req.executing = false;
915 
916                 // detach any thread local auth delegate
917                 if (req.authenticator != null)
918                     AuthenticationHandler.handler.remove();
919             }
920 
921             res.executed = true;
922             return res;
923         }
924 
925         @Override
statusCode()926         public int statusCode() {
927             return statusCode;
928         }
929 
930         @Override
statusMessage()931         public String statusMessage() {
932             return statusMessage;
933         }
934 
935         @Override
charset()936         public String charset() {
937             return charset;
938         }
939 
940         @Override
charset(String charset)941         public Response charset(String charset) {
942             this.charset = charset;
943             return this;
944         }
945 
946         @Override
contentType()947         public String contentType() {
948             return contentType;
949         }
950 
parse()951         public Document parse() throws IOException {
952             Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response");
953             InputStream stream = bodyStream;
954             if (byteData != null) { // bytes have been read in to the buffer, parse that
955                 stream = new ByteArrayInputStream(byteData.array());
956                 inputStreamRead = false; // ok to reparse if in bytes
957             }
958             Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read.");
959             Document doc = DataUtil.parseInputStream(stream, charset, url.toExternalForm(), req.parser());
960             doc.connection(new HttpConnection(req, this)); // because we're static, don't have the connection obj. // todo - maybe hold in the req?
961             charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly
962             inputStreamRead = true;
963             safeClose();
964             return doc;
965         }
966 
prepareByteData()967         private void prepareByteData() {
968             Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
969             if (bodyStream != null && byteData == null) {
970                 Validate.isFalse(inputStreamRead, "Request has already been read (with .parse())");
971                 try {
972                     byteData = DataUtil.readToByteBuffer(bodyStream, req.maxBodySize());
973                 } catch (IOException e) {
974                     throw new UncheckedIOException(e);
975                 } finally {
976                     inputStreamRead = true;
977                     safeClose();
978                 }
979             }
980         }
981 
982         @Override
body()983         public String body() {
984             prepareByteData();
985             Validate.notNull(byteData);
986             // charset gets set from header on execute, and from meta-equiv on parse. parse may not have happened yet
987             String body = (charset == null ? UTF_8 : Charset.forName(charset))
988                 .decode(byteData).toString();
989             ((Buffer)byteData).rewind(); // cast to avoid covariant return type change in jdk9
990             return body;
991         }
992 
993         @Override
bodyAsBytes()994         public byte[] bodyAsBytes() {
995             prepareByteData();
996             Validate.notNull(byteData);
997             return byteData.array();
998         }
999 
1000         @Override
bufferUp()1001         public Connection.Response bufferUp() {
1002             prepareByteData();
1003             return this;
1004         }
1005 
1006         @Override
bodyStream()1007         public BufferedInputStream bodyStream() {
1008             Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before getting response body");
1009 
1010             // if we have read to bytes (via buffer up), return those as a stream.
1011             if (byteData != null) {
1012                 return new BufferedInputStream(new ByteArrayInputStream(byteData.array()), SharedConstants.DefaultBufferSize);
1013             }
1014 
1015             Validate.isFalse(inputStreamRead, "Request has already been read");
1016             Validate.notNull(bodyStream);
1017             inputStreamRead = true;
1018             return bodyStream.inputStream();
1019         }
1020 
1021         // set up connection defaults, and details from request
createConnection(HttpConnection.Request req)1022         private static HttpURLConnection createConnection(HttpConnection.Request req) throws IOException {
1023             Proxy proxy = req.proxy();
1024             final HttpURLConnection conn = (HttpURLConnection) (
1025                 proxy == null ?
1026                 req.url().openConnection() :
1027                 req.url().openConnection(proxy)
1028             );
1029 
1030             conn.setRequestMethod(req.method().name());
1031             conn.setInstanceFollowRedirects(false); // don't rely on native redirection support
1032             conn.setConnectTimeout(req.timeout());
1033             conn.setReadTimeout(req.timeout() / 2); // gets reduced after connection is made and status is read
1034 
1035             if (req.sslSocketFactory() != null && conn instanceof HttpsURLConnection)
1036                 ((HttpsURLConnection) conn).setSSLSocketFactory(req.sslSocketFactory());
1037             if (req.authenticator != null)
1038                 AuthenticationHandler.handler.enable(req.authenticator, conn); // removed in finally
1039             if (req.method().hasBody())
1040                 conn.setDoOutput(true);
1041             CookieUtil.applyCookiesToRequest(req, conn); // from the Request key/val cookies and the Cookie Store
1042             for (Map.Entry<String, List<String>> header : req.multiHeaders().entrySet()) {
1043                 for (String value : header.getValue()) {
1044                     conn.addRequestProperty(header.getKey(), value);
1045                 }
1046             }
1047             return conn;
1048         }
1049 
1050         /**
1051          * Call on completion of stream read, to close the body (or error) stream. The connection.disconnect allows
1052          * keep-alives to work (as the underlying connection is actually held open, despite the name).
1053          */
safeClose()1054         private void safeClose() {
1055             if (bodyStream != null) {
1056                 try {
1057                     bodyStream.close();
1058                 } catch (IOException e) {
1059                     // no-op
1060                 } finally {
1061                     bodyStream = null;
1062                 }
1063             }
1064             if (conn != null) {
1065                 conn.disconnect();
1066                 conn = null;
1067             }
1068         }
1069 
1070         // set up url, method, header, cookies
Response(HttpURLConnection conn, HttpConnection.Request request, HttpConnection.@Nullable Response previousResponse)1071         private Response(HttpURLConnection conn, HttpConnection.Request request, HttpConnection.@Nullable Response previousResponse) throws IOException {
1072             this.conn = conn;
1073             this.req = request;
1074             method = Method.valueOf(conn.getRequestMethod());
1075             url = conn.getURL();
1076             statusCode = conn.getResponseCode();
1077             statusMessage = conn.getResponseMessage();
1078             contentType = conn.getContentType();
1079 
1080             Map<String, List<String>> resHeaders = createHeaderMap(conn);
1081             processResponseHeaders(resHeaders); // includes cookie key/val read during header scan
1082             CookieUtil.storeCookies(req, url, resHeaders); // add set cookies to cookie store
1083 
1084             if (previousResponse != null) { // was redirected
1085                 // map previous response cookies into this response cookies() object
1086                 for (Map.Entry<String, String> prevCookie : previousResponse.cookies().entrySet()) {
1087                     if (!hasCookie(prevCookie.getKey()))
1088                         cookie(prevCookie.getKey(), prevCookie.getValue());
1089                 }
1090                 previousResponse.safeClose();
1091 
1092                 // enforce too many redirects:
1093                 numRedirects = previousResponse.numRedirects + 1;
1094                 if (numRedirects >= MAX_REDIRECTS)
1095                     throw new IOException(String.format("Too many redirects occurred trying to load URL %s", previousResponse.url()));
1096             }
1097         }
1098 
createHeaderMap(HttpURLConnection conn)1099         private static LinkedHashMap<String, List<String>> createHeaderMap(HttpURLConnection conn) {
1100             // the default sun impl of conn.getHeaderFields() returns header values out of order
1101             final LinkedHashMap<String, List<String>> headers = new LinkedHashMap<>();
1102             int i = 0;
1103             while (true) {
1104                 final String key = conn.getHeaderFieldKey(i);
1105                 final String val = conn.getHeaderField(i);
1106                 if (key == null && val == null)
1107                     break;
1108                 i++;
1109                 if (key == null || val == null)
1110                     continue; // skip http1.1 line
1111 
1112                 if (headers.containsKey(key))
1113                     headers.get(key).add(val);
1114                 else {
1115                     final ArrayList<String> vals = new ArrayList<>();
1116                     vals.add(val);
1117                     headers.put(key, vals);
1118                 }
1119             }
1120             return headers;
1121         }
1122 
processResponseHeaders(Map<String, List<String>> resHeaders)1123         void processResponseHeaders(Map<String, List<String>> resHeaders) {
1124             for (Map.Entry<String, List<String>> entry : resHeaders.entrySet()) {
1125                 String name = entry.getKey();
1126                 if (name == null)
1127                     continue; // http/1.1 line
1128 
1129                 List<String> values = entry.getValue();
1130                 if (name.equalsIgnoreCase("Set-Cookie")) {
1131                     for (String value : values) {
1132                         if (value == null)
1133                             continue;
1134                         TokenQueue cd = new TokenQueue(value);
1135                         String cookieName = cd.chompTo("=").trim();
1136                         String cookieVal = cd.consumeTo(";").trim();
1137                         // ignores path, date, domain, validateTLSCertificates et al. full details will be available in cookiestore if required
1138                         // name not blank, value not null
1139                         if (cookieName.length() > 0 && !cookies.containsKey(cookieName)) // if duplicates, only keep the first
1140                             cookie(cookieName, cookieVal);
1141                     }
1142                 }
1143                 for (String value : values) {
1144                     addHeader(name, fixHeaderEncoding(value));
1145                 }
1146             }
1147         }
1148 
1149         /**
1150          Servers may encode response headers in UTF-8 instead of RFC defined 8859. This method attempts to detect that
1151          and re-decode the string as UTF-8.
1152          * @param val a header value string that may have been incorrectly decoded as 8859.
1153          * @return a potentially re-decoded string.
1154          */
1155         @Nullable
fixHeaderEncoding(@ullable String val)1156         private static String fixHeaderEncoding(@Nullable String val) {
1157             if (val == null) return val;
1158             byte[] bytes = val.getBytes(ISO_8859_1);
1159             if (looksLikeUtf8(bytes))
1160                 return new String(bytes, UTF_8);
1161             else
1162                 return val;
1163         }
1164 
looksLikeUtf8(byte[] input)1165         private static boolean looksLikeUtf8(byte[] input) {
1166             int i = 0;
1167             // BOM:
1168             if (input.length >= 3
1169                 && (input[0] & 0xFF) == 0xEF
1170                 && (input[1] & 0xFF) == 0xBB
1171                 && (input[2] & 0xFF) == 0xBF) {
1172                 i = 3;
1173             }
1174 
1175             int end;
1176             boolean foundNonAscii = false;
1177             for (int j = input.length; i < j; ++i) {
1178                 int o = input[i];
1179                 if ((o & 0x80) == 0) {
1180                     continue; // ASCII
1181                 }
1182                 foundNonAscii = true;
1183 
1184                 // UTF-8 leading:
1185                 if ((o & 0xE0) == 0xC0) {
1186                     end = i + 1;
1187                 } else if ((o & 0xF0) == 0xE0) {
1188                     end = i + 2;
1189                 } else if ((o & 0xF8) == 0xF0) {
1190                     end = i + 3;
1191                 } else {
1192                     return false;
1193                 }
1194 
1195                 if (end >= input.length)
1196                     return false;
1197 
1198                 while (i < end) {
1199                     i++;
1200                     o = input[i];
1201                     if ((o & 0xC0) != 0x80) {
1202                         return false;
1203                     }
1204                 }
1205             }
1206             return foundNonAscii;
1207         }
1208 
setOutputContentType(final Connection.Request req)1209         private @Nullable static String setOutputContentType(final Connection.Request req) {
1210             final String contentType = req.header(CONTENT_TYPE);
1211             String bound = null;
1212             if (contentType != null) {
1213                 // no-op; don't add content type as already set (e.g. for requestBody())
1214                 // todo - if content type already set, we could add charset
1215 
1216                 // if user has set content type to multipart/form-data, auto add boundary.
1217                 if(contentType.contains(MULTIPART_FORM_DATA) && !contentType.contains("boundary")) {
1218                     bound = DataUtil.mimeBoundary();
1219                     req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound);
1220                 }
1221 
1222             }
1223             else if (needsMultipart(req)) {
1224                 bound = DataUtil.mimeBoundary();
1225                 req.header(CONTENT_TYPE, MULTIPART_FORM_DATA + "; boundary=" + bound);
1226             } else {
1227                 req.header(CONTENT_TYPE, FORM_URL_ENCODED + "; charset=" + req.postDataCharset());
1228             }
1229             return bound;
1230         }
1231 
writePost(final Connection.Request req, final OutputStream outputStream, @Nullable final String boundary)1232         private static void writePost(final Connection.Request req, final OutputStream outputStream, @Nullable final String boundary) throws IOException {
1233             final Collection<Connection.KeyVal> data = req.data();
1234             final BufferedWriter w = new BufferedWriter(new OutputStreamWriter(outputStream, Charset.forName(req.postDataCharset())));
1235 
1236             if (boundary != null) {
1237                 // boundary will be set if we're in multipart mode
1238                 for (Connection.KeyVal keyVal : data) {
1239                     w.write("--");
1240                     w.write(boundary);
1241                     w.write("\r\n");
1242                     w.write("Content-Disposition: form-data; name=\"");
1243                     w.write(encodeMimeName(keyVal.key())); // encodes " to %22
1244                     w.write("\"");
1245                     final InputStream input = keyVal.inputStream();
1246                     if (input != null) {
1247                         w.write("; filename=\"");
1248                         w.write(encodeMimeName(keyVal.value()));
1249                         w.write("\"\r\nContent-Type: ");
1250                         String contentType = keyVal.contentType();
1251                         w.write(contentType != null ? contentType : DefaultUploadType);
1252                         w.write("\r\n\r\n");
1253                         w.flush(); // flush
1254                         DataUtil.crossStreams(input, outputStream);
1255                         outputStream.flush();
1256                     } else {
1257                         w.write("\r\n\r\n");
1258                         w.write(keyVal.value());
1259                     }
1260                     w.write("\r\n");
1261                 }
1262                 w.write("--");
1263                 w.write(boundary);
1264                 w.write("--");
1265             } else {
1266                 String body = req.requestBody();
1267                 if (body != null) {
1268                     // data will be in query string, we're sending a plaintext body
1269                     w.write(body);
1270                 }
1271                 else {
1272                     // regular form data (application/x-www-form-urlencoded)
1273                     boolean first = true;
1274                     for (Connection.KeyVal keyVal : data) {
1275                         if (!first)
1276                             w.append('&');
1277                         else
1278                             first = false;
1279 
1280                         w.write(URLEncoder.encode(keyVal.key(), req.postDataCharset()));
1281                         w.write('=');
1282                         w.write(URLEncoder.encode(keyVal.value(), req.postDataCharset()));
1283                     }
1284                 }
1285             }
1286             w.close();
1287         }
1288 
1289         // for get url reqs, serialise the data map into the url
serialiseRequestUrl(Connection.Request req)1290         private static void serialiseRequestUrl(Connection.Request req) throws IOException {
1291             UrlBuilder in = new UrlBuilder(req.url());
1292 
1293             for (Connection.KeyVal keyVal : req.data()) {
1294                 Validate.isFalse(keyVal.hasInputStream(), "InputStream data not supported in URL query string.");
1295                 in.appendKeyVal(keyVal);
1296             }
1297             req.url(in.build());
1298             req.data().clear(); // moved into url as get params
1299         }
1300     }
1301 
needsMultipart(Connection.Request req)1302     private static boolean needsMultipart(Connection.Request req) {
1303         // multipart mode, for files. add the header if we see something with an inputstream, and return a non-null boundary
1304         for (Connection.KeyVal keyVal : req.data()) {
1305             if (keyVal.hasInputStream())
1306                 return true;
1307         }
1308         return false;
1309     }
1310 
1311     public static class KeyVal implements Connection.KeyVal {
1312         private String key;
1313         private String value;
1314         private @Nullable InputStream stream;
1315         private @Nullable String contentType;
1316 
create(String key, String value)1317         public static KeyVal create(String key, String value) {
1318             return new KeyVal(key, value);
1319         }
1320 
create(String key, String filename, InputStream stream)1321         public static KeyVal create(String key, String filename, InputStream stream) {
1322             return new KeyVal(key, filename)
1323                 .inputStream(stream);
1324         }
1325 
KeyVal(String key, String value)1326         private KeyVal(String key, String value) {
1327             Validate.notEmptyParam(key, "key");
1328             Validate.notNullParam(value, "value");
1329             this.key = key;
1330             this.value = value;
1331         }
1332 
1333         @Override
key(String key)1334         public KeyVal key(String key) {
1335             Validate.notEmptyParam(key, "key");
1336             this.key = key;
1337             return this;
1338         }
1339 
1340         @Override
key()1341         public String key() {
1342             return key;
1343         }
1344 
1345         @Override
value(String value)1346         public KeyVal value(String value) {
1347             Validate.notNullParam(value, "value");
1348             this.value = value;
1349             return this;
1350         }
1351 
1352         @Override
value()1353         public String value() {
1354             return value;
1355         }
1356 
inputStream(InputStream inputStream)1357         public KeyVal inputStream(InputStream inputStream) {
1358             Validate.notNullParam(value, "inputStream");
1359             this.stream = inputStream;
1360             return this;
1361         }
1362 
1363         @Override
inputStream()1364         public InputStream inputStream() {
1365             return stream;
1366         }
1367 
1368         @Override
hasInputStream()1369         public boolean hasInputStream() {
1370             return stream != null;
1371         }
1372 
1373         @Override
contentType(String contentType)1374         public Connection.KeyVal contentType(String contentType) {
1375             Validate.notEmpty(contentType);
1376             this.contentType = contentType;
1377             return this;
1378         }
1379 
1380         @Override
contentType()1381         public String contentType() {
1382             return contentType;
1383         }
1384 
1385         @Override
toString()1386         public String toString() {
1387             return key + "=" + value;
1388         }
1389     }
1390 }
1391