package org.jsoup.integration;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.UnsupportedMimeTypeException;
import org.jsoup.helper.W3CDom;
import org.jsoup.internal.StringUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.FormElement;
import org.jsoup.parser.HtmlTreeBuilder;
import org.jsoup.parser.Parser;
import org.jsoup.parser.XmlTreeBuilder;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.ConnectException;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.util.List;

import static org.junit.jupiter.api.Assertions.*;

/**
 Tests the URL connection. Not enabled by default, so tests don't require network connection.

 @author Jonathan Hedley, jonathan@hedley.net */
@Disabled // ignored by default so tests don't require network access. comment out to enable.
// todo: rebuild these into a local Jetty test server, so not reliant on the vagaries of the internet.
public class UrlConnectTest {
    private static final String WEBSITE_WITH_INVALID_CERTIFICATE = "https://certs.cac.washington.edu/CAtest/";
    private static final String WEBSITE_WITH_SNI = "https://jsoup.org/";
    public static String browserUa = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36";

    @Test
    public void fetchBaidu() throws IOException {
        Connection.Response res = Jsoup.connect("http://www.baidu.com/").timeout(10*1000).execute();
        Document doc = res.parse();

        assertEquals("GBK", doc.outputSettings().charset().displayName());
        assertEquals("GBK", res.charset());
        assert(res.hasCookie("BAIDUID"));
        assertEquals("text/html;charset=gbk", res.contentType());
    }

    @Test
    public void exceptOnUnknownContentType() {
        String url = "http://direct.jsoup.org/rez/osi_logo.png"; // not text/* but image/png, should throw
        boolean threw = false;
        try {
            Document doc = Jsoup.parse(new URL(url), 3000);
        } catch (UnsupportedMimeTypeException e) {
            threw = true;
            assertEquals("org.jsoup.UnsupportedMimeTypeException: Unhandled content type. Must be text/*, application/xml, or application/xhtml+xml. Mimetype=image/png, URL=http://direct.jsoup.org/rez/osi_logo.png", e.toString());
            assertEquals(url, e.getUrl());
            assertEquals("image/png", e.getMimeType());
        } catch (IOException e) {
        }
        assertTrue(threw);
    }

    @Test
    public void ignoresContentTypeIfSoConfigured() throws IOException {
        Document doc = Jsoup.connect("https://jsoup.org/rez/osi_logo.png").ignoreContentType(true).get();
        assertEquals("", doc.title()); // this will cause an ugly parse tree
    }

    private static String ihVal(String key, Document doc) {
        return doc.select("th:contains("+key+") + td").first().text();
    }

    @Test
    public void followsTempRedirect() throws IOException {
        Connection con = Jsoup.connect("http://direct.infohound.net/tools/302.pl"); // http://jsoup.org
        Document doc = con.get();
        assertTrue(doc.title().contains("jsoup"));
    }

    @Test
    public void followsNewTempRedirect() throws IOException {
        Connection con = Jsoup.connect("http://direct.infohound.net/tools/307.pl"); // http://jsoup.org
        Document doc = con.get();
        assertTrue(doc.title().contains("jsoup"));
        assertEquals("https://jsoup.org/", con.response().url().toString());
    }

    @Test
    public void postRedirectsFetchWithGet() throws IOException {
        Connection con = Jsoup.connect("http://direct.infohound.net/tools/302.pl")
                .data("Argument", "Riposte")
                .method(Connection.Method.POST);
        Connection.Response res = con.execute();
        assertEquals("https://jsoup.org/", res.url().toExternalForm());
        assertEquals(Connection.Method.GET, res.method());
    }

    @Test
    public void followsRedirectToHttps() throws IOException {
        Connection con = Jsoup.connect("http://direct.infohound.net/tools/302-secure.pl"); // https://www.google.com
        con.data("id", "5");
        Document doc = con.get();
        assertTrue(doc.title().contains("Google"));
    }

    @Test
    public void followsRelativeRedirect() throws IOException {
        Connection con = Jsoup.connect("http://direct.infohound.net/tools/302-rel.pl"); // to /tidy/
        Document doc = con.post();
        assertTrue(doc.title().contains("HTML Tidy Online"));
    }

    @Test
    public void followsRelativeDotRedirect() throws IOException {
        // redirects to "./ok.html", should resolve to http://direct.infohound.net/tools/ok.html
        Connection con = Jsoup.connect("http://direct.infohound.net/tools/302-rel-dot.pl"); // to ./ok.html
        Document doc = con.post();
        assertTrue(doc.title().contains("OK"));
        assertEquals(doc.location(), "http://direct.infohound.net/tools/ok.html");
    }

    @Test
    public void followsRelativeDotRedirect2() throws IOException {
        //redirects to "esportspenedes.cat/./ep/index.php", should resolve to "esportspenedes.cat/ep/index.php"
        Connection con = Jsoup.connect("http://esportspenedes.cat")  // note lack of trailing / - server should redir to / first, then to ./ep/...; but doesn't'
                .timeout(10000);
        Document doc = con.post();
        assertEquals(doc.location(), "http://esportspenedes.cat/ep/index.php");
    }

    @Test
    public void followsRedirectsWithWithespaces() throws IOException {
        Connection con = Jsoup.connect("http://tinyurl.com/kgofxl8"); // to http://www.google.com/?q=white spaces
        Document doc = con.get();
        assertTrue(doc.title().contains("Google"));
    }

    @Test
    public void gracefullyHandleBrokenLocationRedirect() throws IOException {
        Connection con = Jsoup.connect("http://aag-ye.com"); // has Location: http:/temp/AAG_New/en/index.php
        con.get(); // would throw exception on error
        assertTrue(true);
    }

    @Test
    public void ignores500tExceptionIfSoConfigured() throws IOException {
        Connection con = Jsoup.connect("http://direct.infohound.net/tools/500.pl").ignoreHttpErrors(true);
        Connection.Response res = con.execute();
        Document doc = res.parse();
        assertEquals(500, res.statusCode());
        assertEquals("Application Error", res.statusMessage());
        assertEquals("Woops", doc.select("h1").first().text());
    }

    @Test
    public void ignores500WithNoContentExceptionIfSoConfigured() throws IOException {
        Connection con = Jsoup.connect("http://direct.infohound.net/tools/500-no-content.pl").ignoreHttpErrors(true);
        Connection.Response res = con.execute();
        Document doc = res.parse();
        assertEquals(500, res.statusCode());
        assertEquals("Application Error", res.statusMessage());
    }

    @Test
    public void ignores200WithNoContentExceptionIfSoConfigured() throws IOException {
        Connection con = Jsoup.connect("http://direct.infohound.net/tools/200-no-content.pl").ignoreHttpErrors(true);
        Connection.Response res = con.execute();
        Document doc = res.parse();
        assertEquals(200, res.statusCode());
        assertEquals("All Good", res.statusMessage());
    }

    @Test
    public void handles200WithNoContent() throws IOException {
        Connection con = Jsoup
            .connect("http://direct.infohound.net/tools/200-no-content.pl")
            .userAgent(browserUa);
        Connection.Response res = con.execute();
        Document doc = res.parse();
        assertEquals(200, res.statusCode());

        con = Jsoup
            .connect("http://direct.infohound.net/tools/200-no-content.pl")
            .parser(Parser.xmlParser())
            .userAgent(browserUa);
        res = con.execute();
        doc = res.parse();
        assertEquals(200, res.statusCode());
    }

    @Test
    public void doesntRedirectIfSoConfigured() throws IOException {
        Connection con = Jsoup.connect("http://direct.infohound.net/tools/302.pl").followRedirects(false);
        Connection.Response res = con.execute();
        assertEquals(302, res.statusCode());
        assertEquals("http://jsoup.org", res.header("Location"));
    }

    @Test
    public void redirectsResponseCookieToNextResponse() throws IOException {
        Connection con = Jsoup.connect("http://direct.infohound.net/tools/302-cookie.pl");
        Connection.Response res = con.execute();
        assertEquals("asdfg123", res.cookie("token")); // confirms that cookies set on 1st hit are presented in final result
        Document doc = res.parse();
        assertEquals("token=asdfg123; uid=jhy", ihVal("HTTP_COOKIE", doc)); // confirms that redirected hit saw cookie
    }

    @Test
    public void maximumRedirects() {
        boolean threw = false;
        try {
            Document doc = Jsoup.connect("http://direct.infohound.net/tools/loop.pl").get();
        } catch (IOException e) {
            assertTrue(e.getMessage().contains("Too many redirects"));
            threw = true;
        }
        assertTrue(threw);
    }

    @Test
    public void handlesDodgyCharset() throws IOException {
        // tests that when we get back "UFT8", that it is recognised as unsupported, and falls back to default instead
        String url = "http://direct.infohound.net/tools/bad-charset.pl";
        Connection.Response res = Jsoup.connect(url).execute();
        assertEquals("text/html; charset=UFT8", res.header("Content-Type")); // from the header
        assertNull(res.charset()); // tried to get from header, not supported, so returns null
        Document doc = res.parse(); // would throw an error if charset unsupported
        assertTrue(doc.text().contains("Hello!"));
        assertEquals("UTF-8", res.charset()); // set from default on parse
    }

    /**
     * Verify that security disabling feature works properly.
     * <p/>
     * 1. try to hit url with invalid certificate and evaluate that exception is thrown
     *
     * @throws Exception
     */
    @Test
    public void testUnsafeFail() throws Exception {
        String url = WEBSITE_WITH_INVALID_CERTIFICATE;
        assertThrows(IOException.class, () -> Jsoup.connect(url).execute());
    }


    /**
     * Verify that requests to websites with SNI fail on jdk 1.6
     * <p/>
     * read for more details:
     * http://en.wikipedia.org/wiki/Server_Name_Indication
     *
     * Test is ignored independent from others as it requires JDK 1.6
     * @throws Exception
     */
    @Test
    public void testSNIFail() throws Exception {
        assertThrows(IOException.class, () -> Jsoup.connect(WEBSITE_WITH_SNI).execute());
    }

    @Test
    public void shouldWorkForCharsetInExtraAttribute() throws IOException {
        Connection.Response res = Jsoup.connect("https://www.creditmutuel.com/groupe/fr/").execute();
        Document doc = res.parse(); // would throw an error if charset unsupported
        assertEquals("ISO-8859-1", res.charset());
    }

    // The following tests were added to test specific domains if they work. All code paths
    // which make the following test green are tested in other unit or integration tests, so the following lines
    // could be deleted

    @Test
    public void shouldSelectFirstCharsetOnWeirdMultileCharsetsInMetaTags() throws IOException {
        Connection.Response res = Jsoup.connect("http://aamo.info/").execute();
        res.parse(); // would throw an error if charset unsupported
        assertEquals("ISO-8859-1", res.charset());
    }

    @Test
    public void shouldParseBrokenHtml5MetaCharsetTagCorrectly() throws IOException {
        Connection.Response res = Jsoup.connect("http://9kuhkep.net").execute();
        res.parse(); // would throw an error if charset unsupported
        assertEquals("UTF-8", res.charset());
    }

    @Test
    public void shouldEmptyMetaCharsetCorrectly() throws IOException {
        Connection.Response res = Jsoup.connect("http://aastmultimedia.com").execute();
        res.parse(); // would throw an error if charset unsupported
        assertEquals("UTF-8", res.charset());
    }

    @Test
    public void shouldWorkForDuplicateCharsetInTag() throws IOException {
        Connection.Response res = Jsoup.connect("http://aaptsdassn.org").execute();
        Document doc = res.parse(); // would throw an error if charset unsupported
        assertEquals("ISO-8859-1", res.charset());
    }

    @Test
    public void handles201Created() throws IOException {
        Document doc = Jsoup.connect("http://direct.infohound.net/tools/201.pl").get(); // 201, location=jsoup
        assertEquals("https://jsoup.org/", doc.location());
    }

    /*
     Proxy tests. Assumes local proxy running on 8888, without system propery set (so that specifying it is required).
     */

    @Test
    public void fetchViaHttpProxy() throws IOException {
        String url = "https://jsoup.org";
        Proxy proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved("localhost", 8888));
        Document doc = Jsoup.connect(url).proxy(proxy).get();
        assertTrue(doc.title().contains("jsoup"));
    }

    @Test
    public void fetchViaHttpProxySetByArgument() throws IOException {
        String url = "https://jsoup.org";
        Document doc = Jsoup.connect(url).proxy("localhost", 8888).get();
        assertTrue(doc.title().contains("jsoup"));
    }

    @Test
    public void invalidProxyFails() {
        boolean caught = false;
        String url = "https://jsoup.org";
        try {
            Document doc = Jsoup.connect(url).proxy("localhost", 8889).get();
        } catch (IOException e) {
            caught = e instanceof ConnectException;
        }
        assertTrue(caught);
    }

    @Test
    public void proxyGetAndSet() throws IOException {
        String url = "https://jsoup.org";
        Proxy proxy = new Proxy(Proxy.Type.HTTP, InetSocketAddress.createUnresolved("localhost", 8889)); // invalid
        final Connection con = Jsoup.connect(url).proxy(proxy);

        assert con.request().proxy() == proxy;
        con.request().proxy(null); // disable
        Document doc = con.get();
        assertTrue(doc.title().contains("jsoup")); // would fail if actually went via proxy
    }

    @Test
    public void throwsIfRequestBodyForGet() throws IOException {
        boolean caught = false;
        String url = "https://jsoup.org";
        try {
            Document doc = Jsoup.connect(url).requestBody("fail").get();
        } catch (IllegalArgumentException e) {
            caught = true;
        }
        assertTrue(caught);
    }

    @Test
    public void canSpecifyResponseCharset() throws IOException {
        // both these docs have <80> in there as euro/control char depending on charset
        String noCharsetUrl = "http://direct.infohound.net/tools/Windows-1252-nocharset.html";
        String charsetUrl = "http://direct.infohound.net/tools/Windows-1252-charset.html";

        // included in meta
        Connection.Response res1 = Jsoup.connect(charsetUrl).execute();
        assertNull(res1.charset()); // not set in headers
        final Document doc1 = res1.parse();
        assertEquals("windows-1252", doc1.charset().displayName()); // but determined at parse time
        assertEquals("Cost is €100", doc1.select("p").text());
        assertTrue(doc1.text().contains("€"));

        // no meta, no override
        Connection.Response res2 = Jsoup.connect(noCharsetUrl).execute();
        assertNull(res2.charset()); // not set in headers
        final Document doc2 = res2.parse();
        assertEquals("UTF-8", doc2.charset().displayName()); // so defaults to utf-8
        assertEquals("Cost is �100", doc2.select("p").text());
        assertTrue(doc2.text().contains("�"));

        // no meta, let's override
        Connection.Response res3 = Jsoup.connect(noCharsetUrl).execute();
        assertNull(res3.charset()); // not set in headers
        res3.charset("windows-1252");
        assertEquals("windows-1252", res3.charset()); // read back
        final Document doc3 = res3.parse();
        assertEquals("windows-1252", doc3.charset().displayName()); // from override
        assertEquals("Cost is €100", doc3.select("p").text());
        assertTrue(doc3.text().contains("€"));
    }

    @Test
    public void handlesUnescapedRedirects() throws IOException {
        // URL locations should be url safe (ascii) but are often not, so we should try to guess
        // in this case the location header is utf-8, but defined in spec as iso8859, so detect, convert, encode
        String url = "https://direct.infohound.net/tools/302-utf.pl";
        String urlEscaped = "https://direct.infohound.net/tools/test%F0%9F%92%A9.html";

        Connection.Response res = Jsoup.connect(url).execute();
        Document doc = res.parse();
        assertEquals(doc.body().text(), "\uD83D\uDCA9!");
        assertEquals(doc.location(), urlEscaped);

        Connection.Response res2 = Jsoup.connect(url).followRedirects(false).execute();
        assertEquals("/tools/test\uD83D\uDCA9.html", res2.header("Location"));
        // if we didn't notice it was utf8, would look like: Location: /tools/testð©.html
    }

    @Test public void handlesEscapesInRedirecct() throws IOException {
        Document doc = Jsoup.connect("https://direct.infohound.net/tools/302-escaped.pl").get();
        assertEquals("https://direct.infohound.net/tools/q.pl?q=one%20two", doc.location());

        doc = Jsoup.connect("https://direct.infohound.net/tools/302-white.pl").get();
        assertEquals("https://direct.infohound.net/tools/q.pl?q=one+two", doc.location());
    }

    @Test
    public void handlesUt8fInUrl() throws IOException {
        String url = "https://direct.infohound.net/tools/test\uD83D\uDCA9.html";
        String urlEscaped = "https://direct.infohound.net/tools/test%F0%9F%92%A9.html";

        Connection.Response res = Jsoup.connect(url).execute();
        Document doc = res.parse();
        assertEquals("\uD83D\uDCA9!", doc.body().text());
        assertEquals(urlEscaped, doc.location());
    }

    @Test
    public void inWildUtfRedirect() throws IOException {
        Connection.Response res = Jsoup.connect("http://brabantn.ws/Q4F").execute();
        Document doc = res.parse();
        assertEquals(
            "http://www.omroepbrabant.nl/?news/2474781303/Gestrande+ree+in+Oss+niet+verdoofd,+maar+doodgeschoten+%E2%80%98Dit+kan+gewoon+niet,+bizar%E2%80%99+[VIDEO].aspx",
            doc.location()
            );
    }

    @Test
    public void inWildUtfRedirect2() throws IOException {
        Connection.Response res = Jsoup.connect("https://ssl.souq.com/sa-en/2724288604627/s").execute();
        Document doc = res.parse();
        assertEquals(
            "https://saudi.souq.com/sa-en/%D8%AE%D8%B2%D9%86%D8%A9-%D8%A2%D9%85%D9%86%D8%A9-3-%D8%B7%D8%A8%D9%82%D8%A7%D8%AA-%D8%A8%D9%86%D8%B8%D8%A7%D9%85-%D9%82%D9%81%D9%84-%D8%A5%D9%84%D9%83%D8%AA%D8%B1%D9%88%D9%86%D9%8A-bsd11523-6831477/i/?ctype=dsrch",
            doc.location()
        );
    }

   @Test public void handlesEscapedRedirectUrls() throws IOException {
        String url = "http://www.altalex.com/documents/news/2016/12/06/questioni-civilistiche-conseguenti-alla-depenalizzazione";
        // sends: Location:http://shop.wki.it/shared/sso/sso.aspx?sso=&url=http%3a%2f%2fwww.altalex.com%2fsession%2fset%2f%3freturnurl%3dhttp%253a%252f%252fwww.altalex.com%253a80%252fdocuments%252fnews%252f2016%252f12%252f06%252fquestioni-civilistiche-conseguenti-alla-depenalizzazione
        // then to: http://www.altalex.com/session/set/?returnurl=http%3a%2f%2fwww.altalex.com%3a80%2fdocuments%2fnews%2f2016%2f12%2f06%2fquestioni-civilistiche-conseguenti-alla-depenalizzazione&sso=RDRG6T684G4AK2E7U591UGR923
        // then : http://www.altalex.com:80/documents/news/2016/12/06/questioni-civilistiche-conseguenti-alla-depenalizzazione

        // bug is that jsoup goes to
        // 	GET /shared/sso/sso.aspx?sso=&url=http%253a%252f%252fwww.altalex.com%252fsession%252fset%252f%253freturnurl%253dhttp%25253a%25252f%25252fwww.altalex.com%25253a80%25252fdocuments%25252fnews%25252f2016%25252f12%25252f06%25252fquestioni-civilistiche-conseguenti-alla-depenalizzazione HTTP/1.1
        // i.e. double escaped

        Connection.Response res = Jsoup.connect(url)
                .proxy("localhost", 8888)
                .execute();
        Document doc = res.parse();
        assertEquals(200, res.statusCode());
    }

    @Test public void handlesUnicodeInQuery() throws IOException {
        Document doc = Jsoup.connect("https://www.google.pl/search?q=gąska").get();
        assertEquals("gąska - Szukaj w Google", doc.title());

        doc = Jsoup.connect("http://mov-world.net/archiv/TV/A/%23No.Title/").get();
        assertEquals("Index of /archiv/TV/A/%23No.Title", doc.title());
    }

    @Test public void handlesSuperDeepPage() throws IOException {
        // https://github.com/jhy/jsoup/issues/955

        long start = System.currentTimeMillis();
        String url = "http://sv.stargate.wikia.com/wiki/M2J";
        Document doc = Jsoup.connect(url).get();
        assertEquals("M2J | Sv.stargate Wiki | FANDOM powered by Wikia", doc.title());
        assertEquals(110160, doc.select("dd").size());
        // those are all <dl><dd> stacked in each other. wonder how that got generated?
        assertTrue(System.currentTimeMillis() - start < 1000);
    }

    @Test public void handles966() throws IOException {
        // http://szshb.nxszs.gov.cn/
        // https://github.com/jhy/jsoup/issues/966

        Document doc = Jsoup.connect("http://szshb.nxszs.gov.cn/").get();

        assertEquals("石嘴山市环境保护局", doc.title());
    }

    @Test public void canRequestIdn() throws IOException {
        String url = "https://räksmörgås.josefsson.org/";
        Document doc = Jsoup.connect(url).get();

        assertEquals("https://xn--rksmrgs-5wao1o.josefsson.org/", doc.location());
        assertTrue(doc.title().contains("Räksmörgås.josefßon.org"));
    }

}
