package org.jsoup.nodes; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.helper.DataUtil; import org.jsoup.helper.Validate; import org.jsoup.internal.StringUtil; import org.jsoup.parser.ParseSettings; import org.jsoup.parser.Parser; import org.jsoup.parser.Tag; import org.jsoup.select.Elements; import org.jsoup.select.Evaluator; import org.jsoup.select.Selector; import org.jspecify.annotations.Nullable; import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; import java.util.List; import static org.jsoup.parser.Parser.NamespaceHtml; /** A HTML Document. @author Jonathan Hedley, jonathan@hedley.net */ public class Document extends Element { private @Nullable Connection connection; // the connection this doc was fetched from, if any private OutputSettings outputSettings = new OutputSettings(); private Parser parser; // the parser used to parse this document private QuirksMode quirksMode = QuirksMode.noQuirks; private final String location; private boolean updateMetaCharset = false; /** Create a new, empty Document, in the specified namespace. @param namespace the namespace of this Document's root node. @param baseUri base URI of document @see org.jsoup.Jsoup#parse @see #createShell */ public Document(String namespace, String baseUri) { super(Tag.valueOf("#root", namespace, ParseSettings.htmlDefault), baseUri); this.location = baseUri; this.parser = Parser.htmlParser(); // default, but overridable } /** Create a new, empty Document, in the HTML namespace. @param baseUri base URI of document @see org.jsoup.Jsoup#parse @see #Document(String namespace, String baseUri) */ public Document(String baseUri) { this(NamespaceHtml, baseUri); } /** Create a valid, empty shell of a document, suitable for adding more elements to. @param baseUri baseUri of document @return document with html, head, and body elements. 
*/
    public static Document createShell(String baseUri) {
        Validate.notNull(baseUri);

        final Document shell = new Document(baseUri);
        // NOTE(review): re-assigns the parser from its own accessor; parser() is defined outside this
        // view, so this looks like a no-op but should be confirmed before removing.
        shell.parser = shell.parser();

        // build the html > (head, body) skeleton
        final Element root = shell.appendElement("html");
        root.appendElement("head");
        root.appendElement("body");
        return shell;
    }

    /**
     * Get the URL this Document was parsed from. If the starting URL is a redirect,
     * this will return the final URL from which the document was served.
     *
Will return an empty string if the location is unknown (e.g. if parsed from a String). * @return location */ public String location() { return location; } /** Returns the Connection (Request/Response) object that was used to fetch this document, if any; otherwise, a new default Connection object. This can be used to continue a session, preserving settings and cookies, etc. @return the Connection (session) associated with this Document, or an empty one otherwise. @see Connection#newRequest() */ public Connection connection() { if (connection == null) return Jsoup.newSession(); else return connection; } /** * Returns this Document's doctype. * @return document type, or null if not set */ public @Nullable DocumentType documentType() { for (Node node : childNodes) { if (node instanceof DocumentType) return (DocumentType) node; else if (!(node instanceof LeafNode)) // scans forward across comments, text, processing instructions etc break; } return null; // todo - add a set document type? } /** Find the root HTML element, or create it if it doesn't exist. @return the root HTML element. */ private Element htmlEl() { Element el = firstElementChild(); while (el != null) { if (el.nameIs("html")) return el; el = el.nextElementSibling(); } return appendElement("html"); } /** Get this document's {@code head} element.
As a side-effect, if this Document does not already have a HTML structure, it will be created. If you do not want
     that, use {@code #selectFirst("head")} instead.
     @return {@code head} element.
     */
    public Element head() {
        final Element root = htmlEl();
        // look through the direct element children of the html element for an existing head
        for (Element child = root.firstElementChild(); child != null; child = child.nextElementSibling()) {
            if (child.nameIs("head"))
                return child;
        }
        // none found: create one as the first child of the html element
        return root.prependElement("head");
    }

    /**
     Get this document's {@code
} or {@code