import { Tokenizer, TokenizerMode, type TokenHandler } from '../tokenizer/index.js';
import { OpenElementStack, type StackHandler } from './open-element-stack.js';
import { FormattingElementList, EntryType, type ElementEntry } from './formatting-element-list.js';
import { defaultTreeAdapter, type DefaultTreeAdapterMap } from '../tree-adapters/default.js';
import * as doctype from '../common/doctype.js';
import * as foreignContent from '../common/foreign-content.js';
import { ERR, type ParserErrorHandler } from '../common/error-codes.js';
import * as unicode from '../common/unicode.js';
import {
    TAG_ID as $,
    TAG_NAMES as TN,
    NS,
    ATTRS,
    SPECIAL_ELEMENTS,
    DOCUMENT_MODE,
    isNumberedHeader,
    getTagID,
} from '../common/html.js';
import type { TreeAdapter, TreeAdapterTypeMap } from '../tree-adapters/interface.js';
import {
    TokenType,
    getTokenAttr,
    type Token,
    type CommentToken,
    type CharacterToken,
    type TagToken,
    type DoctypeToken,
    type EOFToken,
    type LocationWithAttributes,
    type ElementLocation,
} from '../common/token.js';

//Misc constants
/** Value of an `input` element's `type` attribute that marks it as hidden. */
const HIDDEN_INPUT_TYPE = 'hidden';

//Adoption agency loops iteration count
/** Iteration cap for the outer loop of the adoption agency algorithm. */
const AA_OUTER_LOOP_ITER = 8;
/** Iteration count for the inner loop of the adoption agency algorithm. */
const AA_INNER_LOOP_ITER = 3;

//Insertion modes
/**
 * Tree-construction insertion modes.
 * Numeric values follow declaration order, starting at 0 for `INITIAL`.
 */
enum InsertionMode {
    INITIAL,
    BEFORE_HTML,
    BEFORE_HEAD,
    IN_HEAD,
    IN_HEAD_NO_SCRIPT,
    AFTER_HEAD,
    IN_BODY,
    TEXT,
    IN_TABLE,
    IN_TABLE_TEXT,
    IN_CAPTION,
    IN_COLUMN_GROUP,
    IN_TABLE_BODY,
    IN_ROW,
    IN_CELL,
    IN_SELECT,
    IN_SELECT_IN_TABLE,
    IN_TEMPLATE,
    AFTER_BODY,
    IN_FRAMESET,
    AFTER_FRAMESET,
    AFTER_AFTER_BODY,
    AFTER_AFTER_FRAMESET,
}

/**
 * Location record with every line, column and offset field set to -1.
 * NOTE(review): presumably a placeholder for positions that are not known yet - confirm against its users.
 */
const BASE_LOC = {
    startLine: -1,
    startCol: -1,
    startOffset: -1,
    endLine: -1,
    endCol: -1,
    endOffset: -1,
};

/** Tag IDs of the table structure elements: `table`, `tbody`, `tfoot`, `thead` and `tr`. */
const TABLE_STRUCTURE_TAGS = new Set([$.TABLE, $.TBODY, $.TFOOT, $.THEAD, $.TR]);

export interface ParserOptions<T extends TreeAdapterTypeMap> {
    /**
     * The [scripting flag](https://html.spec.whatwg.org/multipage/parsing.html#scripting-flag). If set
     * to `true`, `noscript` element content will be parsed as text.
     *
     * @default `true`
     */
    scriptingEnabled?: boolean;

    /**
     * Enables source code location information. When enabled, each node (except the root node)
     * will have a `sourceCodeLocation` property. If the node is not an empty element, `sourceCodeLocation` will
     * be a {@link ElementLocation} object, otherwise it will be {@link Location}.
     * If the element was implicitly created by the parser (as part of
     * [tree correction](https://html.spec.whatwg.org/multipage/syntax.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser)),
     * its `sourceCodeLocation` property will be `undefined`.
     *
     * @default `false`
     */
    sourceCodeLocationInfo?: boolean;

    /**
     * Specifies the resulting tree format.
     *
     * @default `treeAdapters.default`
     */
    treeAdapter?: TreeAdapter<T>;

    /**
     * Callback for parse errors.
     *
     * @default `null`
     */
    onParseError?: ParserErrorHandler | null;
}

/** Option values used for every {@link ParserOptions} field the caller leaves unset. */
const defaultParserOptions: Required<ParserOptions<DefaultTreeAdapterMap>> = {
    scriptingEnabled: true,
    sourceCodeLocationInfo: false,
    treeAdapter: defaultTreeAdapter,
    onParseError: null,
};

//Parser
// NOTE(review): the type arguments on `TreeAdapter` and `StackHandler` were reconstructed from how `T`
// is used in this class - confirm them against the declarations of those interfaces.
export class Parser<T extends TreeAdapterTypeMap> implements TokenHandler, StackHandler<T> {
    treeAdapter: TreeAdapter<T>;
    onParseError: ParserErrorHandler | null;
    private currentToken: Token | null = null;
    public options: Required<ParserOptions<T>>;
    public document: T['document'];

    /**
     * @param options Parser options; unset fields fall back to `defaultParserOptions`.
     * @param document Document to build into. When omitted, one is created with `treeAdapter.createDocument()`.
     * @param fragmentContext Context element for fragment parsing, or `null`.
     * @param scriptHandler Optional callback that receives a pending script element.
     */
    public constructor(
        options?: ParserOptions<T>,
        document?: T['document'],
        public fragmentContext: T['element'] | null = null,
        public scriptHandler: null | ((pendingScript: T['element']) => void) = null
    ) {
        this.options = {
            ...defaultParserOptions,
            ...options,
        } as Required<ParserOptions<T>>;

        this.treeAdapter = this.options.treeAdapter;
        this.onParseError = this.options.onParseError;

        // Always enable location info if we report parse errors.
        if (this.onParseError) {
            this.options.sourceCodeLocationInfo = true;
        }

        this.document = document ?? this.treeAdapter.createDocument();

        this.tokenizer = new Tokenizer(this.options, this);
        this.activeFormattingElements = new FormattingElementList(this.treeAdapter);

        // Without a fragment context the context tag ID is UNKNOWN and the document acts as the context node.
        this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : $.UNKNOWN;
        this._setContextModes(fragmentContext ?? this.document, this.fragmentContextID);

        this.openElements = new OpenElementStack(this.document, this.treeAdapter, this);
    }

    // API
    /**
     * Creates a parser with `options`, feeds `html` to its tokenizer in a single `write` call
     * and returns the parser's document.
     */
    public static parse<T extends TreeAdapterTypeMap>(html: string, options?: ParserOptions<T>): T['document'] {
        const parser = new this(options);

        parser.tokenizer.write(html, true);

        return parser.document;
    }

    public static getFragmentParser<T extends TreeAdapterTypeMap>(
        fragmentContext?: T['parentNode'] | null,
        options?: ParserOptions<T>
    ): Parser<T> {
        const opts: Required<ParserOptions<T>> = {
            ...defaultParserOptions,
            ...options,
        } as Required<ParserOptions<T>>;

        //NOTE: use a