• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import { TAG_NAMES as $, NS, hasUnescapedText } from '../common/html.js';
2import { escapeText, escapeAttribute } from 'entities/lib/escape.js';
3import type { TreeAdapter, TreeAdapterTypeMap } from '../tree-adapters/interface';
4import { defaultTreeAdapter, type DefaultTreeAdapterMap } from '../tree-adapters/default.js';
5
// Sets

/** Tag names that `isVoidElement` treats as void when the element is in the HTML namespace. */
const VOID_ELEMENT_TAG_NAMES: readonly string[] = [
    $.AREA,
    $.BASE,
    $.BASEFONT,
    $.BGSOUND,
    $.BR,
    $.COL,
    $.EMBED,
    $.FRAME,
    $.HR,
    $.IMG,
    $.INPUT,
    $.KEYGEN,
    $.LINK,
    $.META,
    $.PARAM,
    $.SOURCE,
    $.TRACK,
    $.WBR,
];

/** Lookup set built from the tag names above; membership is tested by tag name only. */
const VOID_ELEMENTS = new Set<string>(VOID_ELEMENT_TAG_NAMES);
27
/**
 * Tells whether `node` is an HTML-namespace element whose tag name is listed in `VOID_ELEMENTS`.
 *
 * @param node Node to inspect; non-element nodes are never void.
 * @param options Resolved serializer options providing the tree adapter.
 * @returns `true` only when all three checks (element, HTML namespace, void tag name) pass.
 */
function isVoidElement<T extends TreeAdapterTypeMap>(node: T['node'], options: InternalOptions<T>): boolean {
    const { treeAdapter } = options;

    if (!treeAdapter.isElementNode(node)) {
        return false;
    }

    // Only HTML-namespace elements qualify, even if the tag name matches.
    if (treeAdapter.getNamespaceURI(node) !== NS.HTML) {
        return false;
    }

    return VOID_ELEMENTS.has(treeAdapter.getTagName(node));
}
35
/**
 * Options accepted by `serialize` and `serializeOuter`. Every field is optional;
 * omitted fields fall back to the values in `defaultOpts`.
 */
export interface SerializerOptions<T extends TreeAdapterTypeMap> {
    /**
     * Specifies input tree format.
     *
     * @default `defaultTreeAdapter`
     */
    treeAdapter?: TreeAdapter<T>;
    /**
     * The [scripting flag](https://html.spec.whatwg.org/multipage/parsing.html#scripting-flag). If set
     * to `true`, `noscript` element content will not be escaped.
     *
     * @default `true`
     */
    scriptingEnabled?: boolean;
}
51
/** Serializer options after defaults have been applied: every field is present. */
type InternalOptions<T extends TreeAdapterTypeMap> = Required<SerializerOptions<T>>;

/** Values used for any option the caller leaves out. */
const defaultOpts: InternalOptions<DefaultTreeAdapterMap> = {
    treeAdapter: defaultTreeAdapter,
    scriptingEnabled: true,
};
55
/**
 * Serializes the children of an AST node to an HTML string. The node itself is not
 * part of the output; a void HTML element yields an empty string.
 *
 * @example
 *
 * ```js
 * const parse5 = require('parse5');
 *
 * const document = parse5.parse('<!DOCTYPE html><html><head></head><body>Hi there!</body></html>');
 *
 * // Serializes a document.
 * const html = parse5.serialize(document);
 *
 * // Serializes the <html> element content.
 * const str = parse5.serialize(document.childNodes[1]);
 *
 * console.log(str); //> '<head></head><body>Hi there!</body>'
 * ```
 *
 * @param node Node whose content is serialized.
 * @param options Serialization options; missing fields fall back to `defaultOpts`.
 * @returns The serialized markup of the node's children.
 */
export function serialize<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>(
    node: T['parentNode'],
    options?: SerializerOptions<T>
): string {
    const resolvedOptions = { ...defaultOpts, ...options } as InternalOptions<T>;

    // Void elements are treated as having no content to serialize.
    return isVoidElement(node, resolvedOptions) ? '' : serializeChildNodes(node, resolvedOptions);
}
90
/**
 * Serializes an AST node to an HTML string, including the node itself
 * (for an element: its start tag, attributes, content and end tag).
 *
 * @example
 *
 * ```js
 * const parse5 = require('parse5');
 *
 * const document = parse5.parseFragment('<div>Hello, <b>world</b>!</div>');
 *
 * // Serializes the <div> element.
 * const html = parse5.serializeOuter(document.childNodes[0]);
 *
 * console.log(html); //> '<div>Hello, <b>world</b>!</div>'
 * ```
 *
 * @param node Node to serialize.
 * @param options Serialization options; missing fields fall back to `defaultOpts`.
 * @returns The serialized markup of the node.
 */
export function serializeOuter<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap>(
    node: T['node'],
    options?: SerializerOptions<T>
): string {
    return serializeNode(node, { ...defaultOpts, ...options } as InternalOptions<T>);
}
117
/**
 * Serializes every child of `parentNode` and concatenates the results in order.
 *
 * @param parentNode Node whose children are serialized.
 * @param options Resolved serializer options.
 * @returns The concatenated markup, or an empty string when the adapter reports no child list.
 */
function serializeChildNodes<T extends TreeAdapterTypeMap>(
    parentNode: T['parentNode'],
    options: InternalOptions<T>
): string {
    const { treeAdapter } = options;

    // An HTML <template> keeps its children in a separate content node,
    // so that node is used as the container instead of the element itself.
    let container: T['parentNode'] = parentNode;
    if (
        treeAdapter.isElementNode(parentNode) &&
        treeAdapter.getTagName(parentNode) === $.TEMPLATE &&
        treeAdapter.getNamespaceURI(parentNode) === NS.HTML
    ) {
        container = treeAdapter.getTemplateContent(parentNode);
    }

    const children = treeAdapter.getChildNodes(container);
    if (!children) {
        return '';
    }

    let serialized = '';
    for (const child of children) {
        serialized += serializeNode(child, options);
    }
    return serialized;
}
140
/**
 * Dispatches `node` to the serializer matching its kind, checking element, text,
 * comment and document-type in that order.
 *
 * @param node Node to serialize.
 * @param options Resolved serializer options.
 * @returns The node's markup, or an empty string when the adapter recognizes none of the four kinds.
 */
function serializeNode<T extends TreeAdapterTypeMap>(node: T['node'], options: InternalOptions<T>): string {
    const { treeAdapter } = options;

    return treeAdapter.isElementNode(node)
        ? serializeElement(node, options)
        : treeAdapter.isTextNode(node)
        ? serializeTextNode(node, options)
        : treeAdapter.isCommentNode(node)
        ? serializeCommentNode(node, options)
        : treeAdapter.isDocumentTypeNode(node)
        ? serializeDocumentTypeNode(node, options)
        : ''; // Unknown node kinds contribute nothing to the output.
}
157
/**
 * Serializes an element: start tag with attributes, then (unless the element is a
 * void HTML element) its children and end tag.
 *
 * @param node Element to serialize.
 * @param options Resolved serializer options.
 * @returns The element's markup.
 */
function serializeElement<T extends TreeAdapterTypeMap>(node: T['element'], options: InternalOptions<T>): string {
    const tagName = options.treeAdapter.getTagName(node);
    const startTag = `<${tagName}${serializeAttributes(node, options)}>`;

    // Void elements have neither content nor an end tag.
    if (isVoidElement(node, options)) {
        return startTag;
    }

    return `${startTag}${serializeChildNodes(node, options)}</${tagName}>`;
}
165
/**
 * Serializes the attribute list of an element as ` name="value"` pairs, each
 * preceded by a single space.
 *
 * The serialized name depends on the attribute's namespace:
 * - no namespace: the plain name;
 * - XML namespace: `xml:` + name;
 * - XMLNS namespace: `xmlns:` + name, except that the attribute named `xmlns` stays bare;
 * - XLink namespace: `xlink:` + name;
 * - any other namespace: `prefix:name`, or just the name when the attribute has no prefix.
 *
 * @param node Element whose attributes are serialized.
 * @param options Resolved serializer options; only the tree adapter is used.
 * @returns The serialized attributes, or an empty string when there are none.
 */
function serializeAttributes<T extends TreeAdapterTypeMap>(
    node: T['element'],
    { treeAdapter }: InternalOptions<T>
): string {
    let html = '';

    for (const attr of treeAdapter.getAttrList(node)) {
        html += ' ';

        if (!attr.namespace) {
            html += attr.name;
        } else {
            switch (attr.namespace) {
                case NS.XML: {
                    html += `xml:${attr.name}`;
                    break;
                }
                case NS.XMLNS: {
                    if (attr.name !== 'xmlns') {
                        html += 'xmlns:';
                    }

                    html += attr.name;
                    break;
                }
                case NS.XLINK: {
                    html += `xlink:${attr.name}`;
                    break;
                }
                default: {
                    // A namespaced attribute may carry no prefix; emitting the bare name
                    // avoids producing a literal "undefined:" in the output.
                    html += attr.prefix ? `${attr.prefix}:${attr.name}` : attr.name;
                }
            }
        }

        html += `="${escapeAttribute(attr.value)}"`;
    }

    return html;
}
204
/**
 * Serializes a text node. The content is emitted verbatim when the parent is an
 * HTML-namespace element for which `hasUnescapedText` returns true (given the
 * scripting flag); otherwise it is passed through `escapeText`.
 *
 * @param node Text node to serialize.
 * @param options Resolved serializer options.
 * @returns The raw or escaped text content.
 */
function serializeTextNode<T extends TreeAdapterTypeMap>(node: T['textNode'], options: InternalOptions<T>): string {
    const { treeAdapter } = options;
    const text = treeAdapter.getTextNodeContent(node);
    const parent = treeAdapter.getParentNode(node);

    if (parent && treeAdapter.isElementNode(parent)) {
        const parentTagName = treeAdapter.getTagName(parent);

        if (
            parentTagName &&
            treeAdapter.getNamespaceURI(parent) === NS.HTML &&
            hasUnescapedText(parentTagName, options.scriptingEnabled)
        ) {
            return text;
        }
    }

    return escapeText(text);
}
217
/**
 * Serializes a comment node by wrapping its content in `<!--` and `-->`.
 * The content is inserted as-is, without escaping.
 *
 * @param node Comment node to serialize.
 * @param options Resolved serializer options; only the tree adapter is used.
 * @returns The comment markup.
 */
function serializeCommentNode<T extends TreeAdapterTypeMap>(
    node: T['commentNode'],
    options: InternalOptions<T>
): string {
    const commentText = options.treeAdapter.getCommentNodeContent(node);

    return `<!--${commentText}-->`;
}
224
/**
 * Serializes a document type node as `<!DOCTYPE name>`. Only the name reported
 * by the tree adapter is emitted.
 *
 * @param node Document type node to serialize.
 * @param options Resolved serializer options; only the tree adapter is used.
 * @returns The doctype markup.
 */
function serializeDocumentTypeNode<T extends TreeAdapterTypeMap>(
    node: T['documentType'],
    options: InternalOptions<T>
): string {
    const doctypeName = options.treeAdapter.getDocumentTypeNodeName(node);

    return `<!DOCTYPE ${doctypeName}>`;
}
231