• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import { readFile, writeFile } from 'node:fs/promises';
2import { basename } from 'node:path';
3import { Parser, type DefaultTreeAdapterMap, type TreeAdapterTypeMap, type Token, defaultTreeAdapter } from 'parse5';
4import type { HtmlLibToken } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
5import { parseDatFile } from 'parse5-test-utils/utils/parse-dat-file.js';
6import { addSlashes } from 'parse5-test-utils/utils/common.js';
7
// Script entry point: run the conversion and report any failure.
// Besides logging the error, mark the process as failed so that callers
// (CI jobs, npm scripts) do not see a successful exit status after an error.
main().catch((error: unknown) => {
    // eslint-disable-next-line no-console
    console.error(error);
    process.exitCode = 1;
});
10
/**
 * Converts every file passed on the command line into a parser-feedback test.
 *
 * Each input file is read as UTF-8, turned into test content by
 * `generateParserFeedbackTest`, and written to
 * `test/data/parser-feedback/<name>.test`, where `<name>` is the input's
 * base name with a trailing `.dat` removed. All files are processed
 * concurrently.
 *
 * @returns A promise that settles once every file has been written, or
 * rejects with the first failure.
 */
function main(): Promise<void[]> {
    const inputFiles = process.argv.slice(2);

    // Reads one input file and writes its generated test next to the others.
    const convertFile = async (inputFile: string): Promise<void> => {
        const datSource = await readFile(inputFile, 'utf8');
        const generated = generateParserFeedbackTest(datSource);
        const outputPath = `test/data/parser-feedback/${basename(inputFile, '.dat')}.test`;

        await writeFile(outputPath, generated);
    };

    return Promise.all(inputFiles.map((inputFile) => convertFile(inputFile)));
}
22
/**
 * Parses `html` and returns the tokens the parser handled, as array-style
 * entries (`['Comment', data]`, `['StartTag', name, attrs]`, ...).
 *
 * Empty character tokens are dropped and consecutive character tokens are
 * merged into a single entry.
 *
 * @param html Markup to run through the parser.
 * @returns The recorded tokens, in the order they were handled.
 */
function collectParserTokens(html: string): HtmlLibToken[] {
    const collected: HtmlLibToken[] = [];

    // Stores one entry, skipping empty text and folding adjacent text together.
    const record = (entry: HtmlLibToken): void => {
        if (entry[0] !== 'Character') {
            collected.push(entry);
            return;
        }

        const text = entry[1];

        if (text == null || text.length === 0) {
            return;
        }

        const previous = collected[collected.length - 1];

        if (previous?.[0] === 'Character') {
            previous[1] += text;
        } else {
            collected.push(entry);
        }
    };

    class ExtendedParser<T extends TreeAdapterTypeMap> extends Parser<T> {
        private isTopLevel = true;

        /**
         * Runs `fn` and records the token afterwards, but only for the
         * outermost handler call. Handlers invoked while another handler is
         * already running (`isTopLevel` is false) record nothing, so every
         * token is added once.
         *
         * `getToken` is evaluated only after `fn` has returned.
         */
        private guardTopLevel(fn: () => void, getToken: () => HtmlLibToken): void {
            const wasTopLevel = this.isTopLevel;

            this.isTopLevel = false;
            fn();

            if (!wasTopLevel) {
                return;
            }

            this.isTopLevel = true;
            record(getToken());
        }

        override onComment(token: Token.CommentToken): void {
            const handle = (): void => super.onComment(token);

            this.guardTopLevel(handle, () => ['Comment', token.data]);
        }

        override onDoctype(token: Token.DoctypeToken): void {
            const handle = (): void => super.onDoctype(token);

            // The last element is the negation of the `forceQuirks` flag.
            this.guardTopLevel(handle, () => [
                'DOCTYPE',
                token.name,
                token.publicId,
                token.systemId,
                !token.forceQuirks,
            ]);
        }

        override onStartTag(token: Token.TagToken): void {
            const handle = (): void => super.onStartTag(token);

            const describe = (): HtmlLibToken => {
                // Turn the attribute list into a plain name -> value object.
                const attrPairs = token.attrs.map(({ name, value }) => [name, value]);
                const entry: HtmlLibToken = ['StartTag', token.tagName, Object.fromEntries(attrPairs)];

                // The self-closing marker is only present when it is set.
                if (token.selfClosing) {
                    entry.push(true);
                }

                return entry;
            };

            this.guardTopLevel(handle, describe);
        }

        override onEndTag(token: Token.TagToken): void {
            const handle = (): void => super.onEndTag(token);

            // NOTE: parser feedback simulator can produce adjusted SVG
            // tag names for end tag tokens so we need to lower case it
            this.guardTopLevel(handle, () => ['EndTag', token.tagName.toLowerCase()]);
        }

        override onCharacter(token: Token.CharacterToken): void {
            const handle = (): void => super.onCharacter(token);

            this.guardTopLevel(handle, () => ['Character', token.chars]);
        }

        override onNullCharacter(token: Token.CharacterToken): void {
            const handle = (): void => super.onNullCharacter(token);

            this.guardTopLevel(handle, () => ['Character', token.chars]);
        }

        override onWhitespaceCharacter(token: Token.CharacterToken): void {
            // Both values are captured before the base handler runs.
            const shouldSkipNewLine = this.skipNextNewLine;
            const text = token.chars;
            const handle = (): void => super.onWhitespaceCharacter(token);

            this.guardTopLevel(handle, () => {
                // Drop a leading line feed when the parser was set to skip it.
                if (shouldSkipNewLine && text.startsWith('\n')) {
                    return ['Character', text.slice(1)];
                }

                return ['Character', text];
            });
        }
    }

    ExtendedParser.parse(html);

    return collected;
}
122
/**
 * Builds the JSON text of a parser-feedback test from the contents of a
 * parser test file.
 *
 * Every test read by `parseDatFile` becomes one entry holding the fragment
 * context tag name (or `null` when there is none), a description derived
 * from the input via `addSlashes`, the raw input, and the tokens returned by
 * `collectParserTokens` for that input.
 *
 * @param parserTestFile Contents of the parser test file.
 * @returns The `{ tests: [...] }` object serialized with 4-space indentation.
 */
function generateParserFeedbackTest(parserTestFile: string): string {
    const datTests = parseDatFile<DefaultTreeAdapterMap>(parserTestFile, defaultTreeAdapter);

    const tests = datTests.map((datTest) => {
        const contextTagName = datTest.fragmentContext?.tagName ?? null;

        return {
            fragmentContext: contextTagName,
            description: addSlashes(datTest.input),
            input: datTest.input,
            output: collectParserTokens(datTest.input),
        };
    });

    return JSON.stringify({ tests }, null, 4);
}
137