• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
const fs = require('fs');
const { promisify } = require('util');
const { basename } = require('path');
const Parser = require('../../packages/parse5/lib/parser');
const Tokenizer = require('../../packages/parse5/lib/tokenizer');
const defaultTreeAdapter = require('../../packages/parse5/lib/tree-adapters/default');
const { convertTokenToHtml5Lib } = require('../../test/utils/generate-tokenization-tests');
const parseDatFile = require('../../test/utils/parse-dat-file');
// FIX: was '../../test/test/utils/common' — duplicated "test" path segment,
// inconsistent with the sibling test-util requires above.
const { addSlashes } = require('../../test/utils/common');

// Wrap Node's callback-style fs API once at module load; `const` bindings
// replace the previous let-and-reassign pattern.
const readFile = promisify(fs.readFile);
const writeFile = promisify(fs.writeFile);

main();

// Entry point: each CLI argument is a .dat parser-test file; convert every
// one of them (in parallel) into a parser-feedback .test JSON fixture.
async function main() {
    const datFiles = process.argv.slice(2);

    await Promise.all(
        datFiles.map(async datFile => {
            const datContent = await readFile(datFile, 'utf-8');
            const testContent = generateParserFeedbackTest(datContent);
            const outPath = `test/data/parser-feedback/${basename(datFile, '.dat')}.test`;

            await writeFile(outPath, testContent);
        })
    );
}
27
// Append `token` to the `dest` token list, normalizing character-token
// subtypes and merging runs of adjacent character tokens into one.
// Returns false only for EOF (which is never recorded), true otherwise.
function appendToken(dest, token) {
    // EOF tokens are dropped entirely.
    if (token.type === Tokenizer.EOF_TOKEN) {
        return false;
    }

    // Null/whitespace characters are recorded as plain character tokens.
    if (token.type === Tokenizer.NULL_CHARACTER_TOKEN || token.type === Tokenizer.WHITESPACE_CHARACTER_TOKEN) {
        token.type = Tokenizer.CHARACTER_TOKEN;
    }

    // Coalesce consecutive character tokens into the previous entry.
    if (token.type === Tokenizer.CHARACTER_TOKEN) {
        const last = dest[dest.length - 1];

        if (last && last.type === Tokenizer.CHARACTER_TOKEN) {
            last.chars += token.chars;

            return true;
        }
    }

    dest.push(token);

    return true;
}
51
// Parse `html` with a fresh Parser and record every token the tree
// constructor consumes, returned in html5lib test format.
function collectParserTokens(html) {
    const collected = [];
    const parser = new Parser();

    // Hook the parser's token pipeline: let the real implementation run,
    // then snapshot the token. Must be a `function` (not arrow) so `this`
    // stays bound to the parser instance.
    parser._processInputToken = function(token) {
        Parser.prototype._processInputToken.call(this, token);

        // NOTE: copy the attribute list so that attributes of duplicate
        // <html>/<body> tags (merged by the tree constructor per spec)
        // stay separate in the recorded tokens.
        if (token.type === Tokenizer.START_TAG_TOKEN) {
            token.attrs = token.attrs.slice();
        }

        appendToken(collected, token);
    };

    parser.parse(html);

    return collected.map(convertTokenToHtml5Lib);
}
72
// Convert the raw text of a .dat parser-test file into the JSON body of a
// parser-feedback test: one entry per (non-fragment) test, containing the
// input and the token stream observed while parsing it.
function generateParserFeedbackTest(parserTestFile) {
    const datTests = parseDatFile(parserTestFile, defaultTreeAdapter);
    const tests = [];

    for (const test of datTests) {
        // TODO: fragment-context tests are not supported yet.
        if (test.fragmentContext) {
            continue;
        }

        const input = test.input;

        tests.push({
            description: addSlashes(input),
            input,
            output: collectParserTokens(input)
        });
    }

    return JSON.stringify({ tests }, null, 4);
}
92