import { readFile, writeFile } from 'node:fs/promises';
import { basename } from 'node:path';
import { Parser, type DefaultTreeAdapterMap, type TreeAdapterTypeMap, type Token, defaultTreeAdapter } from 'parse5';
import type { HtmlLibToken } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
import { parseDatFile } from 'parse5-test-utils/utils/parse-dat-file.js';
import { addSlashes } from 'parse5-test-utils/utils/common.js';

main().catch((error: unknown) => {
    // eslint-disable-next-line no-console
    console.error(error);
    // Report the failure through the exit status as well, so callers of the
    // script can tell that at least one conversion did not complete.
    process.exitCode = 1;
});

/**
 * Converts every `.dat` file passed on the command line into a parser
 * feedback test file.
 *
 * Each output is written to `test/data/parser-feedback/<name>.test`, where
 * `<name>` is the input's base name without the `.dat` extension. The path is
 * relative, so it resolves against the current working directory.
 *
 * @returns A promise that settles once all conversions have finished; it
 * rejects as soon as any read, conversion, or write fails.
 */
function main(): Promise<void[]> {
    const convertPromises = process.argv.slice(2).map(async (file) => {
        const content = await readFile(file, 'utf8');
        const feedbackTestContent = generateParserFeedbackTest(content);
        const feedbackTestFile = `test/data/parser-feedback/${basename(file, '.dat')}.test`;

        await writeFile(feedbackTestFile, feedbackTestContent);
    });

    return Promise.all(convertPromises);
}

/**
 * Parses `html` and records the tokens the parser handles, in the tuple
 * format used by the tokenization tests.
 *
 * Adjacent character tokens are merged into one entry and empty character
 * tokens are dropped.
 *
 * @param html Markup to parse.
 * @returns The recorded tokens, in the order their handlers completed.
 */
function collectParserTokens(html: string): HtmlLibToken[] {
    const tokens: HtmlLibToken[] = [];

    class ExtendedParser<T extends TreeAdapterTypeMap> extends Parser<T> {
        private isTopLevel = true;

        /**
         * We only want to add tokens once. We guard against recursive calls
         * using the `isTopLevel` flag: only the outermost handler invocation
         * records a token, and it does so after the wrapped handler returned.
         *
         * @param fn Invokes the base class handler.
         * @param getToken Builds the entry to record; only called for the
         * outermost invocation.
         */
        private guardTopLevel(fn: () => void, getToken: () => HtmlLibToken): void {
            const { isTopLevel } = this;
            this.isTopLevel = false;

            try {
                fn();
            } finally {
                // Restore the flag even if the handler throws, so the guard
                // is never left stuck in the "nested" state.
                if (isTopLevel) {
                    this.isTopLevel = true;
                }
            }

            if (!isTopLevel) {
                return;
            }

            const token = getToken();

            if (token[0] === 'Character') {
                // Empty character runs carry no information.
                if (token[1] == null || token[1].length === 0) {
                    return;
                }

                const lastToken = tokens[tokens.length - 1];

                // Merge consecutive character tokens into a single entry.
                if (lastToken?.[0] === 'Character') {
                    lastToken[1] += token[1];
                    return;
                }
            }

            tokens.push(token);
        }

        override onComment(token: Token.CommentToken): void {
            this.guardTopLevel(
                () => super.onComment(token),
                () => ['Comment', token.data]
            );
        }

        override onDoctype(token: Token.DoctypeToken): void {
            this.guardTopLevel(
                () => super.onDoctype(token),
                // The last element is the negation of `forceQuirks`.
                () => ['DOCTYPE', token.name, token.publicId, token.systemId, !token.forceQuirks]
            );
        }

        override onStartTag(token: Token.TagToken): void {
            this.guardTopLevel(
                () => super.onStartTag(token),
                () => {
                    // Attributes are emitted as a plain `name -> value` object.
                    const reformattedAttrs = Object.fromEntries(token.attrs.map(({ name, value }) => [name, value]));
                    const startTagEntry: HtmlLibToken = ['StartTag', token.tagName, reformattedAttrs];

                    // The self-closing flag is only appended when it is set.
                    if (token.selfClosing) {
                        startTagEntry.push(true);
                    }

                    return startTagEntry;
                }
            );
        }

        override onEndTag(token: Token.TagToken): void {
            this.guardTopLevel(
                () => super.onEndTag(token),
                // NOTE: parser feedback simulator can produce adjusted SVG
                // tag names for end tag tokens so we need to lower case it
                () => ['EndTag', token.tagName.toLowerCase()]
            );
        }

        override onCharacter(token: Token.CharacterToken): void {
            this.guardTopLevel(
                () => super.onCharacter(token),
                () => ['Character', token.chars]
            );
        }

        override onNullCharacter(token: Token.CharacterToken): void {
            this.guardTopLevel(
                () => super.onNullCharacter(token),
                () => ['Character', token.chars]
            );
        }

        override onWhitespaceCharacter(token: Token.CharacterToken): void {
            // Both values are captured before delegating to the base handler
            // (presumably because it may change them - confirm against Parser).
            const { skipNextNewLine } = this;
            const { chars } = token;

            this.guardTopLevel(
                () => super.onWhitespaceCharacter(token),
                // Drop the leading newline when the parser was set to skip it.
                () => ['Character', skipNextNewLine && chars.startsWith('\n') ? chars.slice(1) : chars]
            );
        }
    }

    ExtendedParser.parse(html);

    return tokens;
}

/**
 * Builds the JSON content of a parser feedback test from the content of a
 * parser `.dat` test file.
 *
 * Every test in the file yields an entry with the fragment context tag name
 * (or `null`), an escaped description, the raw input, and the expected tokens.
 *
 * @param parserTestFile Content of the `.dat` file (not its path).
 * @returns The test suite serialized as JSON with 4-space indentation.
 */
function generateParserFeedbackTest(parserTestFile: string): string {
    const tests = parseDatFile<DefaultTreeAdapterMap>(parserTestFile, defaultTreeAdapter);

    const feedbackTest = {
        tests: tests.map(({ input, fragmentContext }) => ({
            fragmentContext: fragmentContext?.tagName ?? null,
            description: addSlashes(input),
            input,
            // NOTE(review): the fragment context is recorded above but is not
            // passed on here, so the tokens always come from a document parse
            // of `input` - confirm this is intended.
            output: collectParserTokens(input),
        })),
    };

    return JSON.stringify(feedbackTest, null, 4);
}