import * as assert from 'node:assert';
import * as fs from 'node:fs';
import type { SAXParserOptions } from '../lib/index.js';
import { SAXParser } from '../lib/index.js';
import { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js';
import {
    finished,
    getStringDiffMsg,
    writeChunkedToStream,
    removeNewLines,
    WritableStreamStub,
} from 'parse5-test-utils/utils/common.js';

/**
 * Normalizes markup before comparison: passes it through `removeNewLines`,
 * drops every whitespace character, turns single quotes into double quotes
 * and lower-cases the result.
 */
function sanitizeForComparison(str: string): string {
    const withoutNewLines = removeNewLines(str);
    const withoutWhitespace = withoutNewLines.replace(/\s/g, '');
    const withDoubleQuotes = withoutWhitespace.replace(/'/g, '"');

    return withDoubleQuotes.toLowerCase();
}

/**
 * Builds a test body that feeds `html` to a `SAXParser` in chunks, re-serializes
 * the markup from the emitted events and compares the sanitized result with the
 * sanitized `expected` string.
 */
function createBasicTest(html: string, expected: string, options?: SAXParserOptions) {
    return async function runBasicTest(): Promise<void> {
        // The serialized output is accumulated here by the event handlers below.
        let actual = '';
        const parser = new SAXParser(options);

        parser.on('doctype', ({ name, publicId, systemId }) => {
            const pieces = [`<!DOCTYPE ${name}`];

            if (publicId !== null) {
                pieces.push(` PUBLIC "${publicId}"`);
            } else if (systemId !== null) {
                // The SYSTEM keyword is only emitted when there is no public id.
                pieces.push(' SYSTEM');
            }

            if (systemId !== null) {
                pieces.push(` "${systemId}"`);
            }

            pieces.push('>');
            actual += pieces.join('');
        });

        parser.on('startTag', ({ tagName, attrs, selfClosing }) => {
            const serializedAttrs = attrs.map((attr) => ` ${attr.name}="${attr.value}"`).join('');
            const closing = selfClosing ? '/>' : '>';

            actual += `<${tagName}${serializedAttrs}${closing}`;
        });

        parser.on('endTag', ({ tagName }) => {
            actual += `</${tagName}>`;
        });

        parser.on('text', ({ text }) => {
            actual += text;
        });

        parser.on('comment', ({ text }) => {
            actual += `<!--${text}-->`;
        });

        writeChunkedToStream(html, parser);

        await finished(parser);

        const sanitizedExpected = sanitizeForComparison(expected);
        const sanitizedActual = sanitizeForComparison(actual);

        // A plain `ok` assertion with a diff message is used so that a failure
        // does not dump the full content of both strings into the output.
        assert.ok(sanitizedActual === sanitizedExpected, getStringDiffMsg(sanitizedActual, sanitizedExpected));
    };
}

const hugePage = new URL('../../../test/data/huge-page/huge-page.html', import.meta.url);

describe('SAX parser', () => {
    // Data-driven basic tests: one test case per loaded entry.
    for (const [idx, data] of loadSAXParserTestData().entries()) {
        const title = `${idx + 1}.${data.name}`;

        it(title, createBasicTest(data.src, data.expected));
    }

    it('Piping and .stop()', async () => {
        const parser = new SAXParser();
        const writable = new WritableStreamStub();
        let handlerCallCount = 0;

        // Shared by every event type; stops the parser on the tenth event.
        const handler = (): void => {
            handlerCallCount += 1;

            if (handlerCallCount === 10) {
                parser.stop();
            }
        };

        fs.createReadStream(hugePage, 'utf8').pipe(parser).pipe(writable);

        parser.on('startTag', handler);
        parser.on('endTag', handler);
        parser.on('doctype', handler);
        parser.on('comment', handler);
        parser.on('text', handler);

        await finished(writable);

        const expected = fs.readFileSync(hugePage, 'utf8');

        // No handler may fire after .stop(), yet the whole file must still be piped through.
        assert.strictEqual(handlerCallCount, 10);
        assert.strictEqual(writable.writtenData, expected);
    });

    it('Parser silently exits on big files (GH-97)', async () => {
        const parser = new SAXParser();

        fs.createReadStream(hugePage, 'utf8').pipe(parser);

        // Smoke test: a regression shows up as a timeout because the parser never finishes.
        await finished(parser);
    });

    it('Last text chunk must be flushed (GH-271)', async () => {
        const parser = new SAXParser();
        let foundText = false;

        parser.on('text', ({ text }) => {
            foundText = true;
            assert.strictEqual(text, 'text');
        });

        parser.write('text');
        parser.end();

        await finished(parser);

        assert.ok(foundText);
    });

    it('Should not accept binary input (GH-269)', () => {
        const parser = new SAXParser();
        const binaryChunk = Buffer.from('test');

        assert.throws(() => parser.write(binaryChunk), TypeError);
    });
});