• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import * as assert from 'node:assert';
2import * as fs from 'node:fs';
3import type { SAXParserOptions } from '../lib/index.js';
4import { SAXParser } from '../lib/index.js';
5import { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js';
6import {
7    finished,
8    getStringDiffMsg,
9    writeChunkedToStream,
10    removeNewLines,
11    WritableStreamStub,
12} from 'parse5-test-utils/utils/common.js';
13
/**
 * Normalizes a markup string so that two serializations can be compared
 * loosely. The input is passed through `removeNewLines`, every remaining
 * whitespace character is dropped, single quotes are turned into double
 * quotes, and the result is lower-cased.
 *
 * @param str - Markup to normalize.
 * @returns The normalized string.
 */
function sanitizeForComparison(str: string): string {
    const withoutNewLines = removeNewLines(str);
    const withoutWhitespace = withoutNewLines.replace(/\s/g, '');
    const withDoubleQuotes = withoutWhitespace.replace(/'/g, '"');

    return withDoubleQuotes.toLowerCase();
}
17
/**
 * Creates an async test callback that re-serializes `html` from the events
 * emitted by a `SAXParser` and asserts that the result matches `expected`
 * once both strings have gone through `sanitizeForComparison`.
 *
 * @param html - Markup written to the parser through `writeChunkedToStream`.
 * @param expected - Serialization the parser events are expected to produce.
 * @param options - Optional settings forwarded to the `SAXParser` constructor.
 * @returns An async function that resolves when the comparison passes.
 */
function createBasicTest(html: string, expected: string, options?: SAXParserOptions) {
    return async function (): Promise<void> {
        //NOTE: the idea of the test is to serialize back given HTML using SAXParser handlers
        let serialized = '';
        const emit = (chunk: string): void => {
            serialized += chunk;
        };
        const parser = new SAXParser(options);

        parser.on('doctype', ({ name, publicId, systemId }) => {
            const parts = [`<!DOCTYPE ${name}`];

            // The SYSTEM keyword is only written when there is no public id.
            if (publicId !== null) {
                parts.push(` PUBLIC "${publicId}"`);
            } else if (systemId !== null) {
                parts.push(' SYSTEM');
            }

            if (systemId !== null) {
                parts.push(` "${systemId}"`);
            }

            parts.push('>');
            emit(parts.join(''));
        });

        parser.on('startTag', ({ tagName, attrs, selfClosing }) => {
            const serializedAttrs = attrs.map((attr) => ` ${attr.name}="${attr.value}"`).join('');
            const closing = selfClosing ? '/>' : '>';

            emit(`<${tagName}${serializedAttrs}${closing}`);
        });

        parser.on('endTag', ({ tagName }) => emit(`</${tagName}>`));

        parser.on('text', ({ text }) => emit(text));

        parser.on('comment', ({ text }) => emit(`<!--${text}-->`));

        writeChunkedToStream(html, parser);

        await finished(parser);

        const normalizedExpected = sanitizeForComparison(expected);
        const normalizedActual = sanitizeForComparison(serialized);

        //NOTE: use ok assertion, so output will not be polluted by the whole content of the strings
        assert.ok(
            normalizedActual === normalizedExpected,
            getStringDiffMsg(normalizedActual, normalizedExpected)
        );
    };
}
71
// URL of the large HTML fixture, resolved relative to this module.
const hugePage = new URL('../../../test/data/huge-page/huge-page.html', import.meta.url);

describe('SAX parser', () => {
    // Basic tests: one generated case per entry of the loaded test data.
    for (const [index, testCase] of loadSAXParserTestData().entries()) {
        it(`${index + 1}.${testCase.name}`, createBasicTest(testCase.src, testCase.expected));
    }

    it('Piping and .stop()', async () => {
        const parser = new SAXParser();
        const sink = new WritableStreamStub();
        let eventCount = 0;

        // Counts every parser event and stops the parser on the tenth one.
        const countAndMaybeStop = (): void => {
            eventCount += 1;

            if (eventCount === 10) {
                parser.stop();
            }
        };

        fs.createReadStream(hugePage, 'utf8').pipe(parser).pipe(sink);

        parser.on('startTag', countAndMaybeStop);
        parser.on('endTag', countAndMaybeStop);
        parser.on('doctype', countAndMaybeStop);
        parser.on('comment', countAndMaybeStop);
        parser.on('text', countAndMaybeStop);

        await finished(sink);

        const sourceHtml = fs.readFileSync(hugePage).toString();

        // No handler may fire after stop(), yet the whole file must still be piped through.
        assert.strictEqual(eventCount, 10);
        assert.strictEqual(sink.writtenData, sourceHtml);
    });

    it('Parser silently exits on big files (GH-97)', async () => {
        const parser = new SAXParser();

        fs.createReadStream(hugePage, 'utf8').pipe(parser);

        //NOTE: This is a smoke test - in case of regression it will fail with timeout.
        await finished(parser);
    });

    it('Last text chunk must be flushed (GH-271)', async () => {
        const parser = new SAXParser();
        let sawText = false;

        parser.on('text', ({ text }) => {
            sawText = true;
            assert.strictEqual(text, 'text');
        });

        parser.write('text');
        parser.end();

        await finished(parser);

        assert.ok(sawText);
    });

    it('Should not accept binary input (GH-269)', () => {
        const parser = new SAXParser();

        assert.throws(() => parser.write(Buffer.from('test')), TypeError);
    });

    it('Should treat NULL characters as normal text', async () => {
        const parser = new SAXParser();
        let sawText = false;

        parser.on('text', ({ text }) => {
            sawText = true;
            assert.strictEqual(text, '\0');
        });

        parser.write('\0');
        parser.end();

        await finished(parser);

        assert.strictEqual(sawText, true);
    });
});
158