• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import * as assert from 'node:assert';
2import { outdent } from 'outdent';
3import { RewritingStream } from '../lib/index.js';
4import { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js';
5import {
6    finished,
7    getStringDiffMsg,
8    writeChunkedToStream,
9    WritableStreamStub,
10} from 'parse5-test-utils/utils/common.js';
11
/**
 * HTML fixture shared by the rewriting tests below.
 *
 * It contains one token of every kind the tests hook into: a doctype, two
 * comments, start tags (two of them with malformed attribute syntax:
 * `<head /// 123>` and `<body =123>`), end tags and a text node.
 *
 * NOTE(review): `outdent` presumably strips the common leading indentation and
 * the surrounding newlines of the literal — confirm against its documentation.
 * The expected outputs in the tests are built with the same tag.
 */
const srcHtml = outdent`
  <!DOCTYPE html "">
  <html>
      <!-- comment1 -->
      <head /// 123>
      </head>
      <!-- comment2 -->
      <body =123>
          <div>Hey ya</div>
      </body>
  </html>
`;
24
// Fixtures sized relative to 2^16 (65536) characters.
// NOTE(review): 2^16 presumably matches an internal buffering threshold of the
// parser under test — confirm against the library source.

/** 65537 `a` characters, i.e. one character more than 2^16. */
const LONG_TEXT = 'a'.repeat(2 ** 16 + 1);

/**
 * 65531 `a` characters followed by a 16-character HTML comment, so the comment
 * starts 5 characters before the 2^16 mark and ends past it.
 */
const LONG_TEXT_WITH_COMMENT = 'a'.repeat(2 ** 16 - 5) + '<!-- comment -->';
27
/**
 * Builds the body of a test case that feeds `src` through a fresh
 * `RewritingStream` and checks the collected output against `expected`.
 *
 * @param options.src - Markup written to the rewriter via `writeChunkedToStream`.
 * @param options.expected - Exact string the piped `WritableStreamStub` must have collected.
 * @param options.assignTokenHandlers - Optional hook that registers token handlers on the
 *   rewriter before any input is written; when omitted, no handlers are registered.
 * @returns An async function that performs the run and fails with a string-diff
 *   message when the output differs from `expected`.
 */
function createRewriterTest(options: {
    src: string;
    expected: string;
    assignTokenHandlers?: (rewriter: RewritingStream) => void;
}) {
    // Read the options once, at creation time, as a destructured parameter would.
    const { src, expected, assignTokenHandlers } = options;

    const runTest = async (): Promise<void> => {
        const stream = new RewritingStream();
        const sink = new WritableStreamStub();

        stream.pipe(sink);

        // Handlers are attached after piping but before the first write.
        assignTokenHandlers?.(stream);
        writeChunkedToStream(src, stream);

        await finished(sink);

        const actual = sink.writtenData;

        assert.ok(actual === expected, getStringDiffMsg(actual, expected));
    };

    return runTest;
}
53
54describe('RewritingStream', () => {
55    // Raw data tests
56    for (const [idx, data] of loadSAXParserTestData().entries()) {
57        // NOTE: if we don't have any event handlers assigned, stream should use raw
58        // data for the serialization, so serialized content should identical to the original.
59        it(
60            `Raw token serialization - ${idx + 1}.${data.name}`,
61            createRewriterTest({
62                src: data.src,
63                expected: data.src,
64            })
65        );
66    }
67
68    it(
69        'rewrite start tags',
70        createRewriterTest({
71            src: srcHtml,
72            expected: outdent`
73              <!DOCTYPE html "">
74              <html>
75                  <!-- comment1 -->
76                  <body 123="">
77                  </head>
78                  <!-- comment2 -->
79                  <head =123="">
80                      <div>Hey ya</div>
81                  </body>
82              </html>
83            `,
84            assignTokenHandlers: (rewriter) => {
85                rewriter.on('startTag', (token) => {
86                    if (token.tagName === 'head') {
87                        token.tagName = 'body';
88                    } else if (token.tagName === 'body') {
89                        token.tagName = 'head';
90                    }
91
92                    rewriter.emitStartTag(token);
93                });
94            },
95        })
96    );
97
98    it(
99        'rewrite end tags',
100        createRewriterTest({
101            src: srcHtml,
102            expected: outdent`
103              <!DOCTYPE html "">
104              <html>
105                  <!-- comment1 -->
106                  <head /// 123>
107                  </rewritten>
108                  <!-- comment2 -->
109                  <body =123>
110                      <div>Hey ya</rewritten>
111                  </rewritten>
112              </rewritten>
113            `,
114            assignTokenHandlers: (rewriter) => {
115                rewriter.on('endTag', (token) => {
116                    token.tagName = 'rewritten';
117
118                    rewriter.emitEndTag(token);
119                });
120            },
121        })
122    );
123
124    it(
125        'rewrite text',
126        createRewriterTest({
127            src: srcHtml,
128            expected: outdent`
129              <!DOCTYPE html "">
130              <html>
131                  <!-- comment1 -->
132                  <head /// 123>
133                  </head>
134                  <!-- comment2 -->
135                  <body =123>
136                      <div>42</div>
137                  </body>
138              </html>
139            `,
140            assignTokenHandlers: (rewriter) => {
141                rewriter.on('text', (token) => {
142                    if (token.text.trim().length > 0) {
143                        token.text = '42';
144                    }
145
146                    rewriter.emitText(token);
147                });
148            },
149        })
150    );
151
152    it(
153        'rewrite comment',
154        createRewriterTest({
155            src: srcHtml,
156            expected: outdent`
157              <!DOCTYPE html "">
158              <html>
159                  <!--42-->
160                  <head /// 123>
161                  </head>
162                  <!--42-->
163                  <body =123>
164                      <div>Hey ya</div>
165                  </body>
166              </html>
167            `,
168            assignTokenHandlers: (rewriter) => {
169                rewriter.on('comment', (token) => {
170                    token.text = '42';
171
172                    rewriter.emitComment(token);
173                });
174            },
175        })
176    );
177
178    it(
179        'rewrite doctype',
180        createRewriterTest({
181            src: srcHtml,
182            expected: outdent`
183              <!DOCTYPE html PUBLIC "42" "hey">
184              <html>
185                  <!-- comment1 -->
186                  <head /// 123>
187                  </head>
188                  <!-- comment2 -->
189                  <body =123>
190                      <div>Hey ya</div>
191                  </body>
192              </html>
193            `,
194            assignTokenHandlers: (rewriter) => {
195                rewriter.on('doctype', (token) => {
196                    token.publicId = '42';
197                    token.systemId = 'hey';
198
199                    rewriter.emitDoctype(token);
200                });
201            },
202        })
203    );
204
205    it(
206        'emit multiple',
207        createRewriterTest({
208            src: srcHtml,
209            expected: outdent`
210              <!DOCTYPE html "">
211              <wrap><html></wrap>
212                  <!-- comment1 -->
213                  <wrap><head 123=""></wrap>
214                  </head>
215                  <!-- comment2 -->
216                  <wrap><body =123=""></wrap>
217                      <wrap><div></wrap>Hey ya</div>
218                  </body>
219              </html>
220            `,
221            assignTokenHandlers: (rewriter) => {
222                rewriter.on('startTag', (token) => {
223                    rewriter.emitRaw('<wrap>');
224                    rewriter.emitStartTag(token);
225                    rewriter.emitRaw('</wrap>');
226                });
227            },
228        })
229    );
230
231    it(
232        'rewrite raw',
233        createRewriterTest({
234            src: srcHtml,
235            expected: outdent`
236              <!DOCTYPE html "">42
237              <html>42
238                  <!-- comment1 -->42
239                  <head /// 123>42
240                  </head>42
241                  <!-- comment2 -->42
242                  <body =123>42
243                      <div>42Hey ya</div>42
244                  </body>42
245              </html>42
246            `,
247            assignTokenHandlers: (rewriter) => {
248                const rewriteRaw = (_: unknown, raw: string): void => {
249                    rewriter.emitRaw(`${raw}42`);
250                };
251
252                rewriter
253                    .on('doctype', rewriteRaw)
254                    .on('startTag', rewriteRaw)
255                    .on('endTag', rewriteRaw)
256                    .on('comment', rewriteRaw);
257            },
258        })
259    );
260
261    it(
262        'Should escape entities in attributes and text',
263        createRewriterTest({
264            src: outdent`
265              <!DOCTYPE html "">
266              <html>
267                  <head foo='bar"baz"'>
268                  </head>
269                  <body>
270                      <div>foo&amp;bar</div>
271                  </body>
272              </html>
273            `,
274            expected: outdent`
275              <!DOCTYPE html "">
276              <html>
277                  <head foo="bar&quot;baz&quot;">
278                  </head>
279                  <body>
280                      <div>foo&amp;bar</div>
281                  </body>
282              </html>
283            `,
284            assignTokenHandlers: (rewriter) => {
285                rewriter.on('startTag', (token) => rewriter.emitStartTag(token));
286                rewriter.on('text', (token) => rewriter.emitText(token));
287            },
288        })
289    );
290
291    it('Last text chunk must be flushed (GH-271)', async () => {
292        const parser = new RewritingStream();
293        let foundText = false;
294
295        parser.on('text', ({ text }) => {
296            foundText = true;
297            assert.strictEqual(text, 'text');
298        });
299
300        parser.write('text');
301        parser.end();
302
303        await finished(parser);
304
305        assert.ok(foundText);
306    });
307
308    it('Should not accept binary input (GH-269)', () => {
309        const stream = new RewritingStream();
310        const buf = Buffer.from('test');
311
312        assert.throws(() => stream.write(buf), TypeError);
313    });
314
315    it(
316        'Should pass long text correctly (GH-292)',
317        createRewriterTest({
318            src: LONG_TEXT,
319            expected: LONG_TEXT,
320        })
321    );
322
323    it(
324        'Should emit comment after text correctly',
325        createRewriterTest({
326            src: LONG_TEXT_WITH_COMMENT,
327            expected: LONG_TEXT_WITH_COMMENT,
328        })
329    );
330
331    it(
332        'Should emit text in script without escaping (GH-339)',
333        createRewriterTest({
334            src: '<script></script>',
335            expected: '<script>foo && bar</script>',
336            assignTokenHandlers: (rewriter) => {
337                // On a script tag, emit the text without escaping
338                rewriter.on('startTag', (token) => {
339                    rewriter.emitStartTag(token);
340                    if (token.tagName === 'script') {
341                        rewriter.emitText({ text: 'foo && bar' });
342                    }
343                });
344            },
345        })
346    );
347});
348