1import * as assert from 'node:assert'; 2import { outdent } from 'outdent'; 3import { RewritingStream } from '../lib/index.js'; 4import { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js'; 5import { 6 finished, 7 getStringDiffMsg, 8 writeChunkedToStream, 9 WritableStreamStub, 10} from 'parse5-test-utils/utils/common.js'; 11 12const srcHtml = outdent` 13 <!DOCTYPE html ""> 14 <html> 15 <!-- comment1 --> 16 <head /// 123> 17 </head> 18 <!-- comment2 --> 19 <body =123> 20 <div>Hey ya</div> 21 </body> 22 </html> 23`; 24 25const LONG_TEXT = 'a'.repeat((1 << 16) + 1); 26const LONG_TEXT_WITH_COMMENT = `${'a'.repeat((1 << 16) - 5)}<!-- comment -->`; 27 28function createRewriterTest({ 29 src, 30 expected, 31 assignTokenHandlers = (): void => { 32 /* Ignore */ 33 }, 34}: { 35 src: string; 36 expected: string; 37 assignTokenHandlers?: (rewriter: RewritingStream) => void; 38}) { 39 return async (): Promise<void> => { 40 const rewriter = new RewritingStream(); 41 const writable = new WritableStreamStub(); 42 43 rewriter.pipe(writable); 44 45 assignTokenHandlers(rewriter); 46 writeChunkedToStream(src, rewriter); 47 48 await finished(writable); 49 50 assert.ok(writable.writtenData === expected, getStringDiffMsg(writable.writtenData, expected)); 51 }; 52} 53 54describe('RewritingStream', () => { 55 // Raw data tests 56 for (const [idx, data] of loadSAXParserTestData().entries()) { 57 // NOTE: if we don't have any event handlers assigned, stream should use raw 58 // data for the serialization, so serialized content should identical to the original. 59 it( 60 `Raw token serialization - ${idx + 1}.${data.name}`, 61 createRewriterTest({ 62 src: data.src, 63 expected: data.src, 64 }) 65 ); 66 } 67 68 it( 69 'rewrite start tags', 70 createRewriterTest({ 71 src: srcHtml, 72 expected: outdent` 73 <!DOCTYPE html ""> 74 <html> 75 <!-- comment1 --> 76 <body 123=""> 77 </head> 78 <!-- comment2 --> 79 <head =123=""> 80 <div>Hey ya</div> 81 </body> 82 </html> 83 `, 84 assignTokenHandlers: (rewriter) => { 85 rewriter.on('startTag', (token) => { 86 if (token.tagName === 'head') { 87 token.tagName = 'body'; 88 } else if (token.tagName === 'body') { 89 token.tagName = 'head'; 90 } 91 92 rewriter.emitStartTag(token); 93 }); 94 }, 95 }) 96 ); 97 98 it( 99 'rewrite end tags', 100 createRewriterTest({ 101 src: srcHtml, 102 expected: outdent` 103 <!DOCTYPE html ""> 104 <html> 105 <!-- comment1 --> 106 <head /// 123> 107 </rewritten> 108 <!-- comment2 --> 109 <body =123> 110 <div>Hey ya</rewritten> 111 </rewritten> 112 </rewritten> 113 `, 114 assignTokenHandlers: (rewriter) => { 115 rewriter.on('endTag', (token) => { 116 token.tagName = 'rewritten'; 117 118 rewriter.emitEndTag(token); 119 }); 120 }, 121 }) 122 ); 123 124 it( 125 'rewrite text', 126 createRewriterTest({ 127 src: srcHtml, 128 expected: outdent` 129 <!DOCTYPE html ""> 130 <html> 131 <!-- comment1 --> 132 <head /// 123> 133 </head> 134 <!-- comment2 --> 135 <body =123> 136 <div>42</div> 137 </body> 138 </html> 139 `, 140 assignTokenHandlers: (rewriter) => { 141 rewriter.on('text', (token) => { 142 if (token.text.trim().length > 0) { 143 token.text = '42'; 144 } 145 146 rewriter.emitText(token); 147 }); 148 }, 149 }) 150 ); 151 152 it( 153 'rewrite comment', 154 createRewriterTest({ 155 src: srcHtml, 156 expected: outdent` 157 <!DOCTYPE html ""> 158 <html> 159 <!--42--> 160 <head /// 123> 161 </head> 162 <!--42--> 163 <body =123> 164 <div>Hey ya</div> 165 </body> 166 </html> 167 `, 168 assignTokenHandlers: (rewriter) => { 169 rewriter.on('comment', (token) => { 170 token.text = '42'; 171 172 rewriter.emitComment(token); 173 }); 174 }, 175 }) 176 ); 177 178 it( 179 'rewrite doctype', 180 createRewriterTest({ 181 src: srcHtml, 182 expected: outdent` 183 <!DOCTYPE html PUBLIC "42" "hey"> 184 <html> 185 <!-- comment1 --> 186 <head /// 123> 187 </head> 188 <!-- comment2 --> 189 <body =123> 190 <div>Hey ya</div> 191 </body> 192 </html> 193 `, 194 assignTokenHandlers: (rewriter) => { 195 rewriter.on('doctype', (token) => { 196 token.publicId = '42'; 197 token.systemId = 'hey'; 198 199 rewriter.emitDoctype(token); 200 }); 201 }, 202 }) 203 ); 204 205 it( 206 'emit multiple', 207 createRewriterTest({ 208 src: srcHtml, 209 expected: outdent` 210 <!DOCTYPE html ""> 211 <wrap><html></wrap> 212 <!-- comment1 --> 213 <wrap><head 123=""></wrap> 214 </head> 215 <!-- comment2 --> 216 <wrap><body =123=""></wrap> 217 <wrap><div></wrap>Hey ya</div> 218 </body> 219 </html> 220 `, 221 assignTokenHandlers: (rewriter) => { 222 rewriter.on('startTag', (token) => { 223 rewriter.emitRaw('<wrap>'); 224 rewriter.emitStartTag(token); 225 rewriter.emitRaw('</wrap>'); 226 }); 227 }, 228 }) 229 ); 230 231 it( 232 'rewrite raw', 233 createRewriterTest({ 234 src: srcHtml, 235 expected: outdent` 236 <!DOCTYPE html "">42 237 <html>42 238 <!-- comment1 -->42 239 <head /// 123>42 240 </head>42 241 <!-- comment2 -->42 242 <body =123>42 243 <div>42Hey ya</div>42 244 </body>42 245 </html>42 246 `, 247 assignTokenHandlers: (rewriter) => { 248 const rewriteRaw = (_: unknown, raw: string): void => { 249 rewriter.emitRaw(`${raw}42`); 250 }; 251 252 rewriter 253 .on('doctype', rewriteRaw) 254 .on('startTag', rewriteRaw) 255 .on('endTag', rewriteRaw) 256 .on('comment', rewriteRaw); 257 }, 258 }) 259 ); 260 261 it( 262 'Should escape entities in attributes and text', 263 createRewriterTest({ 264 src: outdent` 265 <!DOCTYPE html ""> 266 <html> 267 <head foo='bar"baz"'> 268 </head> 269 <body> 270 <div>foo&bar</div> 271 </body> 272 </html> 273 `, 274 expected: outdent` 275 <!DOCTYPE html ""> 276 <html> 277 <head foo="bar"baz""> 278 </head> 279 <body> 280 <div>foo&bar</div> 281 </body> 282 </html> 283 `, 284 assignTokenHandlers: (rewriter) => { 285 rewriter.on('startTag', (token) => rewriter.emitStartTag(token)); 286 rewriter.on('text', (token) => rewriter.emitText(token)); 287 }, 288 }) 289 ); 290 291 it('Last text chunk must be flushed (GH-271)', async () => { 292 const parser = new RewritingStream(); 293 let foundText = false; 294 295 parser.on('text', ({ text }) => { 296 foundText = true; 297 assert.strictEqual(text, 'text'); 298 }); 299 300 parser.write('text'); 301 parser.end(); 302 303 await finished(parser); 304 305 assert.ok(foundText); 306 }); 307 308 it('Should not accept binary input (GH-269)', () => { 309 const stream = new RewritingStream(); 310 const buf = Buffer.from('test'); 311 312 assert.throws(() => stream.write(buf), TypeError); 313 }); 314 315 it( 316 'Should pass long text correctly (GH-292)', 317 createRewriterTest({ 318 src: LONG_TEXT, 319 expected: LONG_TEXT, 320 }) 321 ); 322 323 it( 324 'Should emit comment after text correctly', 325 createRewriterTest({ 326 src: LONG_TEXT_WITH_COMMENT, 327 expected: LONG_TEXT_WITH_COMMENT, 328 }) 329 ); 330 331 it( 332 'Should emit text in script without escaping (GH-339)', 333 createRewriterTest({ 334 src: '<script></script>', 335 expected: '<script>foo && bar</script>', 336 assignTokenHandlers: (rewriter) => { 337 // On a script tag, emit the text without escaping 338 rewriter.on('startTag', (token) => { 339 rewriter.emitStartTag(token); 340 if (token.tagName === 'script') { 341 rewriter.emitText({ text: 'foo && bar' }); 342 } 343 }); 344 }, 345 }) 346 ); 347}); 348