'use strict';

const assert = require('assert');
const fs = require('fs');
const path = require('path');
const SAXParser = require('../lib');
const loadSAXParserTestData = require('../../../test/utils/load-sax-parser-test-data');
const {
    getStringDiffMsg,
    writeChunkedToStream,
    removeNewLines,
    WritableStreamStub
} = require('../../../test/utils/common');

// Fixture shared by the streaming tests below.
const HUGE_PAGE_PATH = path.join(__dirname, '../../../test/data/huge-page/huge-page.html');

/**
 * Normalizes markup so that two serializations can be compared while ignoring
 * whitespace, quote style and letter case.
 *
 * @param {string} str - Markup to normalize.
 * @returns {string} `str` passed through `removeNewLines`, with all whitespace
 *     removed, single quotes turned into double quotes, and lower-cased.
 */
function sanitizeForComparison(str) {
    return removeNewLines(str)
        .replace(/\s/g, '')
        .replace(/'/g, '"')
        .toLowerCase();
}

/**
 * Builds a test that feeds `html` to a SAXParser in chunks, re-serializes the
 * markup from the emitted events and compares it with `expected`.
 *
 * The comparison runs inside the parser's 'finish' handler, i.e. after the
 * returned function has already returned. The test is therefore asynchronous
 * and signals completion through `done`; without it the runner would treat the
 * test as passed before the assertion has had a chance to run.
 *
 * @param {string} html - Source markup written to the parser.
 * @param {string} expected - Expected serialization of the emitted events.
 * @param {Object} [options] - Passed to the SAXParser constructor as is.
 * @returns {function(Function): void} Test function taking the `done` callback.
 */
function createBasicTest(html, expected, options) {
    return function(done) {
        //NOTE: the idea of the test is to serialize back given HTML using SAXParser handlers
        let actual = '';
        const parser = new SAXParser(options);

        parser.on('doctype', ({ name, publicId, systemId }) => {
            actual += `<!DOCTYPE ${name}`;

            if (publicId !== null) {
                actual += ` PUBLIC "${publicId}"`;
            } else if (systemId !== null) {
                actual += ' SYSTEM';
            }

            if (systemId !== null) {
                actual += ` "${systemId}"`;
            }

            actual += '>';
        });

        parser.on('startTag', ({ tagName, attrs, selfClosing }) => {
            actual += `<${tagName}`;

            for (const { name, value } of attrs) {
                actual += ` ${name}="${value}"`;
            }

            actual += selfClosing ? '/>' : '>';
        });

        parser.on('endTag', ({ tagName }) => {
            actual += `</${tagName}>`;
        });

        parser.on('text', ({ text }) => {
            actual += text;
        });

        parser.on('comment', ({ text }) => {
            actual += `<!--${text}-->`;
        });

        parser.once('finish', () => {
            // Use locals instead of reassigning the closed-over `expected` parameter.
            const sanitizedExpected = sanitizeForComparison(expected);
            const sanitizedActual = sanitizeForComparison(actual);

            //NOTE: use ok assertion, so output will not be polluted by the whole content of the strings
            assert.ok(
                sanitizedActual === sanitizedExpected,
                getStringDiffMsg(sanitizedActual, sanitizedExpected)
            );
            done();
        });

        writeChunkedToStream(html, parser);
    };
}

//Basic tests
loadSAXParserTestData().forEach((test, idx) => {
    exports[`SAX - ${idx + 1}.${test.name}`] = createBasicTest(test.src, test.expected, test.options);
});

// Checks that no handler is invoked after parser.stop() was called, while the
// piped destination still ends up with the complete file contents.
exports['SAX - Piping and .stop()'] = function(done) {
    const parser = new SAXParser();
    const writable = new WritableStreamStub();
    let handlerCallCount = 0;

    const handler = function() {
        handlerCallCount++;

        if (handlerCallCount === 10) {
            parser.stop();
        }
    };

    fs.createReadStream(HUGE_PAGE_PATH, 'utf8')
        .pipe(parser)
        .pipe(writable);

    parser.on('startTag', handler);
    parser.on('endTag', handler);
    parser.on('doctype', handler);
    parser.on('comment', handler);
    parser.on('text', handler);

    writable.once('finish', () => {
        const expected = fs.readFileSync(HUGE_PAGE_PATH).toString();

        assert.strictEqual(handlerCallCount, 10);
        assert.strictEqual(writable.writtenData, expected);
        done();
    });
};

exports['Regression - SAX - Parser silently exits on big files (GH-97)'] = function(done) {
    const parser = new SAXParser();

    fs.createReadStream(HUGE_PAGE_PATH, 'utf8').pipe(parser);

    //NOTE: This is a smoke test - in case of regression it will fail with timeout.
    parser.once('finish', done);
};

exports['Regression - SAX - Last text chunk must be flushed (GH-271)'] = done => {
    const parser = new SAXParser();
    let foundText = false;

    parser.on('text', ({ text }) => {
        foundText = true;
        assert.strictEqual(text, 'text');
    });

    parser.once('finish', () => {
        assert.ok(foundText);
        done();
    });

    parser.write('text');
    parser.end();
};

// Writing a Buffer instead of a string is expected to throw a TypeError.
exports['Regression - SAX - Should not accept binary input (GH-269)'] = () => {
    const stream = new SAXParser();
    const buf = Buffer.from('test');

    assert.throws(() => stream.write(buf), TypeError);
};