'use strict';

const assert = require('assert');
const Stream = require('stream');


/*
 * This filter consumes a stream of characters and emits one string per line.
 *
 * Text after the last line terminator seen so far is kept in `this.buffer`
 * until a later chunk (or `end()`) completes the line.
 */
class LineSplitter extends Stream {
  constructor() {
    super();
    this.buffer = '';
    this.writable = true;
  }

  /*
   * Appends `data` to the buffered partial line, emits a 'data' event for
   * every completed line and keeps the unterminated tail buffered.
   * Always returns true.
   */
  write(data) {
    // NOTE(review): a "\r\n" pair that is split across two chunks is seen as
    // two separate terminators here and therefore yields an extra empty line.
    const pieces = (this.buffer + data).split(/\r\n|\n\r|\n|\r/);
    const unterminated = pieces.pop();
    for (const piece of pieces) {
      this.emit('data', piece);
    }
    this.buffer = unterminated;
    return true;
  }

  /*
   * Processes the optional final chunk, emits whatever is still buffered as a
   * last line (only when it is non-empty) and then emits 'end'.
   */
  end(data) {
    this.write(data || '');
    if (this.buffer) {
      this.emit('data', this.buffer);
    }
    this.emit('end');
  }
}


/*
 * This filter consumes lines and emits paragraph objects.
 *
 * An emitted paragraph has the shape
 *   { li, inLicenseBlock, lines, level }
 * where `li` is the list marker that preceded the paragraph ('' or undefined
 * when there is none), `inLicenseBlock` tells whether the paragraph was found
 * inside a license block, `lines` holds the stripped text lines and `level`
 * is the indentation level derived from the paragraph's first line.
 */
class ParagraphParser extends Stream {
  constructor() {
    super();
    this.blockIsLicenseBlock = false;
    this.writable = true;
    this.resetBlock(false);
  }

  /*
   * Parses a single line. Always returns true.
   */
  write(data) {
    this.parseLine(String(data));
    return true;
  }

  /*
   * Parses the optional final line, flushes the pending paragraph and emits
   * 'end'.
   */
  end(data) {
    if (data) {
      this.parseLine(String(data));
    }
    this.flushParagraph();
    this.emit('end');
  }

  /*
   * Starts a fresh, empty paragraph that belongs to the current block.
   */
  resetParagraph() {
    this.paragraphLineIndent = -1;

    this.paragraph = {
      li: '',
      inLicenseBlock: this.blockIsLicenseBlock,
      lines: [],
    };
  }

  /*
   * Starts a new block (license block or not) with a fresh paragraph.
   */
  resetBlock(isLicenseBlock) {
    this.blockIsLicenseBlock = isLicenseBlock;
    this.blockHasCStyleComment = false;
    this.resetParagraph();
  }

  /*
   * Emits the pending paragraph when it has any lines or a list marker, then
   * starts a new one.
   */
  flushParagraph() {
    if (this.paragraph.lines.length || this.paragraph.li) {
      this.emit('data', this.paragraph);
    }
    this.resetParagraph();
  }

  /*
   * Classifies one input line: block separator, blank line, in-block
   * separator, or content that is appended to the pending paragraph.
   */
  parseLine(line) {
    // Strip trailing whitespace
    line = line.trimEnd();

    // Detect block separator. Every separator toggles between "license block"
    // and "not a license block".
    if (/^\s*(=|"){3,}\s*$/.test(line)) {
      this.flushParagraph();
      this.resetBlock(!this.blockIsLicenseBlock);
      return;
    }

    // Strip comments around block
    if (this.blockIsLicenseBlock) {
      if (!this.blockHasCStyleComment) {
        this.blockHasCStyleComment = /^\s*(\/\*)/.test(line);
      }
      if (this.blockHasCStyleComment) {
        const prev = line;
        // Remove a leading "/* ", " * " or " */" decoration.
        line = line.replace(/^(\s*?)(?:\s?\*\/|\/\*\s|\s\*\s?)/, '$1');
        // No decoration found: drop two leading spaces instead.
        if (prev === line) {
          line = line.replace(/^\s{2}/, '');
        }
        // The comment ends on this line.
        if (/\*\//.test(prev)) {
          this.blockHasCStyleComment = false;
        }
      } else {
        // Strip C++ and perl style comments.
        line = line.replace(/^(\s*)(?:\/\/\s?|#\s?)/, '$1');
      }
    }

    // Detect blank line (paragraph separator)
    if (!/\S/.test(line)) {
      this.flushParagraph();
      return;
    }

    // Detect separator "lines" within a block. These mark a paragraph break
    // and are stripped from the output.
    if (/^\s*[=*-]{5,}\s*$/.test(line)) {
      this.flushParagraph();
      return;
    }

    // Find out indentation level and the start of a bulleted or numbered
    // list.
    const result = /^(\s*)(\d+\.|\*|-)?\s*/.exec(line);
    assert.ok(result);
    // The number of characters that will be stripped from the beginning of
    // the line.
    const lineStripLength = result[0].length;
    // The indentation size that will be used to detect indentation jumps.
    // Fudge by 1 space.
    const lineIndent = Math.floor(lineStripLength / 2) * 2;
    // The indentation level that will be exported
    const level = Math.floor(result[1].length / 2);
    // The list indicator that precedes the actual content, if any.
    const lineLi = result[2];

    // Flush the paragraph when there is a li or an indentation jump
    const indentJumped = lineIndent !== this.paragraphLineIndent &&
                         this.paragraphLineIndent !== -1;
    if (lineLi || indentJumped) {
      this.flushParagraph();
      this.paragraph.li = lineLi;
    }

    // Set the paragraph indent that we use to detect indentation jumps. When
    // we just detected a list indicator, wait
    // for the next line to arrive before setting this.
    // NOTE(review): paragraphLineIndent is only ever assigned -1 elsewhere, so
    // this condition never holds and indentation jumps are never detected.
    // Left untouched because changing it would alter the paragraph splitting
    // of existing input - confirm the intended condition before fixing.
    if (!lineLi && this.paragraphLineIndent !== -1) {
      this.paragraphLineIndent = lineIndent;
    }

    // Set the output indent level if it has not been set yet.
    if (this.paragraph.level === undefined) {
      this.paragraph.level = level;
    }

    // Strip leading whitespace and li.
    line = line.slice(lineStripLength);

    if (line) {
      this.paragraph.lines.push(line);
    }
  }
}


/*
 * Decides whether the break between `line` and the line following it
 * (`nextLine`) was put there on purpose.
 */
function isDeliberateBreak(line, nextLine) {
  // When a line is really short, the line was probably kept separate for a
  // reason.
  if (line.length >= 50) {
    return false;
  }
  // If the first word on the next line really didn't fit after the line,
  // it probably was just ordinary wrapping after all.
  const nextFirstWordLength = nextLine.replace(/\s.*$/, '').length;
  return line.length + nextFirstWordLength < 60;
}


/*
 * This filter consumes paragraph objects and emits modified paragraph objects.
 * The lines within the paragraph are unwrapped where appropriate. It also
 * replaces multiple consecutive whitespace characters by a single one.
 */
class Unwrapper extends Stream {
  constructor() {
    super();
    this.writable = true;
  }

  /*
   * Unwraps `paragraph.lines` in place and emits the paragraph.
   * Always returns true.
   */
  write(paragraph) {
    const { lines } = paragraph;
    const unwrapped = [];

    // Each break is judged from the two original lines around it. (Judging by
    // the index of an already merged line loses every deliberate break that
    // follows the first merge in a paragraph.)
    let startNewLine = true;
    for (let i = 0; i < lines.length; i++) {
      if (startNewLine) {
        unwrapped.push(lines[i]);
      } else {
        unwrapped[unwrapped.length - 1] += ` ${lines[i]}`;
      }
      startNewLine = i + 1 < lines.length &&
                     isDeliberateBreak(lines[i], lines[i + 1]);
    }

    // Write the result back into the same array object that the paragraph
    // refers to.
    lines.length = 0;
    for (const line of unwrapped) {
      // Replace multiple whitespace characters by a single one, and strip
      // trailing whitespace.
      lines.push(line.replace(/\s+/g, ' ').replace(/\s+$/, ''));
    }

    this.emit('data', paragraph);
    return true;
  }

  /*
   * Processes the optional final paragraph and emits 'end'.
   */
  end(data) {
    if (data) {
      this.write(data);
    }
    this.emit('end');
  }
}


/*
 * Escapes a string for inclusion in an rtf document:
 *  - backslash and curly braces are prefixed with a backslash,
 *  - tabs become \tab,
 *  - the remaining characters in 0x00-0x1f and 0x7f-0xff become \'hh,
 *  - U+FEFF is dropped,
 *  - every other UTF-16 code unit from 0x0100 up becomes \uN? where N is the
 *    code unit as a signed 16-bit *decimal* number, which is the form the RTF
 *    \u keyword takes (values above 32767 are written as negatives). The '?'
 *    is the single fallback character announced by \uc1 in the header.
 */
function rtfEscape(string) {
  function toHex(number, length) {
    return number.toString(16).padStart(length, '0');
  }

  function toSigned16(codeUnit) {
    return codeUnit > 0x7fff ? codeUnit - 0x10000 : codeUnit;
  }

  return string
    .replace(/[\\{}]/g, (m) => `\\${m}`)
    .replace(/\t/g, () => '\\tab ')
    // eslint-disable-next-line no-control-regex
    .replace(/[\x00-\x1f\x7f-\xff]/g, (m) => `\\'${toHex(m.charCodeAt(0), 2)}`)
    .replace(/\ufeff/g, '')
    .replace(/[\u0100-\uffff]/g, (m) => `\\u${toSigned16(m.charCodeAt(0))}?`);
}


/*
 * This filter generates an rtf document from a stream of paragraph objects.
 *
 * The document header is emitted right before the first paragraph and the
 * closing brace on end(); when no paragraph was ever written, neither is
 * emitted.
 */
class RtfGenerator extends Stream {
  constructor() {
    super();
    this.didWriteAnything = false;
    this.writable = true;
  }

  /*
   * Emits the rtf for one paragraph. Paragraphs outside a license block are
   * set in bold; list paragraphs are indented one extra level and get a
   * hanging indent for their marker. Always returns true.
   */
  write({ li, level, lines, inLicenseBlock: lic }) {
    if (!this.didWriteAnything) {
      this.emitHeader();
      this.didWriteAnything = true;
    }

    // A list item sits one level deeper than its own indentation.
    const indentLevel = li ? level + 1 : level;
    const indentTwips = indentLevel * 240;

    let rtf = '\\pard\\sa150\\sl300\\slmult1';
    if (indentLevel > 0) {
      rtf += `\\li${indentTwips}`;
    }
    if (li) {
      rtf += `\\tx${indentTwips}\\fi-240`;
    }
    rtf += lic ? '\\ri240' : '\\b';
    if (li) {
      rtf += ` ${li}\\tab`;
    }
    rtf += ` ${lines.map(rtfEscape).join('\\line ')}`;
    if (!lic) {
      rtf += '\\b0';
    }
    rtf += '\\par\n';

    this.emit('data', rtf);
    return true;
  }

  /*
   * Processes the optional final paragraph, closes the document when anything
   * was written and emits 'end'.
   */
  end(data) {
    if (data) {
      this.write(data);
    }
    if (this.didWriteAnything) {
      this.emitFooter();
    }
    this.emit('end');
  }

  emitHeader() {
    this.emit('data', '{\\rtf1\\ansi\\ansicpg1252\\uc1\\deff0\\deflang1033' +
                      '{\\fonttbl{\\f0\\fswiss\\fcharset0 Tahoma;}}\\fs20\n' +
                      '{\\*\\generator txt2rtf 0.0.1;}\n');
  }

  emitFooter() {
    this.emit('data', '}');
  }
}


// Wire the filters together: stdin -> lines -> paragraphs -> unwrapped
// paragraphs -> rtf -> stdout.
const stdin = process.stdin;
const stdout = process.stdout;
const lineSplitter = new LineSplitter();
const paragraphParser = new ParagraphParser();
const unwrapper = new Unwrapper();
const rtfGenerator = new RtfGenerator();

stdin.setEncoding('utf-8');
stdin.resume();

stdin.pipe(lineSplitter);
lineSplitter.pipe(paragraphParser);
paragraphParser.pipe(unwrapper);
unwrapper.pipe(rtfGenerator);
rtfGenerator.pipe(stdout);