1import assert from 'node:assert'; 2import Stream from 'node:stream'; 3import { pipeline } from 'node:stream/promises'; 4import { stdin, stdout } from 'node:process'; 5 6/* 7 * This filter consumes a stream of characters and emits one string per line. 8 */ 9class LineSplitter extends Stream { 10 constructor() { 11 super(); 12 this.buffer = ''; 13 this.writable = true; 14 } 15 16 write(data) { 17 const lines = (this.buffer + data).split(/\r\n|\n\r|\n|\r/); 18 for (let i = 0; i < lines.length - 1; i++) { 19 this.emit('data', lines[i]); 20 } 21 this.buffer = lines[lines.length - 1]; 22 return true; 23 } 24 25 end(data) { 26 this.write(data || ''); 27 if (this.buffer) { 28 this.emit('data', this.buffer); 29 } 30 this.writable = false; 31 this.emit('end'); 32 } 33} 34 35 36/* 37 * This filter consumes lines and emits paragraph objects. 38 */ 39class ParagraphParser extends Stream { 40 constructor() { 41 super(); 42 this.blockIsLicenseBlock = false; 43 this.writable = true; 44 this.resetBlock(false); 45 } 46 47 write(data) { 48 this.parseLine(data + ''); 49 return true; 50 } 51 52 end(data) { 53 if (data) 54 this.parseLine(data + ''); 55 this.flushParagraph(); 56 this.writable = false; 57 this.emit('end'); 58 } 59 60 resetParagraph() { 61 this.paragraphLineIndent = -1; 62 63 this.paragraph = { 64 li: '', 65 inLicenseBlock: this.blockIsLicenseBlock, 66 lines: [], 67 }; 68 } 69 70 resetBlock(isLicenseBlock) { 71 this.blockIsLicenseBlock = isLicenseBlock; 72 this.blockHasCStyleComment = false; 73 this.resetParagraph(); 74 } 75 76 flushParagraph() { 77 if (this.paragraph.lines.length || this.paragraph.li) { 78 this.emit('data', this.paragraph); 79 } 80 this.resetParagraph(); 81 } 82 83 parseLine(line) { 84 // Strip trailing whitespace 85 line = line.trimRight(); 86 87 // Detect block separator 88 if (/^\s*(=|"){3,}\s*$/.test(line)) { 89 this.flushParagraph(); 90 this.resetBlock(!this.blockIsLicenseBlock); 91 return; 92 } 93 94 // Strip comments around block 95 if 
(this.blockIsLicenseBlock) { 96 if (!this.blockHasCStyleComment) 97 this.blockHasCStyleComment = /^\s*(\/\*)/.test(line); 98 if (this.blockHasCStyleComment) { 99 const prev = line; 100 line = line.replace(/^(\s*?)(?:\s?\*\/|\/\*\s|\s\*\s?)/, '$1'); 101 if (prev === line) 102 line = line.replace(/^\s{2}/, ''); 103 if (/\*\//.test(prev)) 104 this.blockHasCStyleComment = false; 105 } else { 106 // Strip C++ and perl style comments. 107 line = line.replace(/^(\s*)(?:\/\/\s?|#\s?)/, '$1'); 108 } 109 } 110 111 // Detect blank line (paragraph separator) 112 if (!/\S/.test(line)) { 113 this.flushParagraph(); 114 return; 115 } 116 117 // Detect separator "lines" within a block. These mark a paragraph break 118 // and are stripped from the output. 119 if (/^\s*[=*-]{5,}\s*$/.test(line)) { 120 this.flushParagraph(); 121 return; 122 } 123 124 // Find out indentation level and the start of a lied or numbered list; 125 const result = /^(\s*)(\d+\.|\*|-)?\s*/.exec(line); 126 assert.ok(result); 127 // The number of characters that will be stripped from the beginning of 128 // the line. 129 const lineStripLength = result[0].length; 130 // The indentation size that will be used to detect indentation jumps. 131 // Fudge by 1 space. 132 const lineIndent = Math.floor(lineStripLength / 2) * 2; 133 // The indentation level that will be exported 134 const level = Math.floor(result[1].length / 2); 135 // The list indicator that precedes the actual content, if any. 136 const lineLi = result[2]; 137 138 // Flush the paragraph when there is a li or an indentation jump 139 if (lineLi || (lineIndent !== this.paragraphLineIndent && 140 this.paragraphLineIndent !== -1)) { 141 this.flushParagraph(); 142 this.paragraph.li = lineLi; 143 } 144 145 // Set the paragraph indent that we use to detect indentation jumps. When 146 // we just detected a list indicator, wait 147 // for the next line to arrive before setting this. 
148 if (!lineLi && this.paragraphLineIndent !== -1) { 149 this.paragraphLineIndent = lineIndent; 150 } 151 152 // Set the output indent level if it has not been set yet. 153 if (this.paragraph.level === undefined) 154 this.paragraph.level = level; 155 156 // Strip leading whitespace and li. 157 line = line.slice(lineStripLength); 158 159 if (line) 160 this.paragraph.lines.push(line); 161 } 162} 163 164 165/* 166 * This filter consumes paragraph objects and emits modified paragraph objects. 167 * The lines within the paragraph are unwrapped where appropriate. It also 168 * replaces multiple consecutive whitespace characters by a single one. 169 */ 170class Unwrapper extends Stream { 171 constructor() { 172 super(); 173 this.writable = true; 174 } 175 176 write(paragraph) { 177 const lines = paragraph.lines; 178 const breakAfter = []; 179 let i; 180 181 for (i = 0; i < lines.length - 1; i++) { 182 const line = lines[i]; 183 184 // When a line is really short, the line was probably kept separate for a 185 // reason. 186 if (line.length < 50) { 187 // If the first word on the next line really didn't fit after the line, 188 // it probably was just ordinary wrapping after all. 189 const nextFirstWordLength = lines[i + 1].replace(/\s.*$/, '').length; 190 if (line.length + nextFirstWordLength < 60) { 191 breakAfter[i] = true; 192 } 193 } 194 } 195 196 for (i = 0; i < lines.length - 1;) { 197 if (!breakAfter[i]) { 198 lines[i] += ` ${lines.splice(i + 1, 1)[0]}`; 199 } else { 200 i++; 201 } 202 } 203 204 for (i = 0; i < lines.length; i++) { 205 // Replace multiple whitespace characters by a single one, and strip 206 // trailing whitespace. 
207 lines[i] = lines[i].replace(/\s+/g, ' ').replace(/\s+$/, ''); 208 } 209 210 this.emit('data', paragraph); 211 } 212 213 end(data) { 214 if (data) 215 this.write(data); 216 this.writable = false; 217 this.emit('end'); 218 } 219} 220 221function rtfEscape(string) { 222 function toHex(number, length) { 223 return (~~number).toString(16).padStart(length, '0'); 224 } 225 226 return string 227 .replace(/[\\{}]/g, (m) => `\\${m}`) 228 .replace(/\t/g, () => '\\tab ') 229 // eslint-disable-next-line no-control-regex 230 .replace(/[\x00-\x1f\x7f-\xff]/g, (m) => `\\'${toHex(m.charCodeAt(0), 2)}`) 231 .replace(/\ufeff/g, '') 232 .replace(/[\u0100-\uffff]/g, (m) => `\\u${toHex(m.charCodeAt(0), 4)}?`); 233} 234 235/* 236 * This filter generates an rtf document from a stream of paragraph objects. 237 */ 238class RtfGenerator extends Stream { 239 constructor() { 240 super(); 241 this.didWriteAnything = false; 242 this.writable = true; 243 } 244 245 write({ li, level, lines, inLicenseBlock: lic }) { 246 if (!this.didWriteAnything) { 247 this.emitHeader(); 248 this.didWriteAnything = true; 249 } 250 251 if (li) 252 level++; 253 254 let rtf = '\\pard\\sa150\\sl300\\slmult1'; 255 if (level > 0) 256 rtf += `\\li${level * 240}`; 257 if (li) 258 rtf += `\\tx${level * 240}\\fi-240`; 259 if (lic) 260 rtf += '\\ri240'; 261 if (!lic) 262 rtf += '\\b'; 263 if (li) 264 rtf += ` ${li}\\tab`; 265 rtf += ` ${lines.map(rtfEscape).join('\\line ')}`; 266 if (!lic) 267 rtf += '\\b0'; 268 rtf += '\\par\n'; 269 270 this.emit('data', rtf); 271 } 272 273 end(data) { 274 if (data) 275 this.write(data); 276 if (this.didWriteAnything) 277 this.emitFooter(); 278 this.writable = false; 279 this.emit('end'); 280 } 281 282 emitHeader() { 283 this.emit('data', '{\\rtf1\\ansi\\ansicpg1252\\uc1\\deff0\\deflang1033' + 284 '{\\fonttbl{\\f0\\fswiss\\fcharset0 Tahoma;}}\\fs20\n' + 285 '{\\*\\generator txt2rtf 0.0.1;}\n'); 286 } 287 288 emitFooter() { 289 this.emit('data', '}'); 290 } 291} 292 
// Have stdin deliver decoded text and start it flowing.
stdin.setEncoding('utf-8');
stdin.resume();

// The filters, in the order the data passes through them on its way from
// stdin to stdout: characters -> lines -> paragraphs -> unwrapped
// paragraphs -> rtf.
const filters = [
  new LineSplitter(),
  new ParagraphParser(),
  new Unwrapper(),
  new RtfGenerator(),
];

await pipeline(stdin, ...filters, stdout);