• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import assert from 'node:assert';
2import Stream from 'node:stream';
3import { pipeline } from 'node:stream/promises';
4import { stdin, stdout } from 'node:process';
5
6/*
7 * This filter consumes a stream of characters and emits one string per line.
8 */
class LineSplitter extends Stream {
  constructor() {
    super();
    // Text received so far that has not been emitted as a line yet.
    this.buffer = '';
    // Marks this stream as accepting writes; cleared in end().
    this.writable = true;
  }

  /**
   * Appends a chunk of text and emits one 'data' event per completed line.
   *
   * Lines end at "\r\n", "\n\r", "\n" or "\r". A terminator character that is
   * the very last character received so far is NOT treated as a line break
   * yet: it may be the first half of a two-character terminator whose second
   * half arrives in the next chunk. Splitting on it right away would produce
   * a spurious empty line whenever a chunk boundary falls inside "\r\n".
   *
   * @param {string} data Chunk of text to append.
   * @returns {boolean} Always true.
   */
  write(data) {
    // `(?!$)` keeps a lone trailing "\r" or "\n" in the buffer until more
    // input (or end()) tells us whether it is half of a pair.
    const pieces = (this.buffer + data).split(/\r\n|\n\r|\n(?!$)|\r(?!$)/);
    const remainder = pieces.pop();
    for (const line of pieces) {
      this.emit('data', line);
    }
    this.buffer = remainder;
    return true;
  }

  /**
   * Processes an optional final chunk, flushes whatever is still buffered and
   * emits 'end'. A non-empty unterminated last line is emitted as well; an
   * empty one is dropped.
   *
   * @param {string} [data] Optional final chunk of text.
   */
  end(data) {
    this.write(data || '');

    // No more input can arrive, so a held-back terminator now stands alone.
    const pieces = this.buffer.split(/\r\n|\n\r|\n|\r/);
    const lastLine = pieces.pop();
    for (const line of pieces) {
      this.emit('data', line);
    }
    if (lastLine) {
      this.emit('data', lastLine);
    }
    this.buffer = '';

    this.writable = false;
    this.emit('end');
  }
}
34
35
36/*
37 * This filter consumes lines and emits paragraph objects.
38 */
class ParagraphParser extends Stream {
  constructor() {
    super();
    // Whether the block being parsed is a license block. Flipped every time a
    // block separator line is encountered.
    this.blockIsLicenseBlock = false;
    // Marks this stream as accepting writes; cleared in end().
    this.writable = true;
    this.resetBlock(false);
  }

  /**
   * Parses a single line of input.
   *
   * @param {string} data One line of text, without its line terminator.
   * @returns {boolean} Always true.
   */
  write(data) {
    this.parseLine(String(data));
    return true;
  }

  /**
   * Parses an optional final line, emits the paragraph in progress (if it has
   * any content) and emits 'end'.
   *
   * @param {string} [data] Optional final line.
   */
  end(data) {
    if (data)
      this.parseLine(String(data));
    this.flushParagraph();
    this.writable = false;
    this.emit('end');
  }

  /**
   * Starts a new, empty paragraph object. `level` is deliberately left unset;
   * parseLine() assigns it from the first line of content.
   */
  resetParagraph() {
    // -1 means "no reference indentation recorded yet for this paragraph".
    this.paragraphLineIndent = -1;

    this.paragraph = {
      li: '',
      inLicenseBlock: this.blockIsLicenseBlock,
      lines: [],
    };
  }

  /**
   * Starts a new block and a new paragraph within it.
   *
   * @param {boolean} isLicenseBlock Whether the new block is a license block.
   */
  resetBlock(isLicenseBlock) {
    this.blockIsLicenseBlock = isLicenseBlock;
    this.blockHasCStyleComment = false;
    this.resetParagraph();
  }

  /**
   * Emits the current paragraph as a 'data' event when it has at least one
   * line or a list indicator, then starts a fresh paragraph.
   */
  flushParagraph() {
    if (this.paragraph.lines.length || this.paragraph.li) {
      this.emit('data', this.paragraph);
    }
    this.resetParagraph();
  }

  /**
   * Classifies one line and updates the paragraph in progress accordingly.
   *
   * Emitted paragraph objects have the shape
   * `{ li, inLicenseBlock, lines, level }`.
   *
   * @param {string} line One line of text.
   */
  parseLine(line) {
    // Strip trailing whitespace
    line = line.trimEnd();

    // Detect block separator
    if (/^\s*(=|"){3,}\s*$/.test(line)) {
      this.flushParagraph();
      this.resetBlock(!this.blockIsLicenseBlock);
      return;
    }

    // Strip comments around block
    if (this.blockIsLicenseBlock) {
      if (!this.blockHasCStyleComment)
        this.blockHasCStyleComment = /^\s*(\/\*)/.test(line);
      if (this.blockHasCStyleComment) {
        const prev = line;
        line = line.replace(/^(\s*?)(?:\s?\*\/|\/\*\s|\s\*\s?)/, '$1');
        // No comment marker on this line: drop the two columns a marker
        // would have occupied instead.
        if (prev === line)
          line = line.replace(/^\s{2}/, '');
        // The comment was closed on this line.
        if (/\*\//.test(prev))
          this.blockHasCStyleComment = false;
      } else {
        // Strip C++ and perl style comments.
        line = line.replace(/^(\s*)(?:\/\/\s?|#\s?)/, '$1');
      }
    }

    // Detect blank line (paragraph separator)
    if (!/\S/.test(line)) {
      this.flushParagraph();
      return;
    }

    // Detect separator "lines" within a block. These mark a paragraph break
    // and are stripped from the output.
    if (/^\s*[=*-]{5,}\s*$/.test(line)) {
      this.flushParagraph();
      return;
    }

    // Find out indentation level and the start of a bulleted or numbered list.
    // Every part of the pattern is optional, so it always matches.
    const result = /^(\s*)(\d+\.|\*|-)?\s*/.exec(line);
    assert.ok(result);
    // The number of characters that will be stripped from the beginning of
    // the line.
    const lineStripLength = result[0].length;
    // The indentation size that will be used to detect indentation jumps.
    // Fudge by 1 space.
    const lineIndent = Math.floor(lineStripLength / 2) * 2;
    // The indentation level that will be exported
    const level = Math.floor(result[1].length / 2);
    // The list indicator that precedes the actual content, if any.
    const lineLi = result[2];

    // Flush the paragraph when there is a li or an indentation jump
    if (lineLi || (lineIndent !== this.paragraphLineIndent &&
                   this.paragraphLineIndent !== -1)) {
      this.flushParagraph();
      // Keep `li` a string even when the flush was caused by an indentation
      // jump rather than a list indicator.
      this.paragraph.li = lineLi ?? '';
    }

    // Record the paragraph indent that we use to detect indentation jumps,
    // once per paragraph. When we just detected a list indicator, wait for
    // the next line to arrive before setting this.
    // (This used to test `!== -1`, which could never be true because the
    // value starts out as -1, so indentation jumps were never detected.)
    if (!lineLi && this.paragraphLineIndent === -1) {
      this.paragraphLineIndent = lineIndent;
    }

    // Set the output indent level if it has not been set yet.
    if (this.paragraph.level === undefined)
      this.paragraph.level = level;

    // Strip leading whitespace and li.
    line = line.slice(lineStripLength);

    if (line)
      this.paragraph.lines.push(line);
  }
}
163
164
165/*
166 * This filter consumes paragraph objects and emits modified paragraph objects.
167 * The lines within the paragraph are unwrapped where appropriate. It also
168 * replaces multiple consecutive whitespace characters by a single one.
169 */
class Unwrapper extends Stream {
  constructor() {
    super();
    // Marks this stream as accepting writes; cleared in end().
    this.writable = true;
  }

  /**
   * Unwraps the lines of a paragraph in place and re-emits the paragraph.
   *
   * Adjacent lines are joined with a single space unless the break between
   * them looks intentional. Afterwards every run of whitespace is collapsed
   * to one space and trailing whitespace is removed.
   *
   * The decision for each break is made from the two ORIGINAL adjacent lines.
   * (Previously the decisions were stored by original index but looked up by
   * the index into the already-merged array, so after the first merge every
   * later intentional break in the paragraph was lost.)
   *
   * @param {{lines: string[]}} paragraph Paragraph object; `lines` is
   *   modified in place.
   * @returns {boolean} Always true.
   */
  write(paragraph) {
    const { lines } = paragraph;

    // When a line is really short, the line was probably kept separate for a
    // reason - unless the first word on the next line really didn't fit after
    // it, in which case it probably was just ordinary wrapping after all.
    const keepBreakBetween = (line, nextLine) => {
      if (line.length >= 50)
        return false;
      const nextFirstWordLength = nextLine.replace(/\s.*$/, '').length;
      return line.length + nextFirstWordLength < 60;
    };

    const unwrapped = [];
    lines.forEach((line, index) => {
      if (index === 0 || keepBreakBetween(lines[index - 1], line)) {
        unwrapped.push(line);
      } else {
        unwrapped[unwrapped.length - 1] += ` ${line}`;
      }
    });

    // Replace multiple whitespace characters by a single one, and strip
    // trailing whitespace.
    const normalized = unwrapped.map(
      (line) => line.replace(/\s+/g, ' ').trimEnd());

    // Keep the caller's array object; only its contents are replaced.
    lines.splice(0, lines.length, ...normalized);

    this.emit('data', paragraph);
    return true;
  }

  /**
   * Processes an optional final paragraph and emits 'end'.
   *
   * @param {{lines: string[]}} [data] Optional final paragraph.
   */
  end(data) {
    if (data)
      this.write(data);
    this.writable = false;
    this.emit('end');
  }
}
220
/**
 * Escapes a string for inclusion as plain text in an RTF document.
 *
 * - `\`, `{` and `}` are prefixed with a backslash.
 * - Tabs become the `\tab` control word.
 * - Code units U+0000-U+001F and U+007F-U+00FF become `\'hh` (two hex digits).
 * - U+FEFF (byte order mark) is removed.
 * - All other code units from U+0100 up become `\uN?`, where N is the code
 *   unit written as a signed 16-bit DECIMAL number, as RTF requires (values
 *   above 32767 are written as negative numbers). The `?` is the one-character
 *   fallback for readers that do not understand `\u`.
 *
 * @param {string} string Text to escape.
 * @returns {string} The RTF-escaped text.
 */
function rtfEscape(string) {
  const toHexByte = (code) => code.toString(16).padStart(2, '0');
  // RTF control word parameters are signed 16-bit integers.
  const toSigned16 = (code) => (code > 0x7fff ? code - 0x10000 : code);

  return string
    .replace(/[\\{}]/g, (m) => `\\${m}`)
    .replace(/\t/g, () => '\\tab ')
    // eslint-disable-next-line no-control-regex
    .replace(/[\x00-\x1f\x7f-\xff]/g, (m) => `\\'${toHexByte(m.charCodeAt(0))}`)
    .replace(/\ufeff/g, '')
    .replace(/[\u0100-\uffff]/g, (m) => `\\u${toSigned16(m.charCodeAt(0))}?`);
}
234
235/*
236 * This filter generates an rtf document from a stream of paragraph objects.
237 */
class RtfGenerator extends Stream {
  constructor() {
    super();
    // Set once the document header has been emitted, so that the footer is
    // only written for non-empty documents.
    this.didWriteAnything = false;
    // Marks this stream as accepting writes; cleared in end().
    this.writable = true;
  }

  /**
   * Emits the RTF markup for one paragraph, preceded by the document header
   * if this is the first paragraph.
   *
   * Paragraphs outside a license block are rendered bold; paragraphs inside
   * one get a right indent instead. List items are indented one extra level
   * with a hanging indent, and their indicator is written as-is (it is not
   * passed through rtfEscape).
   *
   * @param {object} paragraph
   * @param {string} paragraph.li List indicator, or a falsy value if none.
   * @param {number} paragraph.level Indentation level.
   * @param {string[]} paragraph.lines Lines of text; each one is escaped with
   *   rtfEscape and they are joined with `\line`.
   * @param {boolean} paragraph.inLicenseBlock Whether the paragraph is part
   *   of a license block.
   * @returns {boolean} Always true, like the other filters' write() methods.
   */
  write({ li, level, lines, inLicenseBlock: lic }) {
    if (!this.didWriteAnything) {
      this.emitHeader();
      this.didWriteAnything = true;
    }

    // List items sit one level deeper than their own indentation.
    const indentLevel = li ? level + 1 : level;
    const indentTwips = indentLevel * 240;

    let rtf = '\\pard\\sa150\\sl300\\slmult1';
    if (indentLevel > 0)
      rtf += `\\li${indentTwips}`;
    if (li)
      rtf += `\\tx${indentTwips}\\fi-240`;
    rtf += lic ? '\\ri240' : '\\b';
    if (li)
      rtf += ` ${li}\\tab`;
    rtf += ` ${lines.map(rtfEscape).join('\\line ')}`;
    if (!lic)
      rtf += '\\b0';
    rtf += '\\par\n';

    this.emit('data', rtf);
    return true;
  }

  /**
   * Writes an optional final paragraph, closes the document if anything was
   * written, and emits 'end'.
   *
   * @param {object} [data] Optional final paragraph; see write().
   */
  end(data) {
    if (data)
      this.write(data);
    if (this.didWriteAnything)
      this.emitFooter();
    this.writable = false;
    this.emit('end');
  }

  /** Emits the RTF document prologue (opens the top-level group). */
  emitHeader() {
    this.emit('data', '{\\rtf1\\ansi\\ansicpg1252\\uc1\\deff0\\deflang1033' +
                      '{\\fonttbl{\\f0\\fswiss\\fcharset0 Tahoma;}}\\fs20\n' +
                      '{\\*\\generator txt2rtf 0.0.1;}\n');
  }

  /** Emits the closing brace of the top-level RTF group. */
  emitFooter() {
    this.emit('data', '}');
  }
}
292
// Read standard input as UTF-8 text and make sure it is flowing.
stdin.setEncoding('utf-8');
stdin.resume();

// The conversion is a chain of filters, applied in this order:
// characters -> lines -> paragraphs -> unwrapped paragraphs -> RTF text.
const filters = [
  new LineSplitter(),
  new ParagraphParser(),
  new Unwrapper(),
  new RtfGenerator(),
];

await pipeline(stdin, ...filters, stdout);
304