• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1'use strict';
2
3const assert = require('assert');
4const Stream = require('stream');
5
6
/*
 * This filter consumes a stream of characters and emits one string per line.
 *
 * CRLF, LFCR, LF and CR are all accepted as line terminators. Every complete
 * line is emitted as a 'data' event (without its terminator); 'end' is
 * emitted after end() has flushed whatever text is still buffered.
 */
class LineSplitter extends Stream {
  constructor() {
    super();
    // Text received so far that does not yet form a complete line.
    this.buffer = '';
    this.writable = true;
  }

  /**
   * Appends a chunk and emits every line that the chunk completes.
   *
   * @param {string|Buffer} data Chunk to append; it is coerced to a string.
   * @returns {boolean} Always true.
   */
  write(data) {
    const text = this.buffer + data;
    const terminator = /\r\n|\n\r|\n|\r/g;
    let lineStart = 0;
    let match;

    while ((match = terminator.exec(text)) !== null) {
      const afterTerminator = match.index + match[0].length;

      // A lone CR or LF at the very end of the text may be the first half of
      // a two-character terminator whose second half arrives with the next
      // chunk. Keep it buffered; splitting now would later produce a spurious
      // empty line.
      if (afterTerminator === text.length && match[0].length === 1)
        break;

      this.emit('data', text.slice(lineStart, match.index));
      lineStart = afterTerminator;
    }

    this.buffer = text.slice(lineStart);
    return true;
  }

  /**
   * Processes an optional final chunk, flushes the buffered text and emits
   * 'end'.
   *
   * @param {string|Buffer} [data] Optional last chunk.
   */
  end(data) {
    this.write(data || '');

    const rest = this.buffer;
    this.buffer = '';

    if (/[\r\n]$/.test(rest)) {
      // write() held back a single trailing terminator; the text in front of
      // it is a complete (possibly empty) line.
      this.emit('data', rest.slice(0, -1));
    } else if (rest) {
      // Unterminated last line.
      this.emit('data', rest);
    }
    this.emit('end');
  }
}
34
35
/*
 * This filter consumes lines and emits paragraph objects.
 *
 * An emitted paragraph has the properties:
 *   li             - list marker that preceded the paragraph ('' when none)
 *   inLicenseBlock - whether the paragraph was read inside a license block
 *   lines          - content lines, with leading whitespace, list marker and
 *                    (inside license blocks) comment markers removed
 *   level          - half the leading whitespace width (rounded down) of the
 *                    first line parsed for the paragraph
 *
 * A line made only of three or more `=` or `"` characters toggles between
 * "license block" and "not license block".
 */
class ParagraphParser extends Stream {
  constructor() {
    super();
    this.blockIsLicenseBlock = false;
    this.writable = true;
    this.resetBlock(false);
  }

  /**
   * Parses a single line.
   *
   * @param {*} data The line; coerced to a string.
   * @returns {boolean} Always true.
   */
  write(data) {
    this.parseLine(data + '');
    return true;
  }

  /**
   * Parses an optional last line, emits the pending paragraph (if any) and
   * then emits 'end'.
   *
   * @param {*} [data] Optional last line.
   */
  end(data) {
    if (data)
      this.parseLine(data + '');
    this.flushParagraph();
    this.emit('end');
  }

  // Starts a new, empty paragraph that inherits the current block type.
  resetParagraph() {
    this.paragraphLineIndent = -1;

    // `level` is deliberately absent: parseLine() sets it from the first line
    // it handles for this paragraph.
    this.paragraph = {
      li: '',
      inLicenseBlock: this.blockIsLicenseBlock,
      lines: []
    };
  }

  // Starts a new block of the given type and a new paragraph within it.
  resetBlock(isLicenseBlock) {
    this.blockIsLicenseBlock = isLicenseBlock;
    this.blockHasCStyleComment = false;
    this.resetParagraph();
  }

  // Emits the current paragraph unless it is empty (no lines and no list
  // marker), then starts a new one.
  flushParagraph() {
    if (this.paragraph.lines.length || this.paragraph.li) {
      this.emit('data', this.paragraph);
    }
    this.resetParagraph();
  }

  /**
   * Removes the comment decoration from a line of a license block and keeps
   * track of whether we are inside a C-style comment.
   *
   * @param {string} line Line with trailing whitespace already removed.
   * @returns {string} The line without its leading comment marker.
   */
  stripCommentMarkers(line) {
    if (!this.blockHasCStyleComment)
      this.blockHasCStyleComment = /^\s*(\/\*)/.test(line);

    if (!this.blockHasCStyleComment) {
      // Strip C++ and perl style comments.
      return line.replace(/^(\s*)(?:\/\/\s?|#\s?)/, '$1');
    }

    // Strip a leading "/* ", " * " or " */".
    let stripped = line.replace(/^(\s*?)(?:\s?\*\/|\/\*\s|\s\*\s?)/, '$1');
    // No marker on this line: drop two leading whitespace characters instead.
    if (stripped === line)
      stripped = line.replace(/^\s{2}/, '');
    // The comment ends on this line.
    if (/\*\//.test(line))
      this.blockHasCStyleComment = false;
    return stripped;
  }

  /**
   * Feeds one line into the parser state machine.
   *
   * @param {string} line A single line without its line terminator.
   */
  parseLine(line) {
    // Strip trailing whitespace
    line = line.trimEnd();

    // Detect block separator
    if (/^\s*(=|"){3,}\s*$/.test(line)) {
      this.flushParagraph();
      this.resetBlock(!this.blockIsLicenseBlock);
      return;
    }

    // Strip comments around block
    if (this.blockIsLicenseBlock)
      line = this.stripCommentMarkers(line);

    // Detect blank line (paragraph separator)
    if (!/\S/.test(line)) {
      this.flushParagraph();
      return;
    }

    // Detect separator "lines" within a block. These mark a paragraph break
    // and are stripped from the output.
    if (/^\s*[=*-]{5,}\s*$/.test(line)) {
      this.flushParagraph();
      return;
    }

    // Find out indentation level and the start of a bulleted or numbered
    // list item.
    const result = /^(\s*)(\d+\.|\*|-)?\s*/.exec(line);
    assert.ok(result);
    // The number of characters that will be stripped from the beginning of
    // the line.
    const lineStripLength = result[0].length;
    // The indentation size that will be used to detect indentation jumps.
    // Fudge by 1 space.
    const lineIndent = Math.floor(lineStripLength / 2) * 2;
    // The indentation level that will be exported
    const level = Math.floor(result[1].length / 2);
    // The list indicator that precedes the actual content, if any.
    const lineLi = result[2];

    // Flush the paragraph when there is a li or an indentation jump
    if (lineLi || (lineIndent !== this.paragraphLineIndent &&
                   this.paragraphLineIndent !== -1)) {
      this.flushParagraph();
      this.paragraph.li = lineLi;
    }

    // Set the paragraph indent that we use to detect indentation jumps. When
    // we just detected a list indicator, wait
    // for the next line to arrive before setting this.
    // NOTE(review): resetParagraph() sets paragraphLineIndent to -1 and this
    // is the only other assignment in this class, so the guard below appears
    // never to pass and the indentation-jump test above appears unreachable.
    // Behavior is intentionally left unchanged; confirm the intent before
    // enabling it, since that would split hanging-indent paragraphs.
    if (!lineLi && this.paragraphLineIndent !== -1) {
      this.paragraphLineIndent = lineIndent;
    }

    // Set the output indent level if it has not been set yet.
    if (this.paragraph.level === undefined)
      this.paragraph.level = level;

    // Strip leading whitespace and li.
    line = line.slice(lineStripLength);

    if (line)
      this.paragraph.lines.push(line);
  }
}
162
163
/*
 * This filter consumes paragraph objects and emits modified paragraph objects.
 * The lines within the paragraph are unwrapped where appropriate. It also
 * replaces multiple consecutive whitespace characters by a single one.
 *
 * The paragraph object and its `lines` array are modified in place and the
 * same object is emitted.
 */
class Unwrapper extends Stream {
  constructor() {
    super();
    this.writable = true;
  }

  /**
   * Decides whether the line break between two adjacent source lines was
   * deliberate and has to be kept.
   *
   * @param {string} line The line before the break.
   * @param {string} nextLine The line after the break.
   * @returns {boolean} True when the break should be preserved.
   */
  static shouldBreakAfter(line, nextLine) {
    // When a line is really short, the line was probably kept separate for a
    // reason.
    if (line.length >= 50)
      return false;
    // If the first word on the next line really didn't fit after the line,
    // it probably was just ordinary wrapping after all.
    const nextFirstWordLength = nextLine.replace(/\s.*$/, '').length;
    return line.length + nextFirstWordLength < 60;
  }

  /**
   * Unwraps and normalizes the lines of a paragraph, then emits it.
   *
   * @param {{lines: string[]}} paragraph Paragraph to process.
   * @returns {boolean} Always true.
   */
  write(paragraph) {
    const { lines } = paragraph;
    const unwrapped = [];
    // Whether the current source line starts a new output line. The decision
    // is always taken from the ORIGINAL pair of adjacent lines, so a kept
    // break is honored even when earlier lines were joined.
    let startsNewLine = true;

    for (let i = 0; i < lines.length; i++) {
      if (startsNewLine) {
        unwrapped.push(lines[i]);
      } else {
        unwrapped[unwrapped.length - 1] += ` ${lines[i]}`;
      }
      startsNewLine = i + 1 < lines.length &&
                      Unwrapper.shouldBreakAfter(lines[i], lines[i + 1]);
    }

    // Replace multiple whitespace characters by a single one, and strip
    // trailing whitespace.
    const normalized = unwrapped.map(
      (text) => text.replace(/\s+/g, ' ').replace(/\s+$/, ''));

    // Update the caller's array in place.
    lines.splice(0, lines.length, ...normalized);

    this.emit('data', paragraph);
    return true;
  }

  /**
   * Processes an optional last paragraph and emits 'end'.
   *
   * @param {{lines: string[]}} [data] Optional last paragraph.
   */
  end(data) {
    if (data)
      this.write(data);
    this.emit('end');
  }
}
218
/**
 * Escapes a string so that it can be placed in the text of an RTF document.
 *
 * - `\`, `{` and `}` are prefixed with a backslash.
 * - Tabs become `\tab `.
 * - Code units 0x00-0x1f and 0x7f-0xff become `\'hh` (two hex digits).
 * - U+FEFF (byte order mark) is dropped.
 * - Code units 0x0100-0xffff become `\uN?`, where N is the code unit written
 *   as a signed 16-bit decimal number, as the `\u` control word requires, and
 *   `?` is the single fallback character announced by `\uc1` in the header.
 *
 * @param {string} string Text to escape.
 * @returns {string} RTF-safe text.
 */
function rtfEscape(string) {
  // Two-digit lowercase hex, used by the \'hh escape.
  const toHexByte = (code) => code.toString(16).padStart(2, '0');
  // \uN takes a signed 16-bit decimal: values above 32767 are written as
  // negative numbers.
  const toSignedInt16 = (code) => (code > 0x7fff ? code - 0x10000 : code);

  return string
    .replace(/[\\{}]/g, (m) => `\\${m}`)
    .replace(/\t/g, () => '\\tab ')
    // eslint-disable-next-line no-control-regex
    .replace(/[\x00-\x1f\x7f-\xff]/g, (m) => `\\'${toHexByte(m.charCodeAt(0))}`)
    .replace(/\ufeff/g, '')
    .replace(/[\u0100-\uffff]/g,
             (m) => `\\u${toSignedInt16(m.charCodeAt(0))}?`);
}
232
/*
 * This filter generates an rtf document from a stream of paragraph objects.
 *
 * Every paragraph written is emitted as one 'data' string. The document
 * header is emitted right before the first paragraph and the closing brace
 * is emitted by end(), but only when at least one paragraph was written.
 */
class RtfGenerator extends Stream {
  constructor() {
    super();
    // Becomes true once the header has been emitted.
    this.didWriteAnything = false;
    this.writable = true;
  }

  /**
   * Emits the RTF markup for one paragraph.
   *
   * @param {object} paragraph
   * @param {string} paragraph.li List marker, falsy when the paragraph is
   *     not a list item.
   * @param {number} paragraph.level Indentation level.
   * @param {string[]} paragraph.lines Lines of text; joined with `\line`.
   * @param {boolean} paragraph.inLicenseBlock Paragraphs outside a license
   *     block are rendered bold; paragraphs inside get a right indent.
   * @returns {boolean} Always true, like the other filters in this file.
   */
  write({ li, level, lines, inLicenseBlock: lic }) {
    if (!this.didWriteAnything) {
      this.emitHeader();
      this.didWriteAnything = true;
    }

    // List items are indented one level deeper so that the marker can hang
    // in front of the text (see \fi-240 below).
    const indentLevel = li ? level + 1 : level;

    let rtf = '\\pard\\sa150\\sl300\\slmult1';
    if (indentLevel > 0)
      rtf += `\\li${indentLevel * 240}`;
    if (li)
      rtf += `\\tx${indentLevel * 240}\\fi-240`;
    rtf += lic ? '\\ri240' : '\\b';
    if (li)
      rtf += ` ${li}\\tab`;
    rtf += ` ${lines.map(rtfEscape).join('\\line ')}`;
    if (!lic)
      rtf += '\\b0';
    rtf += '\\par\n';

    this.emit('data', rtf);
    return true;
  }

  /**
   * Writes an optional last paragraph, closes the document if anything was
   * written, and emits 'end'.
   *
   * @param {object} [data] Optional last paragraph.
   */
  end(data) {
    if (data)
      this.write(data);
    if (this.didWriteAnything)
      this.emitFooter();
    this.emit('end');
  }

  // Emits the document prologue: opening brace, font table and generator tag.
  emitHeader() {
    this.emit('data', '{\\rtf1\\ansi\\ansicpg1252\\uc1\\deff0\\deflang1033' +
                      '{\\fonttbl{\\f0\\fswiss\\fcharset0 Tahoma;}}\\fs20\n' +
                      '{\\*\\generator txt2rtf 0.0.1;}\n');
  }

  // Emits the brace that closes the group opened by emitHeader().
  emitFooter() {
    this.emit('data', '}');
  }
}
289
290
// Build the conversion pipeline:
//   stdin -> lines -> paragraphs -> unwrapped paragraphs -> RTF -> stdout
const { stdin, stdout } = process;
const lineSplitter = new LineSplitter();
const paragraphParser = new ParagraphParser();
const unwrapper = new Unwrapper();
const rtfGenerator = new RtfGenerator();

// Read stdin as UTF-8 text and start it flowing.
stdin.setEncoding('utf-8');
stdin.resume();

// pipe() returns its destination, so the stages can be chained.
stdin
  .pipe(lineSplitter)
  .pipe(paragraphParser)
  .pipe(unwrapper)
  .pipe(rtfGenerator)
  .pipe(stdout);
306