• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1'use strict';
2
3// An implementation of the WHATWG Encoding Standard
4// https://encoding.spec.whatwg.org
5
6const {
7  Map,
8  ObjectCreate,
9  ObjectDefineProperties,
10  ObjectGetOwnPropertyDescriptors,
11  Symbol,
12  SymbolToStringTag,
13  Uint32Array,
14  Uint8Array,
15} = primordials;
16
17const {
18  ERR_ENCODING_INVALID_ENCODED_DATA,
19  ERR_ENCODING_NOT_SUPPORTED,
20  ERR_INVALID_ARG_TYPE,
21  ERR_INVALID_THIS,
22  ERR_NO_ICU
23} = require('internal/errors').codes;
// Private symbol keys used to attach internal state to encoder/decoder
// instances.
// Holds the decoder backend: the native converter handle in the ICU build,
// or a StringDecoder instance in the JS fallback.
const kHandle = Symbol('handle');
// Holds the bitmask of CONVERTER_FLAGS_* values for a decoder.
const kFlags = Symbol('flags');
// Holds the encoding name resolved from the label given to TextDecoder.
const kEncoding = Symbol('encoding');
// Brand marker: set to `true` on TextDecoder instances and checked by
// validateDecoder().
const kDecoder = Symbol('decoder');
// Brand marker: set to `true` on TextEncoder instances and checked by
// validateEncoder().
const kEncoder = Symbol('encoder');
29
30const {
31  getConstructorOf,
32  customInspectSymbol: inspect
33} = require('internal/util');
34
35const {
36  isAnyArrayBuffer,
37  isArrayBufferView,
38  isUint8Array
39} = require('internal/util/types');
40
41const { validateString } = require('internal/validators');
42
43const {
44  encodeInto,
45  encodeUtf8String
46} = internalBinding('buffer');
47
48let Buffer;
49function lazyBuffer() {
50  if (Buffer === undefined)
51    Buffer = require('buffer').Buffer;
52  return Buffer;
53}
54
// Brand check for TextEncoder methods: throws ERR_INVALID_THIS unless `obj`
// is non-nullish and carries the kEncoder marker set to exactly `true`.
function validateEncoder(obj) {
  const isEncoder = obj != null && obj[kEncoder] === true;
  if (!isEncoder) {
    throw new ERR_INVALID_THIS('TextEncoder');
  }
}
59
// Brand check for TextDecoder methods: throws ERR_INVALID_THIS unless `obj`
// is non-nullish and carries the kDecoder marker set to exactly `true`.
function validateDecoder(obj) {
  const isDecoder = obj != null && obj[kDecoder] === true;
  if (!isDecoder) {
    throw new ERR_INVALID_THIS('TextDecoder');
  }
}
64
// Throws ERR_INVALID_ARG_TYPE when `typeof prop` differs from `expected`.
// `propName` and `expectedName` are only used to build the error. Note that
// `null` satisfies an expected type of 'object'.
function validateArgument(prop, expected, propName, expectedName) {
  const actual = typeof prop;
  if (actual === expected) {
    return;
  }
  throw new ERR_INVALID_ARG_TYPE(propName, expectedName, prop);
}
69
// Bit flags combined into the decoder's flags bitmask.
const CONVERTER_FLAGS_FLUSH = 1 << 0;
const CONVERTER_FLAGS_FATAL = 1 << 1;
const CONVERTER_FLAGS_IGNORE_BOM = 1 << 2;

// Shared zero-length input used as the default argument of decode().
const empty = new Uint8Array(0);
75
// Lookup table from encoding label to the encoding name it resolves to.
// Keys are lower-case; callers normalise labels before looking them up.
const encodings = new Map();
{
  // Each row is [encoding name, [labels that resolve to that name]].
  const table = [
    ['utf-8', ['unicode-1-1-utf-8', 'utf8', 'utf-8']],
    ['ibm866', ['866', 'cp866', 'csibm866', 'ibm866']],
    ['iso-8859-2', [
      'csisolatin2', 'iso-8859-2', 'iso-ir-101', 'iso8859-2', 'iso88592',
      'iso_8859-2', 'iso_8859-2:1987', 'l2', 'latin2',
    ]],
    ['iso-8859-3', [
      'csisolatin3', 'iso-8859-3', 'iso-ir-109', 'iso8859-3', 'iso88593',
      'iso_8859-3', 'iso_8859-3:1988', 'l3', 'latin3',
    ]],
    ['iso-8859-4', [
      'csisolatin4', 'iso-8859-4', 'iso-ir-110', 'iso8859-4', 'iso88594',
      'iso_8859-4', 'iso_8859-4:1988', 'l4', 'latin4',
    ]],
    ['iso-8859-5', [
      'csisolatincyrillic', 'cyrillic', 'iso-8859-5', 'iso-ir-144',
      'iso8859-5', 'iso88595', 'iso_8859-5', 'iso_8859-5:1988',
    ]],
    ['iso-8859-6', [
      'arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic',
      'ecma-114', 'iso-8859-6', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127',
      'iso8859-6', 'iso88596', 'iso_8859-6', 'iso_8859-6:1987',
    ]],
    ['iso-8859-7', [
      'csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8',
      'iso-8859-7', 'iso-ir-126', 'iso8859-7', 'iso88597', 'iso_8859-7',
      'iso_8859-7:1987', 'sun_eu_greek',
    ]],
    ['iso-8859-8', [
      'csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8',
      'iso-8859-8-e', 'iso-ir-138', 'iso8859-8', 'iso88598', 'iso_8859-8',
      'iso_8859-8:1988', 'visual',
    ]],
    ['iso-8859-8-i', ['csiso88598i', 'iso-8859-8-i', 'logical']],
    ['iso-8859-10', [
      'csisolatin6', 'iso-8859-10', 'iso-ir-157', 'iso8859-10', 'iso885910',
      'l6', 'latin6',
    ]],
    ['iso-8859-13', ['iso-8859-13', 'iso8859-13', 'iso885913']],
    ['iso-8859-14', ['iso-8859-14', 'iso8859-14', 'iso885914']],
    ['iso-8859-15', [
      'csisolatin9', 'iso-8859-15', 'iso8859-15', 'iso885915', 'iso_8859-15',
      'l9',
    ]],
    ['koi8-r', ['cskoi8r', 'koi', 'koi8', 'koi8-r', 'koi8_r']],
    ['koi8-u', ['koi8-ru', 'koi8-u']],
    ['macintosh', ['csmacintosh', 'mac', 'macintosh', 'x-mac-roman']],
    ['windows-874', [
      'dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620',
      'windows-874',
    ]],
    ['windows-1250', ['cp1250', 'windows-1250', 'x-cp1250']],
    ['windows-1251', ['cp1251', 'windows-1251', 'x-cp1251']],
    ['windows-1252', [
      'ansi_x3.4-1968', 'ascii', 'cp1252', 'cp819', 'csisolatin1', 'ibm819',
      'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1',
      'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii', 'windows-1252',
      'x-cp1252',
    ]],
    ['windows-1253', ['cp1253', 'windows-1253', 'x-cp1253']],
    ['windows-1254', [
      'cp1254', 'csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9',
      'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5',
      'windows-1254', 'x-cp1254',
    ]],
    ['windows-1255', ['cp1255', 'windows-1255', 'x-cp1255']],
    ['windows-1256', ['cp1256', 'windows-1256', 'x-cp1256']],
    ['windows-1257', ['cp1257', 'windows-1257', 'x-cp1257']],
    ['windows-1258', ['cp1258', 'windows-1258', 'x-cp1258']],
    ['x-mac-cyrillic', ['x-mac-cyrillic', 'x-mac-ukrainian']],
    ['gbk', [
      'chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312',
      'gb_2312-80', 'gbk', 'iso-ir-58', 'x-gbk',
    ]],
    ['gb18030', ['gb18030']],
    ['big5', ['big5', 'big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5']],
    ['euc-jp', ['cseucpkdfmtjapanese', 'euc-jp', 'x-euc-jp']],
    ['iso-2022-jp', ['csiso2022jp', 'iso-2022-jp']],
    ['shift_jis', [
      'csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'shift_jis', 'sjis',
      'windows-31j', 'x-sjis',
    ]],
    ['euc-kr', [
      'cseuckr', 'csksc56011987', 'euc-kr', 'iso-ir-149', 'korean',
      'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601',
      'windows-949',
    ]],
    ['utf-16be', ['utf-16be']],
    ['utf-16le', ['utf-16le', 'utf-16']],
  ];

  // Flatten the grouped table into label -> name entries, row by row, so the
  // map's insertion order follows the table order.
  for (let row = 0; row < table.length; row++) {
    const name = table[row][0];
    const labels = table[row][1];
    for (let i = 0; i < labels.length; i++) {
      encodings.set(labels[i], name);
    }
  }
}
289
// True only for the five ASCII whitespace characters: tab, line feed,
// form feed, carriage return and space.
function isAsciiWhitespace(ch) {
  switch (ch) {
    case '\u0009':
    case '\u000a':
    case '\u000c':
    case '\u000d':
    case '\u0020':
      return true;
    default:
      return false;
  }
}

// String.prototype.trim cannot be used here because it also strips
// non-ASCII whitespace, so the leading and trailing ASCII whitespace is
// removed by hand.
function trimAsciiWhitespace(label) {
  let start = 0;
  let end = label.length;
  while (start < end && isAsciiWhitespace(label[start])) {
    start++;
  }
  while (end > start && isAsciiWhitespace(label[end - 1])) {
    end--;
  }
  return label.slice(start, end);
}
313
// Lower-cases only the ASCII letters A-Z and leaves every other code unit
// untouched. String.prototype.toLowerCase applies Unicode case mapping, which
// would fold characters such as U+212A KELVIN SIGN into the ASCII letter 'k'.
function asciiLowerCase(str) {
  let out = '';
  for (let i = 0; i < str.length; i++) {
    const code = str.charCodeAt(i);
    const isAsciiUpper = code >= 0x41 && code <= 0x5a;
    out += isAsciiUpper ? str[i].toLowerCase() : str[i];
  }
  return out;
}

// Resolves an encoding label to its encoding name using the `encodings`
// table. Returns the name, or undefined when the label is not known.
//
// The label is first looked up verbatim (fast path for already-normalised
// labels). Otherwise it is ASCII-lower-cased, stripped of leading/trailing
// ASCII whitespace and looked up again. Only ASCII case is folded so that
// labels containing non-ASCII look-alikes are not accepted.
function getEncodingFromLabel(label) {
  const enc = encodings.get(label);
  if (enc !== undefined) return enc;
  return encodings.get(trimAsciiWhitespace(asciiLowerCase(label)));
}
319
// Two-slot scratch array passed to the encodeInto() binding. After the call,
// slot 0 is reported to the caller as `read` and slot 1 as `written`.
const encodeIntoResults = new Uint32Array(2);

// Encoder whose `encoding` is always 'utf-8'. Every member first verifies
// that `this` carries the kEncoder brand.
class TextEncoder {
  constructor() {
    this[kEncoder] = true;
  }

  // Always 'utf-8'.
  get encoding() {
    validateEncoder(this);
    return 'utf-8';
  }

  // Stringifies `input` (default: empty string) and returns the result of
  // the encodeUtf8String() binding.
  encode(input = '') {
    validateEncoder(this);
    const text = `${input}`;
    return encodeUtf8String(text);
  }

  // Encodes the string `src` into the Uint8Array `dest` and returns
  // `{ read, written }` taken from the shared results array.
  encodeInto(src, dest) {
    validateEncoder(this);
    validateString(src, 'src');
    if (!(dest && isUint8Array(dest))) {
      throw new ERR_INVALID_ARG_TYPE('dest', 'Uint8Array', dest);
    }
    encodeInto(src, dest, encodeIntoResults);
    const read = encodeIntoResults[0];
    const written = encodeIntoResults[1];
    return { read, written };
  }

  // Custom inspection hook: inspects a stand-in object exposing only
  // `encoding`, or returns the instance itself when depth is negative.
  [inspect](depth, opts) {
    validateEncoder(this);
    const depthExhausted = typeof depth === 'number' && depth < 0;
    if (depthExhausted) {
      return this;
    }
    const ctor = getConstructorOf(this);
    const proto = { constructor: ctor !== null ? ctor : TextEncoder };
    const summary = ObjectCreate(proto);
    summary.encoding = this.encoding;
    // Lazy to avoid circular dependency
    const inspectModule = require('internal/util/inspect');
    return inspectModule.inspect(summary, opts);
  }
}
359
// Make the public TextEncoder members enumerable on the prototype and give
// instances a 'TextEncoder' Symbol.toStringTag.
{
  const descriptors = {
    'encode': { enumerable: true },
    'encodeInto': { enumerable: true },
    'encoding': { enumerable: true },
    [SymbolToStringTag]: {
      configurable: true,
      value: 'TextEncoder'
    }
  };
  ObjectDefineProperties(TextEncoder.prototype, descriptors);
}
369
// Choose the TextDecoder implementation once at load time: the ICU-backed
// class when the config binding reports `hasIntl`, the JS fallback otherwise.
const { hasIntl } = internalBinding('config');
const TextDecoder = hasIntl ? makeTextDecoderICU() : makeTextDecoderJS();
374
// Builds and returns the TextDecoder class that delegates decoding to the
// 'icu' internal binding (`getConverter` to create a converter handle,
// `decode` to run it).
function makeTextDecoderICU() {
  const {
    decode: _decode,
    getConverter,
  } = internalBinding('icu');

  class TextDecoder {
    // `encoding` is stringified and resolved through getEncodingFromLabel().
    // `options` must be an object (or null); its `fatal` and `ignoreBOM`
    // members are folded into the converter flags.
    // Throws ERR_ENCODING_NOT_SUPPORTED when the label is unknown or the
    // binding yields no converter handle for it.
    constructor(encoding = 'utf-8', options = {}) {
      encoding = `${encoding}`;
      validateArgument(options, 'object', 'options', 'Object');

      const enc = getEncodingFromLabel(encoding);
      if (enc === undefined)
        throw new ERR_ENCODING_NOT_SUPPORTED(encoding);

      // `typeof null` is 'object', so null passes validation above and is
      // treated as "no options".
      let flags = 0;
      if (options !== null) {
        flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
        flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
      }

      const handle = getConverter(enc, flags);
      if (handle === undefined)
        throw new ERR_ENCODING_NOT_SUPPORTED(encoding);

      this[kDecoder] = true;
      this[kHandle] = handle;
      this[kFlags] = flags;
      this[kEncoding] = enc;
    }


    // Decodes `input` (ArrayBuffer-like or ArrayBufferView; defaults to an
    // empty Uint8Array) and returns the decoded string.
    // Throws ERR_INVALID_ARG_TYPE for any other input and
    // ERR_ENCODING_INVALID_ENCODED_DATA when the binding returns a number
    // instead of decoded data.
    decode(input = empty, options = {}) {
      validateDecoder(this);
      if (isAnyArrayBuffer(input)) {
        input = lazyBuffer().from(input);
      } else if (!isArrayBufferView(input)) {
        throw new ERR_INVALID_ARG_TYPE('input',
                                       ['ArrayBuffer', 'ArrayBufferView'],
                                       input);
      }
      validateArgument(options, 'object', 'options', 'Object');

      // Flush unless the caller explicitly requests streaming. A null
      // `options` is treated like an empty object (stream: false), so it
      // flushes as well, matching the JS fallback implementation.
      const flags = (options !== null && options.stream) ?
        0 : CONVERTER_FLAGS_FLUSH;

      const ret = _decode(this[kHandle], input, flags);
      if (typeof ret === 'number') {
        // A numeric return is passed on as the error detail.
        throw new ERR_ENCODING_INVALID_ENCODED_DATA(this.encoding, ret);
      }
      // NOTE(review): presumably `ret` is a Buffer of UTF-16 code units;
      // confirm against the binding.
      return ret.toString('ucs2');
    }
  }

  return TextDecoder;
}
432
// Builds and returns the fallback TextDecoder class, which decodes with
// StringDecoder from the 'string_decoder' module and supports only the
// 'utf-8' and 'utf-16le' encodings.
function makeTextDecoderJS() {
  // StringDecoder is required on first use and cached afterwards.
  let cachedStringDecoder;
  const loadStringDecoder = () => {
    if (cachedStringDecoder === undefined) {
      cachedStringDecoder = require('string_decoder').StringDecoder;
    }
    return cachedStringDecoder;
  };

  // Per-instance marker: true once the start of the current stream has been
  // examined for a byte order mark.
  const kBOMSeen = Symbol('BOM seen');

  const isSupported = (name) => name === 'utf-8' || name === 'utf-16le';

  class TextDecoder {
    // Throws ERR_ENCODING_NOT_SUPPORTED for unknown or unsupported labels and
    // ERR_NO_ICU when the `fatal` option is requested.
    constructor(encoding = 'utf-8', options = {}) {
      encoding = `${encoding}`;
      validateArgument(options, 'object', 'options', 'Object');

      const enc = getEncodingFromLabel(encoding);
      if (!(enc !== undefined && isSupported(enc))) {
        throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
      }

      let flags = 0;
      if (options !== null) {
        if (options.fatal) {
          throw new ERR_NO_ICU('"fatal" option');
        }
        if (options.ignoreBOM) {
          flags |= CONVERTER_FLAGS_IGNORE_BOM;
        }
      }

      this[kDecoder] = true;
      // StringDecoder will normalize WHATWG encoding to Node.js encoding.
      const Decoder = loadStringDecoder();
      this[kHandle] = new Decoder(enc);
      this[kFlags] = flags;
      this[kEncoding] = enc;
      this[kBOMSeen] = false;
    }

    // Decodes `input` (ArrayBuffer-like or ArrayBufferView) to a string.
    // With `options.stream` truthy the data is written to the StringDecoder;
    // otherwise the decoder is ended with it.
    decode(input = empty, options = {}) {
      validateDecoder(this);

      const isBuffer = isAnyArrayBuffer(input);
      if (!isBuffer && !isArrayBufferView(input)) {
        throw new ERR_INVALID_ARG_TYPE('input',
                                       ['ArrayBuffer', 'ArrayBufferView'],
                                       input);
      }
      const bytes = isBuffer ?
        lazyBuffer().from(input) :
        lazyBuffer().from(input.buffer, input.byteOffset, input.byteLength);

      validateArgument(options, 'object', 'options', 'Object');

      // The previous call flushed, so this call starts a new stream and the
      // BOM check applies again.
      const previousCallFlushed = (this[kFlags] & CONVERTER_FLAGS_FLUSH) !== 0;
      if (previousCallFlushed) {
        this[kBOMSeen] = false;
      }

      const decoder = this[kHandle];
      let text;
      if (options !== null && options.stream) {
        this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
        text = decoder.write(bytes);
      } else {
        this[kFlags] |= CONVERTER_FLAGS_FLUSH;
        text = decoder.end(bytes);
      }

      const shouldCheckBOM = text.length > 0 &&
                             !this[kBOMSeen] &&
                             !(this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM);
      if (shouldCheckBOM) {
        // If the very first result in the stream is a BOM, and we are not
        // explicitly told to ignore it, then we discard it.
        if (text[0] === '\ufeff') {
          text = text.slice(1);
        }
        this[kBOMSeen] = true;
      }

      return text;
    }
  }

  return TextDecoder;
}
517
// Mix in some shared properties.
// Whichever TextDecoder class was selected receives the accessors and the
// custom inspect hook below, copied onto its prototype by descriptor.
{
  ObjectDefineProperties(
    TextDecoder.prototype,
    ObjectGetOwnPropertyDescriptors({
      // The encoding name stored by the constructor.
      get encoding() {
        validateDecoder(this);
        return this[kEncoding];
      },

      // True when the FATAL bit is set in the instance flags.
      get fatal() {
        validateDecoder(this);
        return (this[kFlags] & CONVERTER_FLAGS_FATAL) === CONVERTER_FLAGS_FATAL;
      },

      // True when the IGNORE_BOM bit is set in the instance flags.
      get ignoreBOM() {
        validateDecoder(this);
        return (this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM) ===
               CONVERTER_FLAGS_IGNORE_BOM;
      },

      // Custom inspection hook: inspects a stand-in object exposing
      // encoding/fatal/ignoreBOM (plus the internal flags and handle when
      // `showHidden` is requested), or returns the instance itself when
      // depth is negative.
      [inspect](depth, opts) {
        validateDecoder(this);
        if (typeof depth === 'number' && depth < 0)
          return this;
        const ctor = getConstructorOf(this);
        const obj = ObjectCreate({
          constructor: ctor === null ? TextDecoder : ctor
        });
        obj.encoding = this.encoding;
        obj.fatal = this.fatal;
        obj.ignoreBOM = this.ignoreBOM;
        // `opts` may be missing when the hook is invoked directly rather
        // than through inspect(); do not dereference it blindly.
        if (opts != null && opts.showHidden) {
          obj[kFlags] = this[kFlags];
          obj[kHandle] = this[kHandle];
        }
        // Lazy to avoid circular dependency
        return require('internal/util/inspect').inspect(obj, opts);
      }
    }));
  // Adjust enumerability of the prototype members and tag instances as
  // 'TextDecoder' for Object.prototype.toString.
  ObjectDefineProperties(TextDecoder.prototype, {
    decode: { enumerable: true },
    [inspect]: { enumerable: false },
    [SymbolToStringTag]: {
      configurable: true,
      value: 'TextDecoder'
    }
  });
}
567
568module.exports = {
569  getEncodingFromLabel,
570  TextDecoder,
571  TextEncoder
572};
573