• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1'use strict';
2
3const { getOptionValue } = require('internal/options');
// Emit the DEP0040 deprecation warning for this module, but only when the
// `--pending-deprecation` option is set.
if (getOptionValue('--pending-deprecation')) {
	const message =
		'The `punycode` module is deprecated. Please use a userland ' +
		'alternative instead.';
	process.emitWarning(message, 'DeprecationWarning', 'DEP0040');
}
12
/** Highest positive signed 32-bit integer value */
const maxInt = 0x7FFFFFFF; // aka. 2147483647 or 2^31-1

/** Bootstring parameters */
const base = 36;
const tMin = 1;
const tMax = 26;
const skew = 38;
const damp = 700;
const initialBias = 72;
const initialN = 0x80; // 128, the first non-basic code point
const delimiter = '-'; // '\x2D'

/** Regular expressions */
// Matches labels that start with the lowercase `xn--` prefix.
const regexPunycode = /^xn--/;
// Matches any code unit above U+007F; U+007F DEL itself does not match.
const regexNonASCII = /[^\0-\x7F]/; // Note: U+007F DEL is excluded too.
// Global, so that `replace()` rewrites every separator in a string.
const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators

/** Error messages, keyed by the error type passed to `error()` */
const errors = {
	'overflow': 'Overflow: input needs wider integers to process',
	'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
	'invalid-input': 'Invalid input'
};

/** Convenience shortcuts */
const baseMinusTMin = base - tMin;
const { floor } = Math;
const { fromCharCode: stringFromCharCode } = String;
42
43/*--------------------------------------------------------------------------*/
44
/**
 * Raises the error registered under `type` in the `errors` table.
 * @private
 * @param {String} type The error type (a key of `errors`).
 * @throws {RangeError} Always; the message is `errors[type]`.
 */
function error(type) {
	const message = errors[type];
	throw new RangeError(message);
}
54
/**
 * Minimal `Array#map` replacement: calls `callback` with each item (and
 * nothing else) and collects the results at the same indices.
 * @private
 * @param {Array} array The array to iterate over.
 * @param {Function} callback The function that gets called for every array
 * item.
 * @returns {Array} A new array of values returned by the callback function.
 */
function map(array, callback) {
	const mapped = [];
	// Walk from the last item to the first, filling `mapped` back to front.
	for (let index = array.length - 1; index >= 0; --index) {
		mapped[index] = callback(array[index]);
	}
	return mapped;
}
71
/**
 * A simple `Array#map`-like wrapper to work with domain name strings or email
 * addresses. The domain is split into labels, `callback` is applied to each
 * label, and the results are joined with `.`.
 * @private
 * @param {String} domain The domain name or email address.
 * @param {Function} callback The function that gets called for every
 * label of the domain name.
 * @returns {String} The input with every domain label replaced by the value
 * the callback returned for it; anything up to and including the last `@`
 * is copied through unchanged.
 */
function mapDomain(domain, callback) {
	// In email addresses, only the domain name should be punycoded. Leave
	// the local part (i.e. everything up to the last `@`) intact. Splitting
	// at the LAST `@` matters: with more than one `@` in the input, a plain
	// `split('@')` would keep only the first two pieces and silently drop
	// the rest of the string.
	const atIndex = domain.lastIndexOf('@');
	const localPart = atIndex === -1 ? '' : domain.slice(0, atIndex + 1);
	const host = atIndex === -1 ? domain : domain.slice(atIndex + 1);

	// Normalize all RFC 3490 separators to `.` first, then split on the
	// plain string (avoids `split(regex)`).
	const labels = host.replace(regexSeparators, '\x2E').split('.');
	return localPart + map(labels, callback).join('.');
}
97
/**
 * Converts a string into an array of numeric code points. A high surrogate
 * immediately followed by a low surrogate is combined into one code point;
 * every other code unit (including unmatched surrogates) is returned as is.
 * @see `punycode.ucs2.encode`
 * @see <https://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode.ucs2
 * @name decode
 * @param {String} string The Unicode input string (UCS-2).
 * @returns {Array} The new array of code points.
 */
function ucs2decode(string) {
	const codePoints = [];
	const size = string.length;
	let position = 0;
	while (position < size) {
		const unit = string.charCodeAt(position);
		position += 1;
		const isHighSurrogate = unit >= 0xD800 && unit <= 0xDBFF;
		if (isHighSurrogate && position < size) {
			const next = string.charCodeAt(position);
			if ((next & 0xFC00) === 0xDC00) {
				// Matched pair: consume the low surrogate as well.
				codePoints.push(((unit & 0x3FF) << 10) + (next & 0x3FF) + 0x10000);
				position += 1;
				continue;
			}
			// Unmatched high surrogate: `next` is not consumed here, in case it
			// is itself the high surrogate of a surrogate pair.
		}
		codePoints.push(unit);
	}
	return codePoints;
}
134
/**
 * Creates a string based on an array of numeric code points.
 * @see `punycode.ucs2.decode`
 * @memberOf punycode.ucs2
 * @name encode
 * @param {Array} codePoints The array (or other iterable) of numeric code
 * points.
 * @returns {String} The new Unicode string (UCS-2).
 * @throws {RangeError} If any value is not a valid Unicode code point.
 */
const ucs2encode = (codePoints) => {
	const points = Array.isArray(codePoints) ? codePoints : [...codePoints];
	// Spreading the whole array into a single `String.fromCodePoint()` call
	// passes every element as a separate argument, which overflows the call
	// stack for large inputs. Convert in bounded chunks instead; each chunk
	// holds whole code points, so concatenating the pieces is lossless.
	const chunkSize = 0x2000;
	let result = '';
	for (let start = 0; start < points.length; start += chunkSize) {
		result += String.fromCodePoint(...points.slice(start, start + chunkSize));
	}
	return result;
};
144
/**
 * Maps a basic code point to the digit it stands for.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
const basicToDigit = function(codePoint) {
	// ASCII a..z map to 0..25
	if (codePoint >= 0x61 && codePoint < 0x7B) {
		return codePoint - 0x61;
	}
	// ASCII A..Z map to 0..25 as well
	if (codePoint >= 0x41 && codePoint < 0x5B) {
		return codePoint - 0x41;
	}
	// ASCII 0..9 map to 26..35
	if (codePoint >= 0x30 && codePoint < 0x3A) {
		return codePoint - 0x30 + 26;
	}
	// Anything else is not a digit.
	return base;
};
166
/**
 * Maps a digit to the basic code point that represents it.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point.
 * @param {Number} flag Non-zero to request the uppercase form.
 * @returns {Number} The basic code point whose value (when used for
 * representing integers) is `digit`, which needs to be in the range
 * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is
 * used; else, the lowercase form is used. The behavior is undefined
 * if `flag` is non-zero and `digit` has no uppercase form.
 */
const digitToBasic = function(digit, flag) {
	//  0..25 map to ASCII a..z or A..Z (22 + 75 = 0x61, the code of `a`)
	// 26..35 map to ASCII 0..9       (26 + 22 = 0x30, the code of `0`)
	const letterOffset = digit < 26 ? 75 : 0;
	// The loose `!=` is deliberate: it decides which non-numeric flags count
	// as "non-zero". Uppercase letters sit 32 (1 << 5) below lowercase ones.
	const caseShift = flag != 0 ? 32 : 0;
	return digit + 22 + letterOffset - caseShift;
};
183
/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * https://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just encoded or decoded.
 * @param {Number} numPoints The divisor used to scale `delta`.
 * @param {Boolean} firstTime Whether `delta` is divided by `damp` (truthy)
 * or halved (falsy) before scaling.
 * @returns {Number} The new bias.
 */
const adapt = function(delta, numPoints, firstTime) {
	let scaled = firstTime ? floor(delta / damp) : delta >> 1;
	scaled += floor(scaled / numPoints);

	// Keep dividing until `scaled` is at or below the threshold, counting
	// `base` per division in `k`.
	const threshold = baseMinusTMin * tMax >> 1;
	let k = 0;
	while (scaled > threshold) {
		scaled = floor(scaled / baseMinusTMin);
		k += base;
	}
	return floor(k + (baseMinusTMin + 1) * scaled / (scaled + skew));
};
198
/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} If a code point before the last delimiter is not
 * basic (>= 0x80), if the encoded part is malformed, or if decoding would
 * need integers wider than `maxInt`.
 */
const decode = function(input) {
	// Don't use UCS-2.
	const output = [];
	const inputLength = input.length;
	let i = 0;
	let n = initialN;
	let bias = initialBias;

	// Handle the basic code points: let `basic` be the number of input code
	// points before the last delimiter, or `0` if there is none, then copy
	// the first basic code points to the output.

	let basic = input.lastIndexOf(delimiter);
	if (basic < 0) {
		basic = 0;
	}

	for (let j = 0; j < basic; ++j) {
		const codePoint = input.charCodeAt(j);
		// if it's not a basic code point
		if (codePoint >= 0x80) {
			error('not-basic');
		}
		output.push(codePoint);
	}

	// Main decoding loop: start just after the last delimiter if any basic code
	// points were copied; start at the beginning otherwise.

	for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

		// `index` is the index of the next character to be consumed.
		// Decode a generalized variable-length integer into `delta`,
		// which gets added to `i`. The overflow checking is easier
		// if we increase `i` as we go, then subtract off its starting
		// value at the end to obtain `delta`.
		const oldi = i;
		for (let w = 1, k = base; /* no condition */; k += base) {

			// The integer must be terminated by a digit below the threshold
			// before the input runs out.
			if (index >= inputLength) {
				error('invalid-input');
			}

			const digit = basicToDigit(input.charCodeAt(index++));

			if (digit >= base) {
				error('invalid-input');
			}
			if (digit > floor((maxInt - i) / w)) {
				error('overflow');
			}

			i += digit * w;
			const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

			// A digit below the threshold `t` ends the current integer.
			if (digit < t) {
				break;
			}

			const baseMinusT = base - t;
			if (w > floor(maxInt / baseMinusT)) {
				error('overflow');
			}

			w *= baseMinusT;

		}

		const out = output.length + 1;
		bias = adapt(i - oldi, out, oldi === 0);

		// `i` was supposed to wrap around from `out` to `0`,
		// incrementing `n` each time, so we'll fix that now:
		if (floor(i / out) > maxInt - n) {
			error('overflow');
		}

		n += floor(i / out);
		i %= out;

		// Insert `n` at position `i` of the output.
		output.splice(i++, 0, n);

	}

	// Build the result in bounded chunks. Spreading all of `output` into one
	// `String.fromCodePoint()` call passes every code point as a separate
	// argument, which overflows the call stack for long inputs.
	const chunkSize = 0x2000;
	let result = '';
	for (let start = 0; start < output.length; start += chunkSize) {
		result += String.fromCodePoint(...output.slice(start, start + chunkSize));
	}
	return result;
};
292
/**
 * Converts a string of Unicode symbols (e.g. a domain name label) to a
 * Punycode string of ASCII-only symbols.
 * @memberOf punycode
 * @param {String} input The string of Unicode symbols.
 * @returns {String} The resulting Punycode string of ASCII-only symbols.
 * @throws {RangeError} If encoding would need integers wider than `maxInt`.
 */
const encode = function(input) {
	// Work on whole code points rather than on UCS-2 code units.
	const codePoints = ucs2decode(input);
	const total = codePoints.length;

	// Copy the basic code points to the output, in input order.
	const output = [];
	for (const codePoint of codePoints) {
		if (codePoint < 0x80) {
			output.push(stringFromCharCode(codePoint));
		}
	}

	// `basicLength` is the number of basic code points; `handled` counts the
	// code points (basic or not) that have been dealt with so far.
	const basicLength = output.length;
	let handled = basicLength;

	// Finish the basic string with a delimiter unless it's empty.
	if (basicLength) {
		output.push(delimiter);
	}

	// Encoder state.
	let n = initialN;
	let delta = 0;
	let bias = initialBias;

	// Main encoding loop:
	while (handled < total) {

		// Every non-basic code point below `n` is done. Pick the smallest
		// code point that is still >= `n`.
		let m = maxInt;
		for (const codePoint of codePoints) {
			if (codePoint >= n && codePoint < m) {
				m = codePoint;
			}
		}

		// Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
		// but guard against overflow.
		const handledPlusOne = handled + 1;
		if (m - n > floor((maxInt - delta) / handledPlusOne)) {
			error('overflow');
		}

		delta += (m - n) * handledPlusOne;
		n = m;

		for (const codePoint of codePoints) {
			if (codePoint < n) {
				delta += 1;
				if (delta > maxInt) {
					error('overflow');
				}
			}
			if (codePoint !== n) {
				continue;
			}

			// Represent delta as a generalized variable-length integer.
			let q = delta;
			let k = base;
			while (true) {
				// Threshold for this digit position: `k - bias` clamped to
				// the range `tMin`..`tMax`.
				let t;
				if (k <= bias) {
					t = tMin;
				} else if (k >= bias + tMax) {
					t = tMax;
				} else {
					t = k - bias;
				}
				if (q < t) {
					break;
				}
				const excess = q - t;
				const radix = base - t;
				output.push(
					stringFromCharCode(digitToBasic(t + excess % radix, 0))
				);
				q = floor(excess / radix);
				k += base;
			}

			// The final digit is below the threshold and terminates the integer.
			output.push(stringFromCharCode(digitToBasic(q, 0)));
			bias = adapt(delta, handledPlusOne, handled === basicLength);
			delta = 0;
			handled += 1;
		}

		delta += 1;
		n += 1;

	}
	return output.join('');
};
387
/**
 * Converts a Punycode string representing a domain name or an email address
 * to Unicode. Only labels that start with `xn--` are decoded; every other
 * label is returned untouched, so calling this on a string that has already
 * been converted to Unicode is harmless.
 * @memberOf punycode
 * @param {String} input The Punycoded domain name or email address to
 * convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
const toUnicode = function(input) {
	const decodeLabel = (label) => {
		if (!regexPunycode.test(label)) {
			return label;
		}
		// Drop the 4-character `xn--` prefix and lowercase the rest before
		// decoding it.
		return decode(label.slice(4).toLowerCase());
	};
	return mapDomain(input, decodeLabel);
};
406
/**
 * Converts a Unicode string representing a domain name or an email address to
 * Punycode. Only labels containing non-ASCII characters are encoded (and
 * prefixed with `xn--`); every other label is returned untouched, so calling
 * this with a domain that's already in ASCII is harmless.
 * @memberOf punycode
 * @param {String} input The domain name or email address to convert, as a
 * Unicode string.
 * @returns {String} The Punycode representation of the given domain name or
 * email address.
 */
const toASCII = function(input) {
	const encodeLabel = (label) => {
		if (!regexNonASCII.test(label)) {
			return label;
		}
		return 'xn--' + encode(label);
	};
	return mapDomain(input, encodeLabel);
};
425
426/*--------------------------------------------------------------------------*/
427
/** Define the public API */
const punycode = {
	/**
	 * A string representing the current Punycode.js version number.
	 * @memberOf punycode
	 * @type String
	 */
	version: '2.1.0',
	/**
	 * An object of methods to convert from JavaScript's internal character
	 * representation (UCS-2) to Unicode code points, and back.
	 * @see <https://mathiasbynens.be/notes/javascript-encoding>
	 * @memberOf punycode
	 * @type Object
	 */
	ucs2: {
		decode: ucs2decode,
		encode: ucs2encode,
	},
	decode,
	encode,
	toASCII,
	toUnicode,
};

// The module's only export is the API object itself.
module.exports = punycode;
454