/*! Punycode.js 2.1.0: Punycode (RFC 3492) converter for domain names and email addresses. */
'use strict';

/** Highest positive signed 32-bit integer value */
const maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1

/** Bootstring parameters (the Punycode profile, RFC 3492 section 5) */
const base = 36;
const tMin = 1;
const tMax = 26;
const skew = 38;
const damp = 700;
const initialBias = 72;
const initialN = 128; // 0x80
const delimiter = '-'; // '\x2D'

/** Regular expressions */
const regexPunycode = /^xn--/;
// Matches any code unit above U+007F. U+007F (DEL) is itself ASCII and must
// not match; otherwise pure-ASCII labels containing DEL get an `xn--` prefix.
const regexNonASCII = /[^\0-\x7F]/;
const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators

/** Error messages, keyed by the error type passed to `error()` */
const errors = {
	'overflow': 'Overflow: input needs wider integers to process',
	'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
	'invalid-input': 'Invalid input'
};

/** Convenience shortcuts */
const baseMinusTMin = base - tMin;
const floor = Math.floor;
const stringFromCharCode = String.fromCharCode;
32
33/*--------------------------------------------------------------------------*/
34
/**
 * Raises the error registered under the given type.
 * @private
 * @param {String} type The error type; used as a key into `errors`.
 * @throws {RangeError} Always. The message is `errors[type]`.
 */
function error(type) {
	const message = errors[type];
	throw new RangeError(message);
}
44
/**
 * Minimal stand-in for `Array#map`: applies `fn` to every item and collects
 * the results at the same indices.
 * @private
 * @param {Array} array The array to iterate over.
 * @param {Function} fn Called with one argument, the current item.
 * @returns {Array} A new array holding the values returned by `fn`.
 */
function map(array, fn) {
	const mapped = [];
	// Items are visited from the last index down to the first.
	for (let index = array.length - 1; index >= 0; --index) {
		mapped[index] = fn(array[index]);
	}
	return mapped;
}
61
/**
 * A simple `Array#map`-like wrapper to work with domain name strings or email
 * addresses. The domain is split into labels, `fn` is applied to each label,
 * and the results are joined back together with `.`.
 * @private
 * @param {String} string The domain name or email address.
 * @param {Function} fn The function that gets called for every label.
 * @returns {String} A new string built from the labels returned by `fn`,
 * prefixed with the untouched local part when `string` is an email address.
 */
function mapDomain(string, fn) {
	let localPart = '';
	let domain = string;
	// In email addresses, only the domain name should be punycoded. Leave the
	// local part (i.e. everything up to and including the last `@`) intact.
	// Splitting on the last `@` keeps inputs with several `@` from being
	// truncated.
	const atIndex = string.lastIndexOf('@');
	if (atIndex !== -1) {
		localPart = string.slice(0, atIndex + 1);
		domain = string.slice(atIndex + 1);
	}
	// Avoid `split(regex)` for IE8 compatibility. See #17.
	domain = domain.replace(regexSeparators, '\x2E');
	const labels = domain.split('.');
	const encoded = map(labels, fn).join('.');
	return localPart + encoded;
}
87
/**
 * Turns a string into an array of numeric Unicode code points. JavaScript
 * strings expose UTF-16 code units, so a well-formed surrogate pair (a high
 * surrogate directly followed by a low surrogate) is merged into one code
 * point. Any surrogate that is not part of such a pair is emitted unchanged
 * as its own entry.
 * @see `punycode.ucs2.encode`
 * @see <https://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode.ucs2
 * @name decode
 * @param {String} string The Unicode input string (UCS-2).
 * @returns {Array} The new array of code points.
 */
function ucs2decode(string) {
	const codePoints = [];
	const length = string.length;
	let position = 0;
	while (position < length) {
		// `codePointAt` combines a valid surrogate pair starting here and
		// otherwise returns the single code unit at this position.
		const codePoint = string.codePointAt(position);
		codePoints.push(codePoint);
		// Values above 0xFFFF came from a pair and occupy two code units.
		position += codePoint > 0xFFFF ? 2 : 1;
	}
	return codePoints;
}
124
/**
 * Creates a string based on an array of numeric code points.
 * @see `punycode.ucs2.decode`
 * @memberOf punycode.ucs2
 * @name encode
 * @param {Array} array The array (or other iterable) of numeric code points.
 * @returns {String} The new Unicode string (UCS-2).
 * @throws {RangeError} If a value is not a valid Unicode code point.
 */
const ucs2encode = array => {
	// Spreading an arbitrarily long array into a single call can exceed the
	// engine's argument limit, so convert in bounded chunks. Chunk boundaries
	// fall between code points, so surrogate pairs are never split.
	const chunkSize = 0x4000;
	let result = '';
	let chunk = [];
	for (const codePoint of array) {
		chunk.push(codePoint);
		if (chunk.length === chunkSize) {
			result += String.fromCodePoint(...chunk);
			chunk = [];
		}
	}
	return result + String.fromCodePoint(...chunk);
};
134
/**
 * Converts a basic code point into a digit/integer.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
const basicToDigit = function(codePoint) {
	// Each range needs an explicit lower bound: a bare `codePoint - 0x30 < 0x0A`
	// is also true for every code point below `0`, which would turn characters
	// such as `-` or `!` into bogus (even negative) digits.
	if (codePoint >= 0x30 && codePoint < 0x3A) {
		// '0'..'9' map to 26..35
		return 26 + (codePoint - 0x30);
	}
	if (codePoint >= 0x41 && codePoint < 0x5B) {
		// 'A'..'Z' map to 0..25
		return codePoint - 0x41;
	}
	if (codePoint >= 0x61 && codePoint < 0x7B) {
		// 'a'..'z' map to 0..25
		return codePoint - 0x61;
	}
	return base;
};
156
/**
 * Converts a digit/integer into a basic code point.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point, expected to
 * be in the range `0` to `base - 1`.
 * @param {*} flag Selects the letter case: when `flag != 0` the uppercase
 * form is used; otherwise the lowercase form is used.
 * @returns {Number} The basic code point whose value (when used for
 * representing integers) is `digit`. The behavior is undefined if `flag` is
 * non-zero and `digit` has no uppercase form.
 */
const digitToBasic = function(digit, flag) {
	// Digits 0..25 are letters: 22 + 75 = 97 lands on ASCII `a`.
	// Digits 26..35 are ASCII `0`..`9`: 26 + 22 = 48.
	const letterOffset = digit < 26 ? 75 : 0;
	// Uppercase letters sit 32 code points below lowercase ones. The loose
	// `!=` coerces `flag`, so `false` and `'0'` count as zero while an omitted
	// flag counts as non-zero.
	const caseShift = flag != 0 ? 32 : 0;
	return digit + 22 + letterOffset - caseShift;
};
173
/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * https://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just encoded or decoded.
 * @param {Number} numPoints Divisor used to scale the delta.
 * @param {Boolean} firstTime Truthy selects division by `damp`; otherwise
 * the delta is halved.
 * @returns {Number} The new bias.
 */
const adapt = function(delta, numPoints, firstTime) {
	let scaled = firstTime ? floor(delta / damp) : delta >> 1;
	scaled += floor(scaled / numPoints);

	// Repeatedly divide while the value exceeds the threshold, adding `base`
	// to `k` for each division.
	const limit = baseMinusTMin * tMax >> 1;
	let k = 0;
	while (scaled > limit) {
		scaled = floor(scaled / baseMinusTMin);
		k += base;
	}
	return floor(k + (baseMinusTMin + 1) * scaled / (scaled + skew));
};
188
/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} With the `not-basic` message if a code unit before
 * the last delimiter is >= 0x80, the `invalid-input` message if the encoded
 * part contains a non-digit character or ends in the middle of an integer,
 * and the `overflow` message if a value does not fit in `maxInt`.
 */
const decode = function(input) {
	// Don't use UCS-2: collect code points and build the string at the end.
	const output = [];
	const inputLength = input.length;
	let i = 0;
	let n = initialN;
	let bias = initialBias;

	// Handle the basic code points: let `basic` be the number of input code
	// points before the last delimiter, or `0` if there is none, then copy
	// the first basic code points to the output.

	let basic = input.lastIndexOf(delimiter);
	if (basic < 0) {
		basic = 0;
	}

	for (let j = 0; j < basic; ++j) {
		const codeUnit = input.charCodeAt(j);
		// if it's not a basic code point
		if (codeUnit >= 0x80) {
			error('not-basic');
		}
		output.push(codeUnit);
	}

	// Main decoding loop: start just after the last delimiter if any basic code
	// points were copied; start at the beginning otherwise.

	for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

		// `index` is the index of the next character to be consumed.
		// Decode a generalized variable-length integer into `delta`,
		// which gets added to `i`. The overflow checking is easier
		// if we increase `i` as we go, then subtract off its starting
		// value at the end to obtain `delta`.
		const oldi = i;
		for (let w = 1, k = base; /* no condition */; k += base) {

			// The input ended in the middle of a variable-length integer.
			if (index >= inputLength) {
				error('invalid-input');
			}

			const digit = basicToDigit(input.charCodeAt(index++));

			// `basicToDigit` returns `base` for characters that are not digits;
			// that is malformed input, not an arithmetic overflow.
			if (digit >= base) {
				error('invalid-input');
			}
			if (digit > floor((maxInt - i) / w)) {
				error('overflow');
			}

			i += digit * w;
			const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

			// A digit below the threshold terminates the integer.
			if (digit < t) {
				break;
			}

			const baseMinusT = base - t;
			if (w > floor(maxInt / baseMinusT)) {
				error('overflow');
			}

			w *= baseMinusT;

		}

		const out = output.length + 1;
		bias = adapt(i - oldi, out, oldi === 0);

		// `i` was supposed to wrap around from `out` to `0`,
		// incrementing `n` each time, so we'll fix that now:
		if (floor(i / out) > maxInt - n) {
			error('overflow');
		}

		n += floor(i / out);
		i %= out;

		// Insert `n` at position `i` of the output.
		output.splice(i++, 0, n);

	}

	return String.fromCodePoint(...output);
};
279
/**
 * Converts a string of Unicode symbols (e.g. a domain name label) to a
 * Punycode string of ASCII-only symbols.
 * @memberOf punycode
 * @param {String} input The string of Unicode symbols.
 * @returns {String} The resulting Punycode string of ASCII-only symbols.
 * @throws {RangeError} With the `overflow` message if a delta would exceed
 * `maxInt`.
 */
const encode = function(input) {
	// Work on whole code points rather than UTF-16 code units.
	const codePoints = ucs2decode(input);
	const codePointCount = codePoints.length;

	// Basic code points (< 0x80) are emitted first, unchanged and in order.
	const output = codePoints
		.filter(codePoint => codePoint < 0x80)
		.map(codePoint => stringFromCharCode(codePoint));

	// `basicLength` is the number of basic code points;
	// `handledCPCount` is the number of code points handled so far.
	const basicLength = output.length;
	let handledCPCount = basicLength;

	// Finish the basic string with a delimiter unless it's empty.
	if (basicLength > 0) {
		output.push(delimiter);
	}

	// Encoder state.
	let n = initialN;
	let delta = 0;
	let bias = initialBias;

	// Threshold for the digit at position `k`, clamped to [tMin, tMax]. Reads
	// the current `bias` at call time.
	const thresholdAt = k => {
		if (k <= bias) {
			return tMin;
		}
		if (k >= bias + tMax) {
			return tMax;
		}
		return k - bias;
	};

	// Main encoding loop:
	while (handledCPCount < codePointCount) {

		// All non-basic code points < n have been handled already. Find the
		// smallest code point that is >= n.
		let m = maxInt;
		for (let j = 0; j < codePointCount; ++j) {
			const candidate = codePoints[j];
			if (candidate >= n && candidate < m) {
				m = candidate;
			}
		}

		// Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
		// but guard against overflow.
		const handledCPCountPlusOne = handledCPCount + 1;
		if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
			error('overflow');
		}

		delta += (m - n) * handledCPCountPlusOne;
		n = m;

		for (let j = 0; j < codePointCount; ++j) {
			const current = codePoints[j];

			if (current < n) {
				delta += 1;
				if (delta > maxInt) {
					error('overflow');
				}
			}

			if (current !== n) {
				continue;
			}

			// Represent delta as a generalized variable-length integer.
			let q = delta;
			let k = base;
			let t = thresholdAt(k);
			while (q >= t) {
				const remainder = q - t;
				const baseMinusT = base - t;
				output.push(
					stringFromCharCode(digitToBasic(t + remainder % baseMinusT, 0))
				);
				q = floor(remainder / baseMinusT);
				k += base;
				t = thresholdAt(k);
			}

			// The final digit is below its threshold, which ends the integer.
			output.push(stringFromCharCode(digitToBasic(q, 0)));
			bias = adapt(delta, handledCPCountPlusOne, handledCPCount === basicLength);
			delta = 0;
			++handledCPCount;
		}

		++delta;
		++n;

	}
	return output.join('');
};
374
/**
 * Converts a Punycode string representing a domain name or an email address
 * to Unicode. Only labels matching `regexPunycode` (an `xn--` prefix) are
 * decoded; all other labels pass through untouched, so calling this on a
 * string that is already Unicode is harmless.
 * @memberOf punycode
 * @param {String} input The Punycoded domain name or email address to
 * convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
const toUnicode = function(input) {
	const decodeLabel = label => {
		if (!regexPunycode.test(label)) {
			return label;
		}
		// Drop the four-character prefix and lowercase the rest before decoding.
		const encodedPart = label.slice(4).toLowerCase();
		return decode(encodedPart);
	};
	return mapDomain(input, decodeLabel);
};
393
/**
 * Converts a Unicode string representing a domain name or an email address to
 * Punycode. Only labels matching `regexNonASCII` are encoded (and given the
 * `xn--` prefix); all other labels pass through untouched, so calling this
 * with a domain that is already in ASCII is harmless.
 * @memberOf punycode
 * @param {String} input The domain name or email address to convert, as a
 * Unicode string.
 * @returns {String} The Punycode representation of the given domain name or
 * email address.
 */
const toASCII = function(input) {
	const encodeLabel = label => {
		if (!regexNonASCII.test(label)) {
			return label;
		}
		return 'xn--' + encode(label);
	};
	return mapDomain(input, encodeLabel);
};
412
413/*--------------------------------------------------------------------------*/
414
/** Define the public API */
const punycode = {
	/**
	 * The Punycode.js version number, as a string.
	 * @memberOf punycode
	 * @type String
	 */
	version: '2.1.0',
	/**
	 * Helpers for converting between JavaScript's internal character
	 * representation (UCS-2) and arrays of Unicode code points.
	 * @see <https://mathiasbynens.be/notes/javascript-encoding>
	 * @memberOf punycode
	 * @type Object
	 */
	ucs2: {
		decode: ucs2decode,
		encode: ucs2encode
	},
	decode,
	encode,
	toASCII,
	toUnicode
};

module.exports = punycode;
441