'use strict';

const { getOptionValue } = require('internal/options');
// Emit the deprecation warning only when `--pending-deprecation` is set.
if (getOptionValue('--pending-deprecation')) {
  process.emitWarning(
    'The `punycode` module is deprecated. Please use a userland ' +
    'alternative instead.',
    'DeprecationWarning',
    'DEP0040',
  );
}

/** Highest positive signed 32-bit integer value */
const maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1

/** Bootstring parameters */
const base = 36;
const tMin = 1;
const tMax = 26;
const skew = 38;
const damp = 700;
const initialBias = 72;
const initialN = 128; // 0x80
const delimiter = '-'; // '\x2D'

/** Regular expressions */
// The ACE prefix is matched case-insensitively so that `XN--` labels are
// decoded as well (the remainder of the label is lower-cased before decoding).
const regexPunycode = /^xn--/i;
const regexNonASCII = /[^\0-\x7F]/; // Note: U+007F DEL is excluded too.
const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators

/** Error messages */
const errors = {
  'overflow': 'Overflow: input needs wider integers to process',
  'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
  'invalid-input': 'Invalid input'
};

/** Convenience shortcuts */
const baseMinusTMin = base - tMin;
const floor = Math.floor;
const stringFromCharCode = String.fromCharCode;

/**
 * Maximum number of code points handed to a single `String.fromCodePoint`
 * call. Spreading a very large array into call arguments can exceed the
 * engine's argument/stack limit, so longer arrays are converted in chunks.
 */
const maxCodePointsPerCall = 0x8000;

/*--------------------------------------------------------------------------*/

/**
 * A generic error utility function.
 * @private
 * @param {String} type The error type (a key of `errors`).
 * @returns {Error} Throws a `RangeError` with the applicable error message.
 */
function error(type) {
  throw new RangeError(errors[type]);
}

/**
 * A generic `Array#map` utility function.
 * The callback is invoked with the item only, walking the array from the
 * last item to the first.
 * @private
 * @param {Array} array The array to iterate over.
 * @param {Function} callback The function that gets called for every array
 * item.
 * @returns {Array} A new array of values returned by the callback function.
 */
function map(array, callback) {
  const result = [];
  let length = array.length;
  while (length--) {
    result[length] = callback(array[length]);
  }
  return result;
}

/**
 * A simple `Array#map`-like wrapper to work with domain name strings or email
 * addresses.
 * @private
 * @param {String} domain The domain name or email address.
 * @param {Function} callback The function that gets called for every
 * label of the domain name.
 * @returns {String} A new string made of the labels returned by the callback
 * function, joined with `.` and prefixed with the untouched local part (if
 * any).
 */
function mapDomain(domain, callback) {
  let localPart = '';
  let host = domain;
  // In email addresses, only the domain name should be punycoded. Leave
  // the local part (i.e. everything up to and including the last `@`)
  // intact. Splitting at the *last* `@` keeps inputs that contain more than
  // one `@` from being silently truncated.
  const atIndex = domain.lastIndexOf('@');
  if (atIndex !== -1) {
    localPart = domain.slice(0, atIndex + 1);
    host = domain.slice(atIndex + 1);
  }
  // Normalize every RFC 3490 separator to `.` before splitting.
  // Avoid `split(regex)` for IE8 compatibility. See #17.
  host = host.replace(regexSeparators, '\x2E');
  const labels = host.split('.');
  const encoded = map(labels, callback).join('.');
  return localPart + encoded;
}

/**
 * Builds a string from a list of code points without passing an unbounded
 * number of arguments to `String.fromCodePoint`.
 * @private
 * @param {Array} codePoints The numeric code points (any iterable is
 * accepted and copied into an array first).
 * @returns {String} The resulting string.
 */
function stringFromCodePoints(codePoints) {
  const points = Array.isArray(codePoints) ? codePoints : [...codePoints];
  if (points.length <= maxCodePointsPerCall) {
    return String.fromCodePoint(...points);
  }
  let result = '';
  for (let start = 0; start < points.length; start += maxCodePointsPerCall) {
    result += String.fromCodePoint(
      ...points.slice(start, start + maxCodePointsPerCall)
    );
  }
  return result;
}

/**
 * Creates an array containing the numeric code points of each Unicode
 * character in the string. While JavaScript uses UCS-2 internally,
 * this function will convert a pair of surrogate halves (each of which
 * UCS-2 exposes as separate characters) into a single code point,
 * matching UTF-16.
 * @see `punycode.ucs2.encode`
 * @see <https://mathiasbynens.be/notes/javascript-encoding>
 * @memberOf punycode.ucs2
 * @name decode
 * @param {String} string The Unicode input string (UCS-2).
 * @returns {Array} The new array of code points.
 */
function ucs2decode(string) {
  const output = [];
  let counter = 0;
  const length = string.length;
  while (counter < length) {
    const value = string.charCodeAt(counter++);
    if (value >= 0xD800 && value <= 0xDBFF && counter < length) {
      // It's a high surrogate, and there is a next character.
      const extra = string.charCodeAt(counter++);
      if ((extra & 0xFC00) === 0xDC00) { // Low surrogate.
        output.push(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
      } else {
        // It's an unmatched surrogate; only append this code unit, in case the
        // next code unit is the high surrogate of a surrogate pair.
        output.push(value);
        counter--;
      }
    } else {
      output.push(value);
    }
  }
  return output;
}

/**
 * Creates a string based on an array of numeric code points.
 * Large arrays are converted in chunks, so the length of the input is not
 * limited by the maximum number of call arguments.
 * @see `punycode.ucs2.decode`
 * @memberOf punycode.ucs2
 * @name encode
 * @param {Array} codePoints The array of numeric code points.
 * @returns {String} The new Unicode string (UCS-2).
 */
const ucs2encode = (codePoints) => stringFromCodePoints(codePoints);

/**
 * Converts a basic code point into a digit/integer.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
const basicToDigit = function(codePoint) {
  if (codePoint >= 0x30 && codePoint < 0x3A) {
    // ASCII 0..9 map to 26..35
    return 26 + (codePoint - 0x30);
  }
  if (codePoint >= 0x41 && codePoint < 0x5B) {
    // ASCII A..Z map to 0..25
    return codePoint - 0x41;
  }
  if (codePoint >= 0x61 && codePoint < 0x7B) {
    // ASCII a..z map to 0..25
    return codePoint - 0x61;
  }
  return base;
};

/**
 * Converts a digit/integer into a basic code point.
 * @see `basicToDigit()`
 * @private
 * @param {Number} digit The numeric value of a basic code point.
 * @param {Number} flag Non-zero to request the uppercase form.
 * @returns {Number} The basic code point whose value (when used for
 * representing integers) is `digit`, which needs to be in the range
 * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is
 * used; else, the lowercase form is used. The behavior is undefined
 * if `flag` is non-zero and `digit` has no uppercase form.
 */
const digitToBasic = function(digit, flag) {
  //  0..25 map to ASCII a..z or A..Z
  // 26..35 map to ASCII 0..9
  const lowercase = digit < 26 ? digit + 0x61 : digit + 22;
  // Uppercase letters sit 0x20 below their lowercase counterparts.
  return flag !== 0 ? lowercase - 0x20 : lowercase;
};

/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * https://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just encoded/decoded.
 * @param {Number} numPoints The number of code points handled so far,
 * including the current one.
 * @param {Boolean} firstTime Whether this is the first adaptation; the
 * delta is then divided by `damp` instead of being halved.
 * @returns {Number} The new bias.
 */
const adapt = function(delta, numPoints, firstTime) {
  let k = 0;
  delta = firstTime ? floor(delta / damp) : delta >> 1;
  delta += floor(delta / numPoints);
  for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {
    delta = floor(delta / baseMinusTMin);
  }
  return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
};

/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} If the input contains a non-basic code point before
 * the last delimiter, is not valid Punycode, or overflows `maxInt`.
 */
const decode = function(input) {
  // Don't use UCS-2.
  const output = [];
  const inputLength = input.length;
  let i = 0;
  let n = initialN;
  let bias = initialBias;

  // Handle the basic code points: let `basic` be the number of input code
  // points before the last delimiter, or `0` if there is none, then copy
  // the first basic code points to the output.

  let basic = input.lastIndexOf(delimiter);
  if (basic < 0) {
    basic = 0;
  }

  for (let j = 0; j < basic; ++j) {
    // if it's not a basic code point
    if (input.charCodeAt(j) >= 0x80) {
      error('not-basic');
    }
    output.push(input.charCodeAt(j));
  }

  // Main decoding loop: start just after the last delimiter if any basic code
  // points were copied; start at the beginning otherwise.

  for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

    // `index` is the index of the next character to be consumed.
    // Decode a generalized variable-length integer into `delta`,
    // which gets added to `i`. The overflow checking is easier
    // if we increase `i` as we go, then subtract off its starting
    // value at the end to obtain `delta`.
    const oldi = i;
    for (let w = 1, k = base; /* no condition */; k += base) {

      if (index >= inputLength) {
        error('invalid-input');
      }

      const digit = basicToDigit(input.charCodeAt(index++));

      if (digit >= base) {
        error('invalid-input');
      }
      if (digit > floor((maxInt - i) / w)) {
        error('overflow');
      }

      i += digit * w;
      const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);

      if (digit < t) {
        break;
      }

      const baseMinusT = base - t;
      if (w > floor(maxInt / baseMinusT)) {
        error('overflow');
      }

      w *= baseMinusT;

    }

    const out = output.length + 1;
    bias = adapt(i - oldi, out, oldi === 0);

    // `i` was supposed to wrap around from `out` to `0`,
    // incrementing `n` each time, so we'll fix that now:
    if (floor(i / out) > maxInt - n) {
      error('overflow');
    }

    n += floor(i / out);
    i %= out;

    // Insert `n` at position `i` of the output.
    output.splice(i++, 0, n);

  }

  return stringFromCodePoints(output);
};

/**
 * Converts a string of Unicode symbols (e.g. a domain name label) to a
 * Punycode string of ASCII-only symbols.
 * @memberOf punycode
 * @param {String} input The string of Unicode symbols.
 * @returns {String} The resulting Punycode string of ASCII-only symbols.
 * @throws {RangeError} If encoding the input would overflow `maxInt`.
 */
const encode = function(input) {
  const output = [];

  // Convert the input in UCS-2 to an array of Unicode code points.
  input = ucs2decode(input);

  // Cache the length.
  const inputLength = input.length;

  // Initialize the state.
  let n = initialN;
  let delta = 0;
  let bias = initialBias;

  // Handle the basic code points.
  for (const currentValue of input) {
    if (currentValue < 0x80) {
      output.push(stringFromCharCode(currentValue));
    }
  }

  const basicLength = output.length;
  let handledCPCount = basicLength;

  // `handledCPCount` is the number of code points that have been handled;
  // `basicLength` is the number of basic code points.

  // Finish the basic string with a delimiter unless it's empty.
  if (basicLength) {
    output.push(delimiter);
  }

  // Main encoding loop:
  while (handledCPCount < inputLength) {

    // All non-basic code points < n have been handled already. Find the next
    // larger one:
    let m = maxInt;
    for (const currentValue of input) {
      if (currentValue >= n && currentValue < m) {
        m = currentValue;
      }
    }

    // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
    // but guard against overflow.
    const handledCPCountPlusOne = handledCPCount + 1;
    if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
      error('overflow');
    }

    delta += (m - n) * handledCPCountPlusOne;
    n = m;

    for (const currentValue of input) {
      if (currentValue < n && ++delta > maxInt) {
        error('overflow');
      }
      if (currentValue === n) {
        // Represent delta as a generalized variable-length integer.
        let q = delta;
        for (let k = base; /* no condition */; k += base) {
          const t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
          if (q < t) {
            break;
          }
          const qMinusT = q - t;
          const baseMinusT = base - t;
          output.push(
            stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
          );
          q = floor(qMinusT / baseMinusT);
        }

        output.push(stringFromCharCode(digitToBasic(q, 0)));
        bias = adapt(delta, handledCPCountPlusOne, handledCPCount === basicLength);
        delta = 0;
        ++handledCPCount;
      }
    }

    ++delta;
    ++n;

  }
  return output.join('');
};

/**
 * Converts a Punycode string representing a domain name or an email address
 * to Unicode. Only the Punycoded parts of the input will be converted, i.e.
 * it doesn't matter if you call it on a string that has already been
 * converted to Unicode. The `xn--` prefix is recognized regardless of case.
 * @memberOf punycode
 * @param {String} input The Punycoded domain name or email address to
 * convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
const toUnicode = function(input) {
  return mapDomain(input, function(string) {
    return regexPunycode.test(string)
      ? decode(string.slice(4).toLowerCase())
      : string;
  });
};

/**
 * Converts a Unicode string representing a domain name or an email address to
 * Punycode. Only the non-ASCII parts of the domain name will be converted,
 * i.e. it doesn't matter if you call it with a domain that's already in
 * ASCII.
 * @memberOf punycode
 * @param {String} input The domain name or email address to convert, as a
 * Unicode string.
 * @returns {String} The Punycode representation of the given domain name or
 * email address.
 */
const toASCII = function(input) {
  return mapDomain(input, function(string) {
    return regexNonASCII.test(string)
      ? 'xn--' + encode(string)
      : string;
  });
};

/*--------------------------------------------------------------------------*/

/** Define the public API */
const punycode = {
  /**
   * A string representing the current Punycode.js version number.
   * @memberOf punycode
   * @type String
   */
  'version': '2.1.0',
  /**
   * An object of methods to convert from JavaScript's internal character
   * representation (UCS-2) to Unicode code points, and back.
   * @see <https://mathiasbynens.be/notes/javascript-encoding>
   * @memberOf punycode
   * @type Object
   */
  'ucs2': {
    'decode': ucs2decode,
    'encode': ucs2encode
  },
  'decode': decode,
  'encode': encode,
  'toASCII': toASCII,
  'toUnicode': toUnicode
};

module.exports = punycode;