1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2003-2016, International Business Machines Corporation and * 7 * others. All Rights Reserved. * 8 ******************************************************************************* 9 */ 10 11 package android.icu.text; 12 13 import java.util.Collections; 14 import java.util.EnumSet; 15 import java.util.Set; 16 17 import android.icu.impl.IDNA2003; 18 import android.icu.impl.UTS46; 19 20 /** 21 * Abstract base class for IDNA processing. 22 * See http://www.unicode.org/reports/tr46/ 23 * and http://www.ietf.org/rfc/rfc3490.txt 24 * <p> 25 * The IDNA class is not intended for public subclassing. 26 * <p> 27 * The non-static methods implement UTS #46 and IDNA2008. 28 * IDNA2008 is implemented according to UTS #46, see {@link #getUTS46Instance(int)}. 29 * <p> 30 * IDNA2003 is obsolete. The static methods implement IDNA2003. They are all deprecated. 31 * <p> 32 * IDNA2003 API Overview: 33 * <p> 34 * The static IDNA API methods implement the IDNA protocol as defined in the 35 * <a href="http://www.ietf.org/rfc/rfc3490.txt">IDNA RFC</a>. 36 * The draft defines 2 operations: ToASCII and ToUnicode. Domain labels 37 * containing non-ASCII code points are required to be processed by 38 * ToASCII operation before passing it to resolver libraries. Domain names 39 * that are obtained from resolver libraries are required to be processed by 40 * ToUnicode operation before displaying the domain name to the user. 41 * IDNA requires that implementations process input strings with 42 * <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a>, 43 * which is a profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a> , 44 * and then with <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a>. 
* Implementations of IDNA MUST fully implement Nameprep and Punycode;
 * neither Nameprep nor Punycode is optional.
 * The input and output of ToASCII and ToUnicode operations are Unicode
 * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
 * multiple times to an input string will yield the same result as applying the operation
 * once.
 * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
 * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
 *
 * @author Ram Viswanadha, Markus Scherer
 */
public abstract class IDNA {
    // Android-changed: ICU 76+ uses 0x30, but Android prefers not to change an API constant.
    /**
     * Default options value: None of the other options are set.
     * The value is 0, i.e. no option bit is set.
     */
    // Upstream ICU 76+ declaration, intentionally not adopted (see the Android-changed note):
    // public static final int DEFAULT = 0x30;
    public static final int DEFAULT = 0;
    /**
     * Option to allow unassigned code points in domain names and labels.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the UTS46 implementation.
     * (UTS #46 disallows unassigned code points.)
     * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
     * @hide original deprecated declaration
     */
    @Deprecated
    public static final int ALLOW_UNASSIGNED = 1;
    /**
     * Option to check whether the input conforms to the STD3 ASCII rules,
     * for example the restriction of labels to LDH characters
     * (ASCII Letters, Digits and Hyphen-Minus).
     * For use in static worker and factory methods.
     */
    public static final int USE_STD3_RULES = 2;
    /**
     * IDNA option to check whether the input conforms to the BiDi rules.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (IDNA2003 always performs a BiDi check.)
     */
    public static final int CHECK_BIDI = 4;
    /**
     * IDNA option to check whether the input conforms to the CONTEXTJ rules.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (The CONTEXTJ check is new in IDNA2008.)
     */
    public static final int CHECK_CONTEXTJ = 8;
    /**
     * IDNA option for nontransitional processing in ToASCII().
     * For use in static worker and factory methods.
     *
     * <p>By default, ToASCII() uses transitional processing.
     * Unicode 15.1 UTS #46 deprecated transitional processing.
     *
     * <p>This option is ignored by the IDNA2003 implementation.
     * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
     */
    public static final int NONTRANSITIONAL_TO_ASCII = 0x10;
    /**
     * IDNA option for nontransitional processing in ToUnicode().
     * For use in static worker and factory methods.
     *
     * <p>By default, ToUnicode() uses transitional processing.
     * Unicode 15.1 UTS #46 deprecated transitional processing.
     *
     * <p>This option is ignored by the IDNA2003 implementation.
     * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
     */
    public static final int NONTRANSITIONAL_TO_UNICODE = 0x20;
    /**
     * IDNA option to check whether the input conforms to the CONTEXTO rules.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (The CONTEXTO check is new in IDNA2008.)
     * <p>This is for use by registries for IDNA2008 conformance.
     * UTS #46 does not require the CONTEXTO check.
     */
    public static final int CHECK_CONTEXTO = 0x40;

    /**
     * Returns an IDNA instance which implements UTS #46.
     * Returns an unmodifiable instance, owned by the caller.
     * Cache it for multiple operations, and simply drop the reference when done
     * (Java has no explicit delete).
     * The instance is thread-safe, that is, it can be used concurrently.
131 * <p> 132 * UTS #46 defines Unicode IDNA Compatibility Processing, 133 * updated to the latest version of Unicode and compatible with both 134 * IDNA2003 and IDNA2008. 135 * <p> 136 * The worker functions use transitional processing, including deviation mappings, 137 * unless {@link #NONTRANSITIONAL_TO_ASCII} or {@link #NONTRANSITIONAL_TO_UNICODE} 138 * is used in which case the deviation characters are passed through without change. 139 * <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b> 140 * <p> 141 * Disallowed characters are mapped to U+FFFD. 142 * <p> 143 * Operations with the UTS #46 instance do not support the 144 * ALLOW_UNASSIGNED option. 145 * <p> 146 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). 147 * When the USE_STD3_RULES option is used, ASCII characters other than 148 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. 149 * 150 * @param options Bit set to modify the processing and error checking. 151 * These should include {@link IDNA#DEFAULT}, or 152 * {@link IDNA#NONTRANSITIONAL_TO_ASCII} | {@link IDNA#NONTRANSITIONAL_TO_UNICODE}. 153 * @return the UTS #46 IDNA instance, if successful 154 */ getUTS46Instance(int options)155 public static IDNA getUTS46Instance(int options) { 156 return new UTS46(options); 157 } 158 159 /** 160 * Converts a single domain name label into its ASCII form for DNS lookup. 161 * If any processing step fails, then info.hasErrors() will be true and 162 * the result might not be an ASCII string. 163 * The label might be modified according to the types of errors. 164 * Labels with severe errors will be left in (or turned into) their Unicode form. 165 * 166 * @param label Input domain name label 167 * @param dest Destination string object 168 * @param info Output container of IDNA processing details. 
* @return dest
     */
    public abstract StringBuilder labelToASCII(CharSequence label, StringBuilder dest, Info info);

    /**
     * Converts a single domain name label into its Unicode form for human-readable display.
     * If any processing step fails, then {@link Info#hasErrors() info.hasErrors()} will be true.
     * The label might be modified according to the types of errors.
     *
     * @param label Input domain name label
     * @param dest Destination string object
     * @param info Output container of IDNA processing details;
     *             see {@link Info#hasErrors()} and {@link Info#getErrors()}.
     * @return dest
     */
    public abstract StringBuilder labelToUnicode(CharSequence label, StringBuilder dest, Info info);

    /**
     * Converts a whole domain name into its ASCII form for DNS lookup.
     * If any processing step fails, then {@link Info#hasErrors() info.hasErrors()} will be true and
     * the result might not be an ASCII string.
     * The domain name might be modified according to the types of errors.
     * Labels with severe errors will be left in (or turned into) their Unicode form.
     *
     * @param name Input domain name
     * @param dest Destination string object
     * @param info Output container of IDNA processing details;
     *             see {@link Info#hasErrors()} and {@link Info#getErrors()}.
     * @return dest
     */
    public abstract StringBuilder nameToASCII(CharSequence name, StringBuilder dest, Info info);

    /**
     * Converts a whole domain name into its Unicode form for human-readable display.
     * If any processing step fails, then info.hasErrors() will be true.
     * The domain name might be modified according to the types of errors.
     *
     * @param name Input domain name
     * @param dest Destination string object
     * @param info Output container of IDNA processing details.
207 * @return dest 208 */ nameToUnicode(CharSequence name, StringBuilder dest, Info info)209 public abstract StringBuilder nameToUnicode(CharSequence name, StringBuilder dest, Info info); 210 211 /** 212 * Output container for IDNA processing errors. 213 * The Info class is not suitable for subclassing. 214 */ 215 public static final class Info { 216 /** 217 * Constructor. 218 */ Info()219 public Info() { 220 errors=EnumSet.noneOf(Error.class); 221 labelErrors=EnumSet.noneOf(Error.class); 222 isTransDiff=false; 223 isBiDi=false; 224 isOkBiDi=true; 225 } 226 /** 227 * Were there IDNA processing errors? 228 * @return true if there were processing errors 229 */ hasErrors()230 public boolean hasErrors() { return !errors.isEmpty(); } 231 /** 232 * Returns a set indicating IDNA processing errors. 233 * @return set of processing errors (modifiable, and not null) 234 */ getErrors()235 public Set<Error> getErrors() { return errors; } 236 /** 237 * Returns true if transitional and nontransitional processing produce different results. 238 * This is the case when the input label or domain name contains 239 * one or more deviation characters outside a Punycode label (see UTS #46). 240 * <ul> 241 * <li>With nontransitional processing, such characters are 242 * copied to the destination string. 243 * <li>With transitional processing, such characters are 244 * mapped (sharp s/sigma) or removed (joiner/nonjoiner). 
245 * </ul> 246 * @return true if transitional and nontransitional processing produce different results 247 */ isTransitionalDifferent()248 public boolean isTransitionalDifferent() { return isTransDiff; } 249 reset()250 private void reset() { 251 errors.clear(); 252 labelErrors.clear(); 253 isTransDiff=false; 254 isBiDi=false; 255 isOkBiDi=true; 256 } 257 258 private EnumSet<Error> errors, labelErrors; 259 private boolean isTransDiff; 260 private boolean isBiDi; 261 private boolean isOkBiDi; 262 } 263 264 // The following protected methods give IDNA subclasses access to the private IDNAInfo fields. 265 // The IDNAInfo also provides intermediate state that is publicly invisible, 266 // avoiding the allocation of another worker object. 267 /** 268 * @deprecated This API is ICU internal only. 269 * @hide original deprecated declaration 270 * @hide draft / provisional / internal are hidden on Android 271 */ 272 @Deprecated resetInfo(Info info)273 protected static void resetInfo(Info info) { 274 info.reset(); 275 } 276 /** 277 * @deprecated This API is ICU internal only. 278 * @hide original deprecated declaration 279 * @hide draft / provisional / internal are hidden on Android 280 */ 281 @Deprecated hasCertainErrors(Info info, EnumSet<Error> errors)282 protected static boolean hasCertainErrors(Info info, EnumSet<Error> errors) { 283 return !info.errors.isEmpty() && !Collections.disjoint(info.errors, errors); 284 } 285 /** 286 * @deprecated This API is ICU internal only. 287 * @hide original deprecated declaration 288 * @hide draft / provisional / internal are hidden on Android 289 */ 290 @Deprecated hasCertainLabelErrors(Info info, EnumSet<Error> errors)291 protected static boolean hasCertainLabelErrors(Info info, EnumSet<Error> errors) { 292 return !info.labelErrors.isEmpty() && !Collections.disjoint(info.labelErrors, errors); 293 } 294 /** 295 * @deprecated This API is ICU internal only. 
296 * @hide original deprecated declaration 297 * @hide draft / provisional / internal are hidden on Android 298 */ 299 @Deprecated addLabelError(Info info, Error error)300 protected static void addLabelError(Info info, Error error) { 301 info.labelErrors.add(error); 302 } 303 /** 304 * @deprecated This API is ICU internal only. 305 * @hide original deprecated declaration 306 * @hide draft / provisional / internal are hidden on Android 307 */ 308 @Deprecated promoteAndResetLabelErrors(Info info)309 protected static void promoteAndResetLabelErrors(Info info) { 310 if(!info.labelErrors.isEmpty()) { 311 info.errors.addAll(info.labelErrors); 312 info.labelErrors.clear(); 313 } 314 } 315 /** 316 * @deprecated This API is ICU internal only. 317 * @hide original deprecated declaration 318 * @hide draft / provisional / internal are hidden on Android 319 */ 320 @Deprecated addError(Info info, Error error)321 protected static void addError(Info info, Error error) { 322 info.errors.add(error); 323 } 324 /** 325 * @deprecated This API is ICU internal only. 326 * @hide original deprecated declaration 327 * @hide draft / provisional / internal are hidden on Android 328 */ 329 @Deprecated setTransitionalDifferent(Info info)330 protected static void setTransitionalDifferent(Info info) { 331 info.isTransDiff=true; 332 } 333 /** 334 * @deprecated This API is ICU internal only. 335 * @hide original deprecated declaration 336 * @hide draft / provisional / internal are hidden on Android 337 */ 338 @Deprecated setBiDi(Info info)339 protected static void setBiDi(Info info) { 340 info.isBiDi=true; 341 } 342 /** 343 * @deprecated This API is ICU internal only. 344 * @hide original deprecated declaration 345 * @hide draft / provisional / internal are hidden on Android 346 */ 347 @Deprecated isBiDi(Info info)348 protected static boolean isBiDi(Info info) { 349 return info.isBiDi; 350 } 351 /** 352 * @deprecated This API is ICU internal only. 
* @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    protected static void setNotOkBiDi(Info info) {
        info.isOkBiDi=false;
    }
    /**
     * Returns the Info's isOkBiDi flag.
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    protected static boolean isOkBiDi(Info info) {
        return info.isOkBiDi;
    }

    /**
     * IDNA error bit set values.
     * When a domain name or label fails a processing step or does not meet the
     * validity criteria, then one or more of these error bits are set.
     */
    public static enum Error {
        /**
         * A non-final domain name label (or the whole domain name) is empty.
         */
        EMPTY_LABEL,
        /**
         * A domain name label is longer than 63 bytes.
         * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
         * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
         */
        LABEL_TOO_LONG,
        /**
         * A domain name is longer than 255 bytes in its storage form.
         * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
         * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
         */
        DOMAIN_NAME_TOO_LONG,
        /**
         * A label starts with a hyphen-minus ('-').
         */
        LEADING_HYPHEN,
        /**
         * A label ends with a hyphen-minus ('-').
         */
        TRAILING_HYPHEN,
        /**
         * A label contains hyphen-minus ('-') in the third and fourth positions.
         */
        HYPHEN_3_4,
        /**
         * A label starts with a combining mark.
         */
        LEADING_COMBINING_MARK,
        /**
         * A label or domain name contains disallowed characters.
         */
        DISALLOWED,
        /**
         * A label starts with "xn--" but does not contain valid Punycode.
         * That is, an xn-- label failed Punycode decoding.
         */
        PUNYCODE,
        /**
         * A label contains a dot (full stop).
         * This can occur in an input string for a single-label function.
         */
        LABEL_HAS_DOT,
        /**
         * An ACE label does not contain a valid label string.
         * The label was successfully ACE (Punycode) decoded but the resulting
         * string had severe validation errors. For example,
         * it might contain characters that are not allowed in ACE labels,
         * or it might not be normalized.
         */
        INVALID_ACE_LABEL,
        /**
         * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
         */
        BIDI,
        /**
         * A label does not meet the IDNA CONTEXTJ requirements.
         */
        CONTEXTJ,
        /**
         * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
         * Some punctuation characters "Would otherwise have been DISALLOWED"
         * but are allowed in certain contexts. (RFC 5892)
         */
        CONTEXTO_PUNCTUATION,
        /**
         * A label does not meet the IDNA CONTEXTO requirements for digits.
         * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
         */
        CONTEXTO_DIGITS
    }

    /**
     * Sole constructor. (For invocation by subclass constructors, typically implicit.)
     * @deprecated This API is ICU internal only.
     * @hide original deprecated declaration
     * @hide draft / provisional / internal are hidden on Android
     */
    @Deprecated
    protected IDNA() {
    }

    /* IDNA2003 API ------------------------------------------------------------- */

    /**
     * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
     * This operation is done on <b>single labels</b> before sending it to something that expects
     * ASCII names. A label is an individual part of a domain name. Labels are usually
     * separated by dots; e.g., "www.example.com" is composed of 3 labels
     * "www","example", and "com".
469 * 470 * @param src The input string to be processed 471 * @param options A bit set of options: 472 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 473 * and do not use STD3 ASCII rules 474 * If unassigned code points are found the operation fails with 475 * StringPrepParseException. 476 * 477 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 478 * If this option is set, the unassigned code points are in the input 479 * are treated as normal Unicode code points. 480 * 481 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 482 * If this option is set and the input does not satisfy STD3 rules, 483 * the operation will fail with ParseException 484 * @return StringBuffer the converted String 485 * @throws StringPrepParseException When an error occurs for parsing a string. 486 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 487 * @hide original deprecated declaration 488 */ 489 @Deprecated convertToASCII(String src, int options)490 public static StringBuffer convertToASCII(String src, int options) 491 throws StringPrepParseException{ 492 UCharacterIterator iter = UCharacterIterator.getInstance(src); 493 return convertToASCII(iter,options); 494 } 495 496 /** 497 * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC. 498 * This operation is done on <b>single labels</b> before sending it to something that expects 499 * ASCII names. A label is an individual part of a domain name. Labels are usually 500 * separated by dots; e.g." "www.example.com" is composed of 3 labels 501 * "www","example", and "com". 
502 * 503 * @param src The input string as StringBuffer to be processed 504 * @param options A bit set of options: 505 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 506 * and do not use STD3 ASCII rules 507 * If unassigned code points are found the operation fails with 508 * ParseException. 509 * 510 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 511 * If this option is set, the unassigned code points are in the input 512 * are treated as normal Unicode code points. 513 * 514 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 515 * If this option is set and the input does not satisfy STD3 rules, 516 * the operation will fail with ParseException 517 * @return StringBuffer the converted String 518 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 519 * @hide original deprecated declaration 520 */ 521 @Deprecated convertToASCII(StringBuffer src, int options)522 public static StringBuffer convertToASCII(StringBuffer src, int options) 523 throws StringPrepParseException{ 524 UCharacterIterator iter = UCharacterIterator.getInstance(src); 525 return convertToASCII(iter,options); 526 } 527 528 /** 529 * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC. 530 * This operation is done on <b>single labels</b> before sending it to something that expects 531 * ASCII names. A label is an individual part of a domain name. Labels are usually 532 * separated by dots; e.g." "www.example.com" is composed of 3 labels 533 * "www","example", and "com". 534 * 535 * @param src The input string as UCharacterIterator to be processed 536 * @param options A bit set of options: 537 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 538 * and do not use STD3 ASCII rules 539 * If unassigned code points are found the operation fails with 540 * ParseException. 
541 * 542 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 543 * If this option is set, the unassigned code points are in the input 544 * are treated as normal Unicode code points. 545 * 546 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 547 * If this option is set and the input does not satisfy STD3 rules, 548 * the operation will fail with ParseException 549 * @return StringBuffer the converted String 550 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 551 * @hide original deprecated declaration 552 */ 553 @Deprecated convertToASCII(UCharacterIterator src, int options)554 public static StringBuffer convertToASCII(UCharacterIterator src, int options) 555 throws StringPrepParseException{ 556 return IDNA2003.convertToASCII(src, options); 557 } 558 559 /** 560 * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC. 561 * This operation is done on complete domain names, e.g: "www.example.com". 562 * It is important to note that this operation can fail. If it fails, then the input 563 * domain name cannot be used as an Internationalized Domain Name and the application 564 * should have methods defined to deal with the failure. 565 * 566 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 567 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 568 * and then convert. This function does not offer that level of granularity. The options once 569 * set will apply to all labels in the domain name 570 * 571 * @param src The input string as UCharacterIterator to be processed 572 * @param options A bit set of options: 573 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 574 * and do not use STD3 ASCII rules 575 * If unassigned code points are found the operation fails with 576 * ParseException. 
577 * 578 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 579 * If this option is set, the unassigned code points are in the input 580 * are treated as normal Unicode code points. 581 * 582 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 583 * If this option is set and the input does not satisfy STD3 rules, 584 * the operation will fail with ParseException 585 * @return StringBuffer the converted String 586 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 587 * @hide original deprecated declaration 588 */ 589 @Deprecated convertIDNToASCII(UCharacterIterator src, int options)590 public static StringBuffer convertIDNToASCII(UCharacterIterator src, int options) 591 throws StringPrepParseException{ 592 return convertIDNToASCII(src.getText(), options); 593 } 594 595 /** 596 * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC. 597 * This operation is done on complete domain names, e.g: "www.example.com". 598 * It is important to note that this operation can fail. If it fails, then the input 599 * domain name cannot be used as an Internationalized Domain Name and the application 600 * should have methods defined to deal with the failure. 601 * 602 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 603 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 604 * and then convert. This function does not offer that level of granularity. The options once 605 * set will apply to all labels in the domain name 606 * 607 * @param src The input string as a StringBuffer to be processed 608 * @param options A bit set of options: 609 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 610 * and do not use STD3 ASCII rules 611 * If unassigned code points are found the operation fails with 612 * ParseException. 
613 * 614 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 615 * If this option is set, the unassigned code points are in the input 616 * are treated as normal Unicode code points. 617 * 618 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 619 * If this option is set and the input does not satisfy STD3 rules, 620 * the operation will fail with ParseException 621 * @return StringBuffer the converted String 622 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 623 * @hide original deprecated declaration 624 */ 625 @Deprecated convertIDNToASCII(StringBuffer src, int options)626 public static StringBuffer convertIDNToASCII(StringBuffer src, int options) 627 throws StringPrepParseException{ 628 return convertIDNToASCII(src.toString(), options); 629 } 630 631 /** 632 * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC. 633 * This operation is done on complete domain names, e.g: "www.example.com". 634 * It is important to note that this operation can fail. If it fails, then the input 635 * domain name cannot be used as an Internationalized Domain Name and the application 636 * should have methods defined to deal with the failure. 637 * 638 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 639 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 640 * and then convert. This function does not offer that level of granularity. The options once 641 * set will apply to all labels in the domain name 642 * 643 * @param src The input string to be processed 644 * @param options A bit set of options: 645 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 646 * and do not use STD3 ASCII rules 647 * If unassigned code points are found the operation fails with 648 * ParseException. 
649 * 650 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 651 * If this option is set, the unassigned code points are in the input 652 * are treated as normal Unicode code points. 653 * 654 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 655 * If this option is set and the input does not satisfy STD3 rules, 656 * the operation will fail with ParseException 657 * @return StringBuffer the converted String 658 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 659 * @hide original deprecated declaration 660 */ 661 @Deprecated convertIDNToASCII(String src,int options)662 public static StringBuffer convertIDNToASCII(String src,int options) 663 throws StringPrepParseException{ 664 return IDNA2003.convertIDNToASCII(src, options); 665 } 666 667 668 /** 669 * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC. 670 * This operation is done on <b>single labels</b> before sending it to something that expects 671 * Unicode names. A label is an individual part of a domain name. Labels are usually 672 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 673 * "www","example", and "com". 674 * 675 * @param src The input string to be processed 676 * @param options A bit set of options: 677 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 678 * and do not use STD3 ASCII rules 679 * If unassigned code points are found the operation fails with 680 * ParseException. 681 * 682 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 683 * If this option is set, the unassigned code points are in the input 684 * are treated as normal Unicode code points. 
685 * 686 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 687 * If this option is set and the input does not satisfy STD3 rules, 688 * the operation will fail with ParseException 689 * @return StringBuffer the converted String 690 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 691 * @hide original deprecated declaration 692 */ 693 @Deprecated convertToUnicode(String src, int options)694 public static StringBuffer convertToUnicode(String src, int options) 695 throws StringPrepParseException{ 696 UCharacterIterator iter = UCharacterIterator.getInstance(src); 697 return convertToUnicode(iter,options); 698 } 699 700 /** 701 * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC. 702 * This operation is done on <b>single labels</b> before sending it to something that expects 703 * Unicode names. A label is an individual part of a domain name. Labels are usually 704 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 705 * "www","example", and "com". 706 * 707 * @param src The input string as StringBuffer to be processed 708 * @param options A bit set of options: 709 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 710 * and do not use STD3 ASCII rules 711 * If unassigned code points are found the operation fails with 712 * ParseException. 713 * 714 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 715 * If this option is set, the unassigned code points are in the input 716 * are treated as normal Unicode code points. 717 * 718 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 719 * If this option is set and the input does not satisfy STD3 rules, 720 * the operation will fail with ParseException 721 * @return StringBuffer the converted String 722 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 
723 * @hide original deprecated declaration 724 */ 725 @Deprecated convertToUnicode(StringBuffer src, int options)726 public static StringBuffer convertToUnicode(StringBuffer src, int options) 727 throws StringPrepParseException{ 728 UCharacterIterator iter = UCharacterIterator.getInstance(src); 729 return convertToUnicode(iter,options); 730 } 731 732 /** 733 * IDNA2003: Function that implements the ToUnicode operation as defined in the IDNA RFC. 734 * This operation is done on <b>single labels</b> before sending it to something that expects 735 * Unicode names. A label is an individual part of a domain name. Labels are usually 736 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 737 * "www","example", and "com". 738 * 739 * @param src The input string as UCharacterIterator to be processed 740 * @param options A bit set of options: 741 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 742 * and do not use STD3 ASCII rules 743 * If unassigned code points are found the operation fails with 744 * ParseException. 745 * 746 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 747 * If this option is set, the unassigned code points are in the input 748 * are treated as normal Unicode code points. 749 * 750 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 751 * If this option is set and the input does not satisfy STD3 rules, 752 * the operation will fail with ParseException 753 * @return StringBuffer the converted String 754 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 
755 * @hide original deprecated declaration 756 */ 757 @Deprecated convertToUnicode(UCharacterIterator src, int options)758 public static StringBuffer convertToUnicode(UCharacterIterator src, int options) 759 throws StringPrepParseException{ 760 return IDNA2003.convertToUnicode(src, options); 761 } 762 763 /** 764 * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC. 765 * This operation is done on complete domain names, e.g: "www.example.com". 766 * 767 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 768 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 769 * and then convert. This function does not offer that level of granularity. The options once 770 * set will apply to all labels in the domain name 771 * 772 * @param src The input string as UCharacterIterator to be processed 773 * @param options A bit set of options: 774 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 775 * and do not use STD3 ASCII rules 776 * If unassigned code points are found the operation fails with 777 * ParseException. 778 * 779 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 780 * If this option is set, the unassigned code points are in the input 781 * are treated as normal Unicode code points. 782 * 783 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 784 * If this option is set and the input does not satisfy STD3 rules, 785 * the operation will fail with ParseException 786 * @return StringBuffer the converted String 787 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 
788 * @hide original deprecated declaration 789 */ 790 @Deprecated convertIDNToUnicode(UCharacterIterator src, int options)791 public static StringBuffer convertIDNToUnicode(UCharacterIterator src, int options) 792 throws StringPrepParseException{ 793 return convertIDNToUnicode(src.getText(), options); 794 } 795 796 /** 797 * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC. 798 * This operation is done on complete domain names, e.g: "www.example.com". 799 * 800 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 801 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 802 * and then convert. This function does not offer that level of granularity. The options once 803 * set will apply to all labels in the domain name 804 * 805 * @param src The input string as StringBuffer to be processed 806 * @param options A bit set of options: 807 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 808 * and do not use STD3 ASCII rules 809 * If unassigned code points are found the operation fails with 810 * ParseException. 811 * 812 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 813 * If this option is set, the unassigned code points are in the input 814 * are treated as normal Unicode code points. 815 * 816 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 817 * If this option is set and the input does not satisfy STD3 rules, 818 * the operation will fail with ParseException 819 * @return StringBuffer the converted String 820 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 
821 * @hide original deprecated declaration 822 */ 823 @Deprecated convertIDNToUnicode(StringBuffer src, int options)824 public static StringBuffer convertIDNToUnicode(StringBuffer src, int options) 825 throws StringPrepParseException{ 826 return convertIDNToUnicode(src.toString(), options); 827 } 828 829 /** 830 * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC. 831 * This operation is done on complete domain names, e.g: "www.example.com". 832 * 833 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name 834 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 835 * and then convert. This function does not offer that level of granularity. The options once 836 * set will apply to all labels in the domain name 837 * 838 * @param src The input string to be processed 839 * @param options A bit set of options: 840 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 841 * and do not use STD3 ASCII rules 842 * If unassigned code points are found the operation fails with 843 * ParseException. 844 * 845 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 846 * If this option is set, the unassigned code points are in the input 847 * are treated as normal Unicode code points. 848 * 849 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 850 * If this option is set and the input does not satisfy STD3 rules, 851 * the operation will fail with ParseException 852 * @return StringBuffer the converted String 853 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 
854 * @hide original deprecated declaration 855 */ 856 @Deprecated convertIDNToUnicode(String src, int options)857 public static StringBuffer convertIDNToUnicode(String src, int options) 858 throws StringPrepParseException{ 859 return IDNA2003.convertIDNToUnicode(src, options); 860 } 861 862 /** 863 * IDNA2003: Compare two IDN strings for equivalence. 864 * This function splits the domain names into labels and compares them. 865 * According to IDN RFC, whenever two labels are compared, they are 866 * considered equal if and only if their ASCII forms (obtained by 867 * applying toASCII) match using an case-insensitive ASCII comparison. 868 * Two domain names are considered a match if and only if all labels 869 * match regardless of whether label separators match. 870 * 871 * @param s1 First IDN string as StringBuffer 872 * @param s2 Second IDN string as StringBuffer 873 * @param options A bit set of options: 874 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 875 * and do not use STD3 ASCII rules 876 * If unassigned code points are found the operation fails with 877 * ParseException. 878 * 879 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 880 * If this option is set, the unassigned code points are in the input 881 * are treated as normal Unicode code points. 882 * 883 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 884 * If this option is set and the input does not satisfy STD3 rules, 885 * the operation will fail with ParseException 886 * @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2 887 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 
888 * @hide original deprecated declaration 889 */ 890 @Deprecated compare(StringBuffer s1, StringBuffer s2, int options)891 public static int compare(StringBuffer s1, StringBuffer s2, int options) 892 throws StringPrepParseException{ 893 if(s1==null || s2 == null){ 894 throw new IllegalArgumentException("One of the source buffers is null"); 895 } 896 return IDNA2003.compare(s1.toString(), s2.toString(), options); 897 } 898 899 /** 900 * IDNA2003: Compare two IDN strings for equivalence. 901 * This function splits the domain names into labels and compares them. 902 * According to IDN RFC, whenever two labels are compared, they are 903 * considered equal if and only if their ASCII forms (obtained by 904 * applying toASCII) match using an case-insensitive ASCII comparison. 905 * Two domain names are considered a match if and only if all labels 906 * match regardless of whether label separators match. 907 * 908 * @param s1 First IDN string 909 * @param s2 Second IDN string 910 * @param options A bit set of options: 911 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 912 * and do not use STD3 ASCII rules 913 * If unassigned code points are found the operation fails with 914 * ParseException. 915 * 916 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 917 * If this option is set, the unassigned code points are in the input 918 * are treated as normal Unicode code points. 919 * 920 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 921 * If this option is set and the input does not satisfy STD3 rules, 922 * the operation will fail with ParseException 923 * @return 0 if the strings are equal, > 0 if s1 > s2 and < 0 if s1 < s2 924 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 
925 * @hide original deprecated declaration 926 */ 927 @Deprecated compare(String s1, String s2, int options)928 public static int compare(String s1, String s2, int options) throws StringPrepParseException{ 929 if(s1==null || s2 == null){ 930 throw new IllegalArgumentException("One of the source buffers is null"); 931 } 932 return IDNA2003.compare(s1, s2, options); 933 } 934 /** 935 * IDNA2003: Compare two IDN strings for equivalence. 936 * This function splits the domain names into labels and compares them. 937 * According to IDN RFC, whenever two labels are compared, they are 938 * considered equal if and only if their ASCII forms (obtained by 939 * applying toASCII) match using an case-insensitive ASCII comparison. 940 * Two domain names are considered a match if and only if all labels 941 * match regardless of whether label separators match. 942 * 943 * @param s1 First IDN string as UCharacterIterator 944 * @param s2 Second IDN string as UCharacterIterator 945 * @param options A bit set of options: 946 * - IDNA.DEFAULT Use default options, i.e., do not process unassigned code points 947 * and do not use STD3 ASCII rules 948 * If unassigned code points are found the operation fails with 949 * ParseException. 950 * 951 * - IDNA.ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations 952 * If this option is set, the unassigned code points are in the input 953 * are treated as normal Unicode code points. 954 * 955 * - IDNA.USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 956 * If this option is set and the input does not satisfy STD3 rules, 957 * the operation will fail with ParseException 958 * @return 0 if the strings are equal, > 0 if i1 > i2 and < 0 if i1 < i2 959 * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}. 
* @hide original deprecated declaration
     */
    @Deprecated
    public static int compare(UCharacterIterator s1, UCharacterIterator s2, int options)
            throws StringPrepParseException {
        // Reject null operands up front, before anything is dereferenced.
        if (s1 == null || s2 == null) {
            throw new IllegalArgumentException("One of the source buffers is null");
        }
        // The IDNA2003 comparison works on Strings, so pass each iterator's getText() result.
        return IDNA2003.compare(s1.getText(), s2.getText(), options);
    }
}