• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html
4 /*
5  *******************************************************************************
6  * Copyright (C) 2003-2016, International Business Machines Corporation and    *
7  * others. All Rights Reserved.                                                *
8  *******************************************************************************
9  */
10 
11 package android.icu.text;
12 
13 import java.util.Collections;
14 import java.util.EnumSet;
15 import java.util.Set;
16 
17 import android.icu.impl.IDNA2003;
18 import android.icu.impl.UTS46;
19 
20 /**
21  * Abstract base class for IDNA processing.
22  * See http://www.unicode.org/reports/tr46/
23  * and http://www.ietf.org/rfc/rfc3490.txt
24  * <p>
25  * The IDNA class is not intended for public subclassing.
26  * <p>
27  * The non-static methods implement UTS #46 and IDNA2008.
28  * IDNA2008 is implemented according to UTS #46, see {@link #getUTS46Instance(int)}.
29  * <p>
30  * IDNA2003 is obsolete. The static methods implement IDNA2003. They are all deprecated.
31  * <p>
32  * IDNA2003 API Overview:
33  * <p>
34  * The static IDNA API methods implement the IDNA protocol as defined in the
35  * <a href="http://www.ietf.org/rfc/rfc3490.txt">IDNA RFC</a>.
36  * The draft defines 2 operations: ToASCII and ToUnicode. Domain labels
37  * containing non-ASCII code points are required to be processed by
38  * ToASCII operation before passing it to resolver libraries. Domain names
39  * that are obtained from resolver libraries are required to be processed by
40  * ToUnicode operation before displaying the domain name to the user.
41  * IDNA requires that implementations process input strings with
42  * <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a>,
43  * which is a profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a> ,
44  * and then with <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a>.
45  * Implementations of IDNA MUST fully implement Nameprep and Punycode;
46  * neither Nameprep nor Punycode are optional.
47  * The input and output of ToASCII and ToUnicode operations are Unicode
48  * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
49  * multiple times to an input string will yield the same result as applying the operation
50  * once.
51  * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
52  * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
53  *
54  * @author Ram Viswanadha, Markus Scherer
55  */
56 public abstract class IDNA {
57     // Android-changed: ICU 76+ uses 0x30, but Android prefers not to change an API constant.
58     /**
59      * Default options value: None of the other options are set.
60      */
61     // public static final int DEFAULT = 0x30;
62     public static final int DEFAULT = 0;
63     /**
64      * Option to allow unassigned code points in domain names and labels.
65      * For use in static worker and factory methods.
66      * <p>This option is ignored by the UTS46 implementation.
67      * (UTS #46 disallows unassigned code points.)
68      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
69      * @hide original deprecated declaration
70      */
71     @Deprecated
72     public static final int ALLOW_UNASSIGNED = 1;
73     /**
74      * Option to check whether the input conforms to the STD3 ASCII rules,
75      * for example the restriction of labels to LDH characters
76      * (ASCII Letters, Digits and Hyphen-Minus).
77      * For use in static worker and factory methods.
78      */
79     public static final int USE_STD3_RULES = 2;
80     /**
81      * IDNA option to check for whether the input conforms to the BiDi rules.
82      * For use in static worker and factory methods.
83      * <p>This option is ignored by the IDNA2003 implementation.
84      * (IDNA2003 always performs a BiDi check.)
85      */
86     public static final int CHECK_BIDI = 4;
87     /**
88      * IDNA option to check for whether the input conforms to the CONTEXTJ rules.
89      * For use in static worker and factory methods.
90      * <p>This option is ignored by the IDNA2003 implementation.
91      * (The CONTEXTJ check is new in IDNA2008.)
92      */
93     public static final int CHECK_CONTEXTJ = 8;
94     /**
95      * IDNA option for nontransitional processing in ToASCII().
96      * For use in static worker and factory methods.
97      *
98      * <p>By default, ToASCII() uses transitional processing.
99      * Unicode 15.1 UTS #46 deprecated transitional processing.
100      *
101      * <p>This option is ignored by the IDNA2003 implementation.
102      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
103      */
104     public static final int NONTRANSITIONAL_TO_ASCII = 0x10;
105     /**
106      * IDNA option for nontransitional processing in ToUnicode().
107      * For use in static worker and factory methods.
108      *
109      * <p>By default, ToUnicode() uses transitional processing.
110      * Unicode 15.1 UTS #46 deprecated transitional processing.
111      *
112      * <p>This option is ignored by the IDNA2003 implementation.
113      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
114      */
115     public static final int NONTRANSITIONAL_TO_UNICODE = 0x20;
116     /**
117      * IDNA option to check for whether the input conforms to the CONTEXTO rules.
118      * For use in static worker and factory methods.
119      * <p>This option is ignored by the IDNA2003 implementation.
120      * (The CONTEXTO check is new in IDNA2008.)
121      * <p>This is for use by registries for IDNA2008 conformance.
122      * UTS #46 does not require the CONTEXTO check.
123      */
124     public static final int CHECK_CONTEXTO = 0x40;
125 
126     /**
127      * Returns an IDNA instance which implements UTS #46.
128      * Returns an unmodifiable instance, owned by the caller.
129      * Cache it for multiple operations; no explicit cleanup is needed when done.
130      * The instance is thread-safe, that is, it can be used concurrently.
131      * <p>
132      * UTS #46 defines Unicode IDNA Compatibility Processing,
133      * updated to the latest version of Unicode and compatible with both
134      * IDNA2003 and IDNA2008.
135      * <p>
136      * The worker functions use transitional processing, including deviation mappings,
137      * unless {@link #NONTRANSITIONAL_TO_ASCII} or {@link #NONTRANSITIONAL_TO_UNICODE}
138      * is used in which case the deviation characters are passed through without change.
139      * <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b>
140      * <p>
141      * Disallowed characters are mapped to U+FFFD.
142      * <p>
143      * Operations with the UTS #46 instance do not support the
144      * ALLOW_UNASSIGNED option.
145      * <p>
146      * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
147      * When the USE_STD3_RULES option is used, ASCII characters other than
148      * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
149      *
150      * @param options Bit set to modify the processing and error checking.
151      *                These should include {@link IDNA#DEFAULT}, or
152      *                {@link IDNA#NONTRANSITIONAL_TO_ASCII} | {@link IDNA#NONTRANSITIONAL_TO_UNICODE}.
153      * @return the UTS #46 IDNA instance, if successful
154      */
getUTS46Instance(int options)155     public static IDNA getUTS46Instance(int options) {
156         return new UTS46(options);
157     }
158 
159     /**
160      * Converts a single domain name label into its ASCII form for DNS lookup.
161      * If any processing step fails, then info.hasErrors() will be true and
162      * the result might not be an ASCII string.
163      * The label might be modified according to the types of errors.
164      * Labels with severe errors will be left in (or turned into) their Unicode form.
165      *
166      * @param label Input domain name label
167      * @param dest Destination string object
168      * @param info Output container of IDNA processing details.
169      * @return dest
170      */
labelToASCII(CharSequence label, StringBuilder dest, Info info)171     public abstract StringBuilder labelToASCII(CharSequence label, StringBuilder dest, Info info);
172 
173     /**
174      * Converts a single domain name label into its Unicode form for human-readable display.
175      * If any processing step fails, then info.hasErrors() will be true.
176      * The label might be modified according to the types of errors.
177      *
178      * @param label Input domain name label
179      * @param dest Destination string object
180      * @param info Output container of IDNA processing details.
181      * @return dest
182      */
labelToUnicode(CharSequence label, StringBuilder dest, Info info)183     public abstract StringBuilder labelToUnicode(CharSequence label, StringBuilder dest, Info info);
184 
185     /**
186      * Converts a whole domain name into its ASCII form for DNS lookup.
187      * If any processing step fails, then info.hasErrors() will be true and
188      * the result might not be an ASCII string.
189      * The domain name might be modified according to the types of errors.
190      * Labels with severe errors will be left in (or turned into) their Unicode form.
191      *
192      * @param name Input domain name
193      * @param dest Destination string object
194      * @param info Output container of IDNA processing details.
195      * @return dest
196      */
nameToASCII(CharSequence name, StringBuilder dest, Info info)197     public abstract StringBuilder nameToASCII(CharSequence name, StringBuilder dest, Info info);
198 
199     /**
200      * Converts a whole domain name into its Unicode form for human-readable display.
201      * If any processing step fails, then info.hasErrors() will be true.
202      * The domain name might be modified according to the types of errors.
203      *
204      * @param name Input domain name
205      * @param dest Destination string object
206      * @param info Output container of IDNA processing details.
207      * @return dest
208      */
nameToUnicode(CharSequence name, StringBuilder dest, Info info)209     public abstract StringBuilder nameToUnicode(CharSequence name, StringBuilder dest, Info info);
210 
211     /**
212      * Output container for IDNA processing errors.
213      * The Info class is not suitable for subclassing.
214      */
215     public static final class Info {
216         /**
217          * Constructor.
218          */
Info()219         public Info() {
220             errors=EnumSet.noneOf(Error.class);
221             labelErrors=EnumSet.noneOf(Error.class);
222             isTransDiff=false;
223             isBiDi=false;
224             isOkBiDi=true;
225         }
226         /**
227          * Were there IDNA processing errors?
228          * @return true if there were processing errors
229          */
hasErrors()230         public boolean hasErrors() { return !errors.isEmpty(); }
231         /**
232          * Returns a set indicating IDNA processing errors.
233          * @return set of processing errors (modifiable, and not null)
234          */
getErrors()235         public Set<Error> getErrors() { return errors; }
236         /**
237          * Returns true if transitional and nontransitional processing produce different results.
238          * This is the case when the input label or domain name contains
239          * one or more deviation characters outside a Punycode label (see UTS #46).
240          * <ul>
241          * <li>With nontransitional processing, such characters are
242          * copied to the destination string.
243          * <li>With transitional processing, such characters are
244          * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
245          * </ul>
246          * @return true if transitional and nontransitional processing produce different results
247          */
isTransitionalDifferent()248         public boolean isTransitionalDifferent() { return isTransDiff; }
249 
reset()250         private void reset() {
251             errors.clear();
252             labelErrors.clear();
253             isTransDiff=false;
254             isBiDi=false;
255             isOkBiDi=true;
256         }
257 
258         private EnumSet<Error> errors, labelErrors;
259         private boolean isTransDiff;
260         private boolean isBiDi;
261         private boolean isOkBiDi;
262     }
263 
264     // The following protected methods give IDNA subclasses access to the private IDNAInfo fields.
265     // The IDNAInfo also provides intermediate state that is publicly invisible,
266     // avoiding the allocation of another worker object.
267     /**
268      * @deprecated This API is ICU internal only.
269      * @hide original deprecated declaration
270      * @hide draft / provisional / internal are hidden on Android
271      */
272     @Deprecated
resetInfo(Info info)273     protected static void resetInfo(Info info) {
274         info.reset();
275     }
276     /**
277      * @deprecated This API is ICU internal only.
278      * @hide original deprecated declaration
279      * @hide draft / provisional / internal are hidden on Android
280      */
281     @Deprecated
hasCertainErrors(Info info, EnumSet<Error> errors)282     protected static boolean hasCertainErrors(Info info, EnumSet<Error> errors) {
283         return !info.errors.isEmpty() && !Collections.disjoint(info.errors, errors);
284     }
285     /**
286      * @deprecated This API is ICU internal only.
287      * @hide original deprecated declaration
288      * @hide draft / provisional / internal are hidden on Android
289      */
290     @Deprecated
hasCertainLabelErrors(Info info, EnumSet<Error> errors)291     protected static boolean hasCertainLabelErrors(Info info, EnumSet<Error> errors) {
292         return !info.labelErrors.isEmpty() && !Collections.disjoint(info.labelErrors, errors);
293     }
294     /**
295      * @deprecated This API is ICU internal only.
296      * @hide original deprecated declaration
297      * @hide draft / provisional / internal are hidden on Android
298      */
299     @Deprecated
addLabelError(Info info, Error error)300     protected static void addLabelError(Info info, Error error) {
301         info.labelErrors.add(error);
302     }
303     /**
304      * @deprecated This API is ICU internal only.
305      * @hide original deprecated declaration
306      * @hide draft / provisional / internal are hidden on Android
307      */
308     @Deprecated
promoteAndResetLabelErrors(Info info)309     protected static void promoteAndResetLabelErrors(Info info) {
310         if(!info.labelErrors.isEmpty()) {
311             info.errors.addAll(info.labelErrors);
312             info.labelErrors.clear();
313         }
314     }
315     /**
316      * @deprecated This API is ICU internal only.
317      * @hide original deprecated declaration
318      * @hide draft / provisional / internal are hidden on Android
319      */
320     @Deprecated
addError(Info info, Error error)321     protected static void addError(Info info, Error error) {
322         info.errors.add(error);
323     }
324     /**
325      * @deprecated This API is ICU internal only.
326      * @hide original deprecated declaration
327      * @hide draft / provisional / internal are hidden on Android
328      */
329     @Deprecated
setTransitionalDifferent(Info info)330     protected static void setTransitionalDifferent(Info info) {
331         info.isTransDiff=true;
332     }
333     /**
334      * @deprecated This API is ICU internal only.
335      * @hide original deprecated declaration
336      * @hide draft / provisional / internal are hidden on Android
337      */
338     @Deprecated
setBiDi(Info info)339     protected static void setBiDi(Info info) {
340         info.isBiDi=true;
341     }
342     /**
343      * @deprecated This API is ICU internal only.
344      * @hide original deprecated declaration
345      * @hide draft / provisional / internal are hidden on Android
346      */
347     @Deprecated
isBiDi(Info info)348     protected static boolean isBiDi(Info info) {
349         return info.isBiDi;
350     }
351     /**
352      * @deprecated This API is ICU internal only.
353      * @hide original deprecated declaration
354      * @hide draft / provisional / internal are hidden on Android
355      */
356     @Deprecated
setNotOkBiDi(Info info)357     protected static void setNotOkBiDi(Info info) {
358         info.isOkBiDi=false;
359     }
360     /**
361      * @deprecated This API is ICU internal only.
362      * @hide original deprecated declaration
363      * @hide draft / provisional / internal are hidden on Android
364      */
365     @Deprecated
isOkBiDi(Info info)366     protected static boolean isOkBiDi(Info info) {
367         return info.isOkBiDi;
368     }
369 
370     /**
371      * IDNA error bit set values.
372      * When a domain name or label fails a processing step or does not meet the
373      * validity criteria, then one or more of these error bits are set.
374      */
public enum Error {
    /** The whole domain name, or one of its non-final labels, is empty. */
    EMPTY_LABEL,

    /**
     * A domain name label exceeds 63 bytes.
     * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
     * Checked only in ToASCII operations, and only if the output label is all-ASCII.
     */
    LABEL_TOO_LONG,

    /**
     * A domain name exceeds 255 bytes in its storage form.
     * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
     * Checked only in ToASCII operations, and only if the output domain name is all-ASCII.
     */
    DOMAIN_NAME_TOO_LONG,

    /** The first character of a label is a hyphen-minus ('-'). */
    LEADING_HYPHEN,

    /** The last character of a label is a hyphen-minus ('-'). */
    TRAILING_HYPHEN,

    /** A label has hyphen-minus ('-') in both its third and fourth positions. */
    HYPHEN_3_4,

    /** The first character of a label is a combining mark. */
    LEADING_COMBINING_MARK,

    /** Disallowed characters occur in a label or domain name. */
    DISALLOWED,

    /**
     * A label begins with "xn--" but what follows is not valid Punycode.
     * In other words, Punycode decoding of an xn-- label failed.
     */
    PUNYCODE,

    /**
     * A dot=full stop occurs inside a label.
     * Possible when a single-label function is given such an input string.
     */
    LABEL_HAS_DOT,

    /**
     * An ACE label does not hold a valid label string.
     * ACE (Punycode) decoding of the label succeeded, but the decoded
     * string had severe validation errors. For example,
     * it might contain characters that are not allowed in ACE labels,
     * or it might not be normalized.
     */
    INVALID_ACE_LABEL,

    /** A label violates the IDNA BiDi requirements (for right-to-left characters). */
    BIDI,

    /** A label violates the IDNA CONTEXTJ requirements. */
    CONTEXTJ,

    /**
     * A label violates the IDNA CONTEXTO requirements for punctuation characters.
     * Some punctuation characters "Would otherwise have been DISALLOWED"
     * but are allowed in certain contexts. (RFC 5892)
     */
    CONTEXTO_PUNCTUATION,

    /**
     * A label violates the IDNA CONTEXTO requirements for digits.
     * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
     */
    CONTEXTO_DIGITS
}
450 
451     /**
452      * Sole constructor. (For invocation by subclass constructors, typically implicit.)
453      * @deprecated This API is ICU internal only.
454      * @hide original deprecated declaration
455      * @hide draft / provisional / internal are hidden on Android
456      */
457     @Deprecated
IDNA()458     protected IDNA() {
459     }
460 
461     /* IDNA2003 API ------------------------------------------------------------- */
462 
463     /**
464      * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
465      * This operation is done on <b>single labels</b> before sending it to something that expects
466      * ASCII names. A label is an individual part of a domain name. Labels are usually
467      * separated by dots; e.g., "www.example.com" is composed of 3 labels
468      * "www","example", and "com".
469      *
470      * @param src       The input string to be processed
471      * @param options   A bit set of options:
472      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
473      *                              and do not use STD3 ASCII rules
474      *                              If unassigned code points are found the operation fails with
475      *                              StringPrepParseException.
476      *
477      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
478      *                              If this option is set, the unassigned code points in the input
479      *                              are treated as normal Unicode code points.
480      *
481      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
482      *                              If this option is set and the input does not satisfy STD3 rules,
483      *                              the operation will fail with ParseException
484      * @return StringBuffer the converted String
485      * @throws StringPrepParseException When an error occurs for parsing a string.
486      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
487      * @hide original deprecated declaration
488      */
489     @Deprecated
convertToASCII(String src, int options)490     public static StringBuffer convertToASCII(String src, int options)
491         throws StringPrepParseException{
492         UCharacterIterator iter = UCharacterIterator.getInstance(src);
493         return convertToASCII(iter,options);
494     }
495 
496     /**
497      * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
498      * This operation is done on <b>single labels</b> before sending it to something that expects
499      * ASCII names. A label is an individual part of a domain name. Labels are usually
500      * separated by dots; e.g., "www.example.com" is composed of 3 labels
501      * "www","example", and "com".
502      *
503      * @param src       The input string as StringBuffer to be processed
504      * @param options   A bit set of options:
505      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
506      *                              and do not use STD3 ASCII rules
507      *                              If unassigned code points are found the operation fails with
508      *                              ParseException.
509      *
510      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
511      *                              If this option is set, the unassigned code points in the input
512      *                              are treated as normal Unicode code points.
513      *
514      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
515      *                              If this option is set and the input does not satisfy STD3 rules,
516      *                              the operation will fail with ParseException
517      * @return StringBuffer the converted String
518      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
519      * @hide original deprecated declaration
520      */
521     @Deprecated
convertToASCII(StringBuffer src, int options)522     public static StringBuffer convertToASCII(StringBuffer src, int options)
523         throws StringPrepParseException{
524         UCharacterIterator iter = UCharacterIterator.getInstance(src);
525         return convertToASCII(iter,options);
526     }
527 
528     /**
529      * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
530      * This operation is done on <b>single labels</b> before sending it to something that expects
531      * ASCII names. A label is an individual part of a domain name. Labels are usually
532      * separated by dots; e.g., "www.example.com" is composed of 3 labels
533      * "www","example", and "com".
534      *
535      * @param src       The input string as UCharacterIterator to be processed
536      * @param options   A bit set of options:
537      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
538      *                              and do not use STD3 ASCII rules
539      *                              If unassigned code points are found the operation fails with
540      *                              ParseException.
541      *
542      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
543      *                              If this option is set, the unassigned code points in the input
544      *                              are treated as normal Unicode code points.
545      *
546      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
547      *                              If this option is set and the input does not satisfy STD3 rules,
548      *                              the operation will fail with ParseException
549      * @return StringBuffer the converted String
550      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
551      * @hide original deprecated declaration
552      */
553     @Deprecated
convertToASCII(UCharacterIterator src, int options)554     public static StringBuffer convertToASCII(UCharacterIterator src, int options)
555                 throws StringPrepParseException{
556         return IDNA2003.convertToASCII(src, options);
557     }
558 
559     /**
560      * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
561      * This operation is done on complete domain names, e.g: "www.example.com".
562      * It is important to note that this operation can fail. If it fails, then the input
563      * domain name cannot be used as an Internationalized Domain Name and the application
564      * should have methods defined to deal with the failure.
565      *
566      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
567      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
568      * and then convert. This function does not offer that level of granularity. The options once
569      * set will apply to all labels in the domain name
570      *
571      * @param src       The input string as UCharacterIterator to be processed
572      * @param options   A bit set of options:
573      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
574      *                              and do not use STD3 ASCII rules
575      *                              If unassigned code points are found the operation fails with
576      *                              ParseException.
577      *
578      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
579      *                              If this option is set, the unassigned code points in the input
580      *                              are treated as normal Unicode code points.
581      *
582      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
583      *                              If this option is set and the input does not satisfy STD3 rules,
584      *                              the operation will fail with ParseException
585      * @return StringBuffer the converted String
586      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
587      * @hide original deprecated declaration
588      */
589     @Deprecated
convertIDNToASCII(UCharacterIterator src, int options)590     public static StringBuffer convertIDNToASCII(UCharacterIterator src, int options)
591             throws StringPrepParseException{
592         return convertIDNToASCII(src.getText(), options);
593     }
594 
595     /**
596      * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
597      * This operation is done on complete domain names, e.g: "www.example.com".
598      * It is important to note that this operation can fail. If it fails, then the input
599      * domain name cannot be used as an Internationalized Domain Name and the application
600      * should have methods defined to deal with the failure.
601      *
602      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
603      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
604      * and then convert. This function does not offer that level of granularity. The options once
605      * set will apply to all labels in the domain name
606      *
607      * @param src       The input string as a StringBuffer to be processed
608      * @param options   A bit set of options:
609      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
610      *                              and do not use STD3 ASCII rules
611      *                              If unassigned code points are found the operation fails with
612      *                              ParseException.
613      *
614      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
615      *                              If this option is set, the unassigned code points in the input
616      *                              are treated as normal Unicode code points.
617      *
618      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
619      *                              If this option is set and the input does not satisfy STD3 rules,
620      *                              the operation will fail with ParseException
621      * @return StringBuffer the converted String
622      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
623      * @hide original deprecated declaration
624      */
625     @Deprecated
convertIDNToASCII(StringBuffer src, int options)626     public static StringBuffer convertIDNToASCII(StringBuffer src, int options)
627             throws StringPrepParseException{
628             return convertIDNToASCII(src.toString(), options);
629     }
630 
631     /**
632      * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
633      * This operation is done on complete domain names, e.g: "www.example.com".
634      * It is important to note that this operation can fail. If it fails, then the input
635      * domain name cannot be used as an Internationalized Domain Name and the application
636      * should have methods defined to deal with the failure.
637      *
638      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
639      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
640      * and then convert. This function does not offer that level of granularity. The options once
641      * set will apply to all labels in the domain name
642      *
643      * @param src       The input string to be processed
644      * @param options   A bit set of options:
645      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
646      *                              and do not use STD3 ASCII rules
647      *                              If unassigned code points are found the operation fails with
648      *                              ParseException.
649      *
650      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
651      *                              If this option is set, the unassigned code points are in the input
652      *                              are treated as normal Unicode code points.
653      *
654      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
655      *                              If this option is set and the input does not satisfy STD3 rules,
656      *                              the operation will fail with ParseException
657      * @return StringBuffer the converted String
658      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
659      * @hide original deprecated declaration
660      */
661     @Deprecated
convertIDNToASCII(String src,int options)662     public static StringBuffer convertIDNToASCII(String src,int options)
663             throws StringPrepParseException{
664         return IDNA2003.convertIDNToASCII(src, options);
665     }
666 
667 
668     /**
669      * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
670      * This operation is done on <b>single labels</b> before sending it to something that expects
671      * Unicode names. A label is an individual part of a domain name. Labels are usually
672      * separated by dots; for e.g." "www.example.com" is composed of 3 labels
673      * "www","example", and "com".
674      *
675      * @param src       The input string to be processed
676      * @param options   A bit set of options:
677      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
678      *                              and do not use STD3 ASCII rules
679      *                              If unassigned code points are found the operation fails with
680      *                              ParseException.
681      *
682      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
683      *                              If this option is set, the unassigned code points are in the input
684      *                              are treated as normal Unicode code points.
685      *
686      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
687      *                              If this option is set and the input does not satisfy STD3 rules,
688      *                              the operation will fail with ParseException
689      * @return StringBuffer the converted String
690      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
691      * @hide original deprecated declaration
692      */
693     @Deprecated
convertToUnicode(String src, int options)694     public static StringBuffer convertToUnicode(String src, int options)
695            throws StringPrepParseException{
696         UCharacterIterator iter = UCharacterIterator.getInstance(src);
697         return convertToUnicode(iter,options);
698     }
699 
700     /**
701      * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
702      * This operation is done on <b>single labels</b> before sending it to something that expects
703      * Unicode names. A label is an individual part of a domain name. Labels are usually
704      * separated by dots; for e.g." "www.example.com" is composed of 3 labels
705      * "www","example", and "com".
706      *
707      * @param src       The input string as StringBuffer to be processed
708      * @param options   A bit set of options:
709      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
710      *                              and do not use STD3 ASCII rules
711      *                              If unassigned code points are found the operation fails with
712      *                              ParseException.
713      *
714      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
715      *                              If this option is set, the unassigned code points are in the input
716      *                              are treated as normal Unicode code points.
717      *
718      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
719      *                              If this option is set and the input does not satisfy STD3 rules,
720      *                              the operation will fail with ParseException
721      * @return StringBuffer the converted String
722      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
723      * @hide original deprecated declaration
724      */
725     @Deprecated
convertToUnicode(StringBuffer src, int options)726     public static StringBuffer convertToUnicode(StringBuffer src, int options)
727            throws StringPrepParseException{
728         UCharacterIterator iter = UCharacterIterator.getInstance(src);
729         return convertToUnicode(iter,options);
730     }
731 
732     /**
733      * IDNA2003: Function that implements the ToUnicode operation as defined in the IDNA RFC.
734      * This operation is done on <b>single labels</b> before sending it to something that expects
735      * Unicode names. A label is an individual part of a domain name. Labels are usually
736      * separated by dots; for e.g." "www.example.com" is composed of 3 labels
737      * "www","example", and "com".
738      *
739      * @param src       The input string as UCharacterIterator to be processed
740      * @param options   A bit set of options:
741      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
742      *                              and do not use STD3 ASCII rules
743      *                              If unassigned code points are found the operation fails with
744      *                              ParseException.
745      *
746      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
747      *                              If this option is set, the unassigned code points are in the input
748      *                              are treated as normal Unicode code points.
749      *
750      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
751      *                              If this option is set and the input does not satisfy STD3 rules,
752      *                              the operation will fail with ParseException
753      * @return StringBuffer the converted String
754      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
755      * @hide original deprecated declaration
756      */
757     @Deprecated
convertToUnicode(UCharacterIterator src, int options)758     public static StringBuffer convertToUnicode(UCharacterIterator src, int options)
759            throws StringPrepParseException{
760         return IDNA2003.convertToUnicode(src, options);
761     }
762 
763     /**
764      * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
765      * This operation is done on complete domain names, e.g: "www.example.com".
766      *
767      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
768      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
769      * and then convert. This function does not offer that level of granularity. The options once
770      * set will apply to all labels in the domain name
771      *
772      * @param src       The input string as UCharacterIterator to be processed
773      * @param options   A bit set of options:
774      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
775      *                              and do not use STD3 ASCII rules
776      *                              If unassigned code points are found the operation fails with
777      *                              ParseException.
778      *
779      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
780      *                              If this option is set, the unassigned code points are in the input
781      *                              are treated as normal Unicode code points.
782      *
783      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
784      *                              If this option is set and the input does not satisfy STD3 rules,
785      *                              the operation will fail with ParseException
786      * @return StringBuffer the converted String
787      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
788      * @hide original deprecated declaration
789      */
790     @Deprecated
convertIDNToUnicode(UCharacterIterator src, int options)791     public static StringBuffer convertIDNToUnicode(UCharacterIterator src, int options)
792         throws StringPrepParseException{
793         return convertIDNToUnicode(src.getText(), options);
794     }
795 
796     /**
797      * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
798      * This operation is done on complete domain names, e.g: "www.example.com".
799      *
800      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
801      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
802      * and then convert. This function does not offer that level of granularity. The options once
803      * set will apply to all labels in the domain name
804      *
805      * @param src       The input string as StringBuffer to be processed
806      * @param options   A bit set of options:
807      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
808      *                              and do not use STD3 ASCII rules
809      *                              If unassigned code points are found the operation fails with
810      *                              ParseException.
811      *
812      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
813      *                              If this option is set, the unassigned code points are in the input
814      *                              are treated as normal Unicode code points.
815      *
816      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
817      *                              If this option is set and the input does not satisfy STD3 rules,
818      *                              the operation will fail with ParseException
819      * @return StringBuffer the converted String
820      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
821      * @hide original deprecated declaration
822      */
823     @Deprecated
convertIDNToUnicode(StringBuffer src, int options)824     public static StringBuffer convertIDNToUnicode(StringBuffer src, int options)
825         throws StringPrepParseException{
826         return convertIDNToUnicode(src.toString(), options);
827     }
828 
829     /**
830      * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
831      * This operation is done on complete domain names, e.g: "www.example.com".
832      *
833      * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
834      * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
835      * and then convert. This function does not offer that level of granularity. The options once
836      * set will apply to all labels in the domain name
837      *
838      * @param src       The input string to be processed
839      * @param options   A bit set of options:
840      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
841      *                              and do not use STD3 ASCII rules
842      *                              If unassigned code points are found the operation fails with
843      *                              ParseException.
844      *
845      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
846      *                              If this option is set, the unassigned code points are in the input
847      *                              are treated as normal Unicode code points.
848      *
849      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
850      *                              If this option is set and the input does not satisfy STD3 rules,
851      *                              the operation will fail with ParseException
852      * @return StringBuffer the converted String
853      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
854      * @hide original deprecated declaration
855      */
856     @Deprecated
convertIDNToUnicode(String src, int options)857     public static StringBuffer convertIDNToUnicode(String src, int options)
858             throws StringPrepParseException{
859         return IDNA2003.convertIDNToUnicode(src, options);
860     }
861 
862     /**
863      * IDNA2003: Compare two IDN strings for equivalence.
864      * This function splits the domain names into labels and compares them.
865      * According to IDN RFC, whenever two labels are compared, they are
866      * considered equal if and only if their ASCII forms (obtained by
867      * applying toASCII) match using an case-insensitive ASCII comparison.
868      * Two domain names are considered a match if and only if all labels
869      * match regardless of whether label separators match.
870      *
871      * @param s1        First IDN string as StringBuffer
872      * @param s2        Second IDN string as StringBuffer
873      * @param options   A bit set of options:
874      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
875      *                              and do not use STD3 ASCII rules
876      *                              If unassigned code points are found the operation fails with
877      *                              ParseException.
878      *
879      *  - IDNA.ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
880      *                              If this option is set, the unassigned code points are in the input
881      *                              are treated as normal Unicode code points.
882      *
883      *  - IDNA.USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
884      *                              If this option is set and the input does not satisfy STD3 rules,
885      *                              the operation will fail with ParseException
886      * @return 0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2
887      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
888      * @hide original deprecated declaration
889      */
890     @Deprecated
compare(StringBuffer s1, StringBuffer s2, int options)891     public static int compare(StringBuffer s1, StringBuffer s2, int options)
892         throws StringPrepParseException{
893         if(s1==null || s2 == null){
894             throw new IllegalArgumentException("One of the source buffers is null");
895         }
896         return IDNA2003.compare(s1.toString(), s2.toString(), options);
897     }
898 
899     /**
900      * IDNA2003: Compare two IDN strings for equivalence.
901      * This function splits the domain names into labels and compares them.
902      * According to IDN RFC, whenever two labels are compared, they are
903      * considered equal if and only if their ASCII forms (obtained by
904      * applying toASCII) match using an case-insensitive ASCII comparison.
905      * Two domain names are considered a match if and only if all labels
906      * match regardless of whether label separators match.
907      *
908      * @param s1        First IDN string
909      * @param s2        Second IDN string
910      * @param options   A bit set of options:
911      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
912      *                              and do not use STD3 ASCII rules
913      *                              If unassigned code points are found the operation fails with
914      *                              ParseException.
915      *
916      *  - IDNA.ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations
917      *                              If this option is set, the unassigned code points are in the input
918      *                              are treated as normal Unicode code points.
919      *
920      *  - IDNA.USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions
921      *                              If this option is set and the input does not satisfy STD3 rules,
922      *                              the operation will fail with ParseException
923      * @return 0 if the strings are equal, &gt; 0 if s1 &gt; s2 and &lt; 0 if s1 &lt; s2
924      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
925      * @hide original deprecated declaration
926      */
927     @Deprecated
compare(String s1, String s2, int options)928     public static int compare(String s1, String s2, int options) throws StringPrepParseException{
929         if(s1==null || s2 == null){
930             throw new IllegalArgumentException("One of the source buffers is null");
931         }
932         return IDNA2003.compare(s1, s2, options);
933     }
934     /**
935      * IDNA2003: Compare two IDN strings for equivalence.
936      * This function splits the domain names into labels and compares them.
937      * According to IDN RFC, whenever two labels are compared, they are
938      * considered equal if and only if their ASCII forms (obtained by
939      * applying toASCII) match using an case-insensitive ASCII comparison.
940      * Two domain names are considered a match if and only if all labels
941      * match regardless of whether label separators match.
942      *
943      * @param s1        First IDN string as UCharacterIterator
944      * @param s2        Second IDN string as UCharacterIterator
945      * @param options   A bit set of options:
946      *  - IDNA.DEFAULT              Use default options, i.e., do not process unassigned code points
947      *                              and do not use STD3 ASCII rules
948      *                              If unassigned code points are found the operation fails with
949      *                              ParseException.
950      *
951      *  - IDNA.ALLOW_UNASSIGNED     Unassigned values can be converted to ASCII for query operations
952      *                              If this option is set, the unassigned code points are in the input
953      *                              are treated as normal Unicode code points.
954      *
955      *  - IDNA.USE_STD3_RULES       Use STD3 ASCII rules for host name syntax restrictions
956      *                              If this option is set and the input does not satisfy STD3 rules,
957      *                              the operation will fail with ParseException
958      * @return 0 if the strings are equal, &gt; 0 if i1 &gt; i2 and &lt; 0 if i1 &lt; i2
959      * @deprecated ICU 55 Use UTS 46 instead via {@link #getUTS46Instance(int)}.
960      * @hide original deprecated declaration
961      */
962     @Deprecated
compare(UCharacterIterator s1, UCharacterIterator s2, int options)963     public static int compare(UCharacterIterator s1, UCharacterIterator s2, int options)
964         throws StringPrepParseException{
965         if(s1==null || s2 == null){
966             throw new IllegalArgumentException("One of the source buffers is null");
967         }
968         return IDNA2003.compare(s1.getText(), s2.getText(), options);
969     }
970 }
971