• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  *
6  *   Copyright (C) 2003-2014, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  *******************************************************************************
10  *   file name:  uidna.h
11  *   encoding:   UTF-8
12  *   tab size:   8 (not used)
13  *   indentation:4
14  *
15  *   created on: 2003feb1
16  *   created by: Ram Viswanadha
17  */
18 
19 #ifndef __UIDNA_H__
20 #define __UIDNA_H__
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_IDNA
25 
26 #include <stdbool.h>
27 #include "unicode/parseerr.h"
28 
29 #if U_SHOW_CPLUSPLUS_API
30 #include "unicode/localpointer.h"
31 #endif   // U_SHOW_CPLUSPLUS_API
32 
33 /**
34  * \file
35  * \brief C API: Internationalizing Domain Names in Applications (IDNA)
36  *
37  * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
38  *
39  * The C API functions which do take a UIDNA * service object pointer
40  * implement UTS #46 and IDNA2008.
41  *
42  * IDNA2003 is obsolete.
43  * The C API functions which do not take a service object pointer
44  * implement IDNA2003. They are all deprecated.
45  */
46 
47 /*
48  * IDNA option bit set values.
49  */
50 enum {
51     /**
52      * Default options value: UTS #46 nontransitional processing.
53      * For use in static worker and factory methods.
54      *
55      * Since ICU 76, this is the same as
56      * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE,
57      * corresponding to Unicode 15.1 UTS #46 deprecating transitional processing.
58      * (These options are ignored by the IDNA2003 implementation.)
59      *
60      * Before ICU 76, this constant did not set any of the options.
61      *
62      * @stable ICU 2.6
63      */
64     UIDNA_DEFAULT=0x30,
65 #ifndef U_HIDE_DEPRECATED_API
66     /**
67      * Option to allow unassigned code points in domain names and labels.
68      * For use in static worker and factory methods.
69      * <p>This option is ignored by the UTS46 implementation.
70      * (UTS #46 disallows unassigned code points.)
71      * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
72      */
73     UIDNA_ALLOW_UNASSIGNED=1,
74 #endif  /* U_HIDE_DEPRECATED_API */
75     /**
76      * Option to check whether the input conforms to the STD3 ASCII rules,
77      * for example the restriction of labels to LDH characters
78      * (ASCII Letters, Digits and Hyphen-Minus).
79      * For use in static worker and factory methods.
80      * @stable ICU 2.6
81      */
82     UIDNA_USE_STD3_RULES=2,
83     /**
84      * IDNA option to check for whether the input conforms to the BiDi rules.
85      * For use in static worker and factory methods.
86      * <p>This option is ignored by the IDNA2003 implementation.
87      * (IDNA2003 always performs a BiDi check.)
88      * @stable ICU 4.6
89      */
90     UIDNA_CHECK_BIDI=4,
91     /**
92      * IDNA option to check for whether the input conforms to the CONTEXTJ rules.
93      * For use in static worker and factory methods.
94      * <p>This option is ignored by the IDNA2003 implementation.
95      * (The CONTEXTJ check is new in IDNA2008.)
96      * @stable ICU 4.6
97      */
98     UIDNA_CHECK_CONTEXTJ=8,
99     /**
100      * IDNA option for nontransitional processing in ToASCII().
101      * For use in static worker and factory methods.
102      *
103      * <p>When this bit is not set, ToASCII() uses transitional processing.
104      * Unicode 15.1 UTS #46 deprecated transitional processing.
105      *
106      * <p>This option is ignored by the IDNA2003 implementation.
107      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
108      * @stable ICU 4.6
109      * @see UIDNA_DEFAULT
110      */
111     UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
112     /**
113      * IDNA option for nontransitional processing in ToUnicode().
114      * For use in static worker and factory methods.
115      *
116      * <p>When this bit is not set, ToUnicode() uses transitional processing.
117      * Unicode 15.1 UTS #46 deprecated transitional processing.
118      *
119      * <p>This option is ignored by the IDNA2003 implementation.
120      * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
121      * @stable ICU 4.6
122      * @see UIDNA_DEFAULT
123      */
124     UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
125     /**
126      * IDNA option to check for whether the input conforms to the CONTEXTO rules.
127      * For use in static worker and factory methods.
128      * <p>This option is ignored by the IDNA2003 implementation.
129      * (The CONTEXTO check is new in IDNA2008.)
130      * <p>This is for use by registries for IDNA2008 conformance.
131      * UTS #46 does not require the CONTEXTO check.
132      * @stable ICU 49
133      */
134     UIDNA_CHECK_CONTEXTO=0x40
135 };
136 
137 /**
138  * Opaque C service object type for the new (UTS #46) IDNA API; see uidna_openUTS46() and uidna_close().
139  * @stable ICU 4.6
140  */
141 struct UIDNA;
142 typedef struct UIDNA UIDNA;  /**< C typedef for struct UIDNA. @stable ICU 4.6 */
143 
144 /**
145  * Returns a UIDNA instance which implements UTS #46.
146  * Returns an unmodifiable instance, owned by the caller.
147  * Cache it for multiple operations, and uidna_close() it when done.
148  * The instance is thread-safe, that is, it can be used concurrently.
149  *
150  * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
151  *
152  * @param options Bit set to modify the processing and error checking.
153  *                These should include UIDNA_DEFAULT, or
154  *                UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
155  *                See option bit set values in uidna.h.
156  * @param pErrorCode Standard ICU error code. Its input value must
157  *                  pass the U_SUCCESS() test, or else the function returns
158  *                  immediately. Check for U_FAILURE() on output or use with
159  *                  function chaining. (See User Guide for details.)
160  * @return the UTS #46 UIDNA instance, if successful; owned by the caller, to be released with uidna_close()
161  * @stable ICU 4.6
162  */
163 U_CAPI UIDNA * U_EXPORT2
164 uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
165 
166 /**
167  * Closes a UIDNA instance.
168  * @param idna UIDNA instance to be closed, as returned by uidna_openUTS46()
169  * @stable ICU 4.6
170  */
171 U_CAPI void U_EXPORT2
172 uidna_close(UIDNA *idna);
173 
174 #if U_SHOW_CPLUSPLUS_API
175 
176 U_NAMESPACE_BEGIN
177 
178 /**
179  * \class LocalUIDNAPointer
180  * "Smart pointer" class, closes a UIDNA via uidna_close().
181  * For most methods see the LocalPointerBase base class.
182  *
183  * @see LocalPointerBase
184  * @see LocalPointer
185  * @stable ICU 4.6
186  */
187 U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);
188 
189 U_NAMESPACE_END
190 
191 #endif   // U_SHOW_CPLUSPLUS_API
192 
193 /**
194  * Output container for IDNA processing errors.
195  * Initialize with UIDNA_INFO_INITIALIZER:
196  * \code
197  * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
198  * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
199  * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
200  * \endcode
201  * @stable ICU 4.6
202  */
203 typedef struct UIDNAInfo {
204     /** sizeof(UIDNAInfo), as filled in by UIDNA_INFO_INITIALIZER. @stable ICU 4.6 */
205     int16_t size;
206     /**
207      * Set to true if transitional and nontransitional processing produce different results.
208      * For details see C++ IDNAInfo::isTransitionalDifferent().
209      * @stable ICU 4.6
210      */
211     UBool isTransitionalDifferent;
212     UBool reservedB3;  /**< Reserved field, do not use. @internal */
213     /**
214      * Bit set indicating IDNA processing errors. 0 if no errors.
215      * See UIDNA_ERROR_... constants.
216      * @stable ICU 4.6
217      */
218     uint32_t errors;
219     int32_t reservedI2;  /**< Reserved field, do not use. @internal */
220     int32_t reservedI3;  /**< Reserved field, do not use. @internal */
221 } UIDNAInfo;
222 
223 /**
224  * Static initializer for a UIDNAInfo struct: sets size to sizeof(UIDNAInfo) and all other fields to false/0.
225  * @stable ICU 4.6
226  */
227 #define UIDNA_INFO_INITIALIZER { \
228     (int16_t)sizeof(UIDNAInfo), \
229     false, false, \
230     0, 0, 0 }
231 
232 /**
233  * Converts a single domain name label into its ASCII form for DNS lookup.
234  * If any processing step fails, then pInfo->errors will be non-zero and
235  * the result might not be an ASCII string.
236  * The label might be modified according to the types of errors.
237  * Labels with severe errors will be left in (or turned into) their Unicode form.
238  *
239  * The UErrorCode indicates an error only in exceptional cases,
240  * such as a U_MEMORY_ALLOCATION_ERROR.
241  *
242  * @param idna UIDNA instance
243  * @param label Input domain name label
244  * @param length Label length, or -1 if NUL-terminated
245  * @param dest Destination string buffer
246  * @param capacity Destination buffer capacity
247  * @param pInfo Output container of IDNA processing details (initialize with UIDNA_INFO_INITIALIZER).
248  * @param pErrorCode Standard ICU error code. Its input value must
249  *                  pass the U_SUCCESS() test, or else the function returns
250  *                  immediately. Check for U_FAILURE() on output or use with
251  *                  function chaining. (See User Guide for details.)
252  * @return destination string length
253  * @stable ICU 4.6
254  */
255 U_CAPI int32_t U_EXPORT2
256 uidna_labelToASCII(const UIDNA *idna,
257                    const UChar *label, int32_t length,
258                    UChar *dest, int32_t capacity,
259                    UIDNAInfo *pInfo, UErrorCode *pErrorCode);
260 
261 /**
262  * Converts a single domain name label into its Unicode form for human-readable display.
263  * If any processing step fails, then pInfo->errors will be non-zero.
264  * The label might be modified according to the types of errors.
265  *
266  * The UErrorCode indicates an error only in exceptional cases,
267  * such as a U_MEMORY_ALLOCATION_ERROR.
268  *
269  * @param idna UIDNA instance
270  * @param label Input domain name label
271  * @param length Label length, or -1 if NUL-terminated
272  * @param dest Destination string buffer
273  * @param capacity Destination buffer capacity
274  * @param pInfo Output container of IDNA processing details (initialize with UIDNA_INFO_INITIALIZER).
275  * @param pErrorCode Standard ICU error code. Its input value must
276  *                  pass the U_SUCCESS() test, or else the function returns
277  *                  immediately. Check for U_FAILURE() on output or use with
278  *                  function chaining. (See User Guide for details.)
279  * @return destination string length
280  * @stable ICU 4.6
281  */
282 U_CAPI int32_t U_EXPORT2
283 uidna_labelToUnicode(const UIDNA *idna,
284                      const UChar *label, int32_t length,
285                      UChar *dest, int32_t capacity,
286                      UIDNAInfo *pInfo, UErrorCode *pErrorCode);
287 
288 /**
289  * Converts a whole domain name into its ASCII form for DNS lookup.
290  * If any processing step fails, then pInfo->errors will be non-zero and
291  * the result might not be an ASCII string.
292  * The domain name might be modified according to the types of errors.
293  * Labels with severe errors will be left in (or turned into) their Unicode form.
294  *
295  * The UErrorCode indicates an error only in exceptional cases,
296  * such as a U_MEMORY_ALLOCATION_ERROR.
297  *
298  * @param idna UIDNA instance
299  * @param name Input domain name
300  * @param length Domain name length, or -1 if NUL-terminated
301  * @param dest Destination string buffer
302  * @param capacity Destination buffer capacity
303  * @param pInfo Output container of IDNA processing details (initialize with UIDNA_INFO_INITIALIZER).
304  * @param pErrorCode Standard ICU error code. Its input value must
305  *                  pass the U_SUCCESS() test, or else the function returns
306  *                  immediately. Check for U_FAILURE() on output or use with
307  *                  function chaining. (See User Guide for details.)
308  * @return destination string length
309  * @stable ICU 4.6
310  */
311 U_CAPI int32_t U_EXPORT2
312 uidna_nameToASCII(const UIDNA *idna,
313                   const UChar *name, int32_t length,
314                   UChar *dest, int32_t capacity,
315                   UIDNAInfo *pInfo, UErrorCode *pErrorCode);
316 
317 /**
318  * Converts a whole domain name into its Unicode form for human-readable display.
319  * If any processing step fails, then pInfo->errors will be non-zero.
320  * The domain name might be modified according to the types of errors.
321  *
322  * The UErrorCode indicates an error only in exceptional cases,
323  * such as a U_MEMORY_ALLOCATION_ERROR.
324  *
325  * @param idna UIDNA instance
326  * @param name Input domain name
327  * @param length Domain name length, or -1 if NUL-terminated
328  * @param dest Destination string buffer
329  * @param capacity Destination buffer capacity
330  * @param pInfo Output container of IDNA processing details (initialize with UIDNA_INFO_INITIALIZER).
331  * @param pErrorCode Standard ICU error code. Its input value must
332  *                  pass the U_SUCCESS() test, or else the function returns
333  *                  immediately. Check for U_FAILURE() on output or use with
334  *                  function chaining. (See User Guide for details.)
335  * @return destination string length
336  * @stable ICU 4.6
337  */
338 U_CAPI int32_t U_EXPORT2
339 uidna_nameToUnicode(const UIDNA *idna,
340                     const UChar *name, int32_t length,
341                     UChar *dest, int32_t capacity,
342                     UIDNAInfo *pInfo, UErrorCode *pErrorCode);
343 
344 /* UTF-8 versions of the processing methods --------------------------------- */
345 
346 /**
347  * Converts a single domain name label into its ASCII form for DNS lookup.
348  * UTF-8 version of uidna_labelToASCII(), same behavior.
349  *
350  * @param idna UIDNA instance
351  * @param label Input domain name label, in UTF-8
352  * @param length Label length, or -1 if NUL-terminated
353  * @param dest Destination string buffer
354  * @param capacity Destination buffer capacity
355  * @param pInfo Output container of IDNA processing details (initialize with UIDNA_INFO_INITIALIZER).
356  * @param pErrorCode Standard ICU error code. Its input value must
357  *                  pass the U_SUCCESS() test, or else the function returns
358  *                  immediately. Check for U_FAILURE() on output or use with
359  *                  function chaining. (See User Guide for details.)
360  * @return destination string length
361  * @stable ICU 4.6
362  */
363 U_CAPI int32_t U_EXPORT2
364 uidna_labelToASCII_UTF8(const UIDNA *idna,
365                         const char *label, int32_t length,
366                         char *dest, int32_t capacity,
367                         UIDNAInfo *pInfo, UErrorCode *pErrorCode);
368 
369 /**
370  * Converts a single domain name label into its Unicode form for human-readable display.
371  * UTF-8 version of uidna_labelToUnicode(), same behavior.
372  *
373  * @param idna UIDNA instance
374  * @param label Input domain name label, in UTF-8
375  * @param length Label length, or -1 if NUL-terminated
376  * @param dest Destination string buffer
377  * @param capacity Destination buffer capacity
378  * @param pInfo Output container of IDNA processing details (initialize with UIDNA_INFO_INITIALIZER).
379  * @param pErrorCode Standard ICU error code. Its input value must
380  *                  pass the U_SUCCESS() test, or else the function returns
381  *                  immediately. Check for U_FAILURE() on output or use with
382  *                  function chaining. (See User Guide for details.)
383  * @return destination string length
384  * @stable ICU 4.6
385  */
386 U_CAPI int32_t U_EXPORT2
387 uidna_labelToUnicodeUTF8(const UIDNA *idna,
388                          const char *label, int32_t length,
389                          char *dest, int32_t capacity,
390                          UIDNAInfo *pInfo, UErrorCode *pErrorCode);
391 
392 /**
393  * Converts a whole domain name into its ASCII form for DNS lookup.
394  * UTF-8 version of uidna_nameToASCII(), same behavior.
395  *
396  * @param idna UIDNA instance
397  * @param name Input domain name, in UTF-8
398  * @param length Domain name length, or -1 if NUL-terminated
399  * @param dest Destination string buffer
400  * @param capacity Destination buffer capacity
401  * @param pInfo Output container of IDNA processing details (initialize with UIDNA_INFO_INITIALIZER).
402  * @param pErrorCode Standard ICU error code. Its input value must
403  *                  pass the U_SUCCESS() test, or else the function returns
404  *                  immediately. Check for U_FAILURE() on output or use with
405  *                  function chaining. (See User Guide for details.)
406  * @return destination string length
407  * @stable ICU 4.6
408  */
409 U_CAPI int32_t U_EXPORT2
410 uidna_nameToASCII_UTF8(const UIDNA *idna,
411                        const char *name, int32_t length,
412                        char *dest, int32_t capacity,
413                        UIDNAInfo *pInfo, UErrorCode *pErrorCode);
414 
415 /**
416  * Converts a whole domain name into its Unicode form for human-readable display.
417  * UTF-8 version of uidna_nameToUnicode(), same behavior.
418  *
419  * @param idna UIDNA instance
420  * @param name Input domain name, in UTF-8
421  * @param length Domain name length, or -1 if NUL-terminated
422  * @param dest Destination string buffer
423  * @param capacity Destination buffer capacity
424  * @param pInfo Output container of IDNA processing details (initialize with UIDNA_INFO_INITIALIZER).
425  * @param pErrorCode Standard ICU error code. Its input value must
426  *                  pass the U_SUCCESS() test, or else the function returns
427  *                  immediately. Check for U_FAILURE() on output or use with
428  *                  function chaining. (See User Guide for details.)
429  * @return destination string length
430  * @stable ICU 4.6
431  */
432 U_CAPI int32_t U_EXPORT2
433 uidna_nameToUnicodeUTF8(const UIDNA *idna,
434                         const char *name, int32_t length,
435                         char *dest, int32_t capacity,
436                         UIDNAInfo *pInfo, UErrorCode *pErrorCode);
437 
438 /*
439  * IDNA error bit set values.
440  * When a domain name or label fails a processing step or does not meet the
441  * validity criteria, then one or more of these error bits are set in UIDNAInfo.errors.
442  */
443 enum {
444     /**
445      * A non-final domain name label (or the whole domain name) is empty.
446      * @stable ICU 4.6
447      */
448     UIDNA_ERROR_EMPTY_LABEL=1,
449     /**
450      * A domain name label is longer than 63 bytes.
451      * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
452      * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
453      * @stable ICU 4.6
454      */
455     UIDNA_ERROR_LABEL_TOO_LONG=2,
456     /**
457      * A domain name is longer than 255 bytes in its storage form.
458      * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
459      * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
460      * @stable ICU 4.6
461      */
462     UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
463     /**
464      * A label starts with a hyphen-minus ('-').
465      * @stable ICU 4.6
466      */
467     UIDNA_ERROR_LEADING_HYPHEN=8,
468     /**
469      * A label ends with a hyphen-minus ('-').
470      * @stable ICU 4.6
471      */
472     UIDNA_ERROR_TRAILING_HYPHEN=0x10,
473     /**
474      * A label contains hyphen-minus ('-') in the third and fourth positions.
475      * @stable ICU 4.6
476      */
477     UIDNA_ERROR_HYPHEN_3_4=0x20,
478     /**
479      * A label starts with a combining mark.
480      * @stable ICU 4.6
481      */
482     UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
483     /**
484      * A label or domain name contains disallowed characters.
485      * @stable ICU 4.6
486      */
487     UIDNA_ERROR_DISALLOWED=0x80,
488     /**
489      * A label starts with "xn--" but does not contain valid Punycode.
490      * That is, an xn-- label failed Punycode decoding.
491      * @stable ICU 4.6
492      */
493     UIDNA_ERROR_PUNYCODE=0x100,
494     /**
495      * A label contains a dot=full stop.
496      * This can occur in an input string for a single-label function.
497      * @stable ICU 4.6
498      */
499     UIDNA_ERROR_LABEL_HAS_DOT=0x200,
500     /**
501      * An ACE label does not contain a valid label string.
502      * The label was successfully ACE (Punycode) decoded but the resulting
503      * string had severe validation errors. For example,
504      * it might contain characters that are not allowed in ACE labels,
505      * or it might not be normalized.
506      * @stable ICU 4.6
507      */
508     UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
509     /**
510      * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
511      * @stable ICU 4.6
512      */
513     UIDNA_ERROR_BIDI=0x800,
514     /**
515      * A label does not meet the IDNA CONTEXTJ requirements.
516      * @stable ICU 4.6
517      */
518     UIDNA_ERROR_CONTEXTJ=0x1000,
519     /**
520      * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
521      * Some punctuation characters "Would otherwise have been DISALLOWED"
522      * but are allowed in certain contexts. (RFC 5892)
523      * @stable ICU 49
524      */
525     UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
526     /**
527      * A label does not meet the IDNA CONTEXTO requirements for digits.
528      * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
529      * @stable ICU 49
530      */
531     UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
532 };
533 
534 #ifndef U_HIDE_DEPRECATED_API
535 
536 /* IDNA2003 API ------------------------------------------------------------- */
537 
538 /**
539  * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
540  * This operation is done on <b>single labels</b> before sending it to something that expects
541  * ASCII names. A label is an individual part of a domain name. Labels are usually
542  * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
543  *
544  * IDNA2003 API Overview:
545  *
546  * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
547  * (http://www.ietf.org/rfc/rfc3490.txt).
548  * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
549  * containing non-ASCII code points are processed by the
550  * ToASCII operation before passing it to resolver libraries. Domain names
551  * that are obtained from resolver libraries are processed by the
552  * ToUnicode operation before displaying the domain name to the user.
553  * IDNA requires that implementations process input strings with Nameprep
554  * (http://www.ietf.org/rfc/rfc3491.txt),
555  * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
556  * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
557  * Implementations of IDNA MUST fully implement Nameprep and Punycode;
558  * neither Nameprep nor Punycode are optional.
559  * The input and output of ToASCII and ToUnicode operations are Unicode
560  * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
561  * multiple times to an input string will yield the same result as applying the operation
562  * once.
563  * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
564  * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
565  *
566  * @param src               Input UChar array containing label in Unicode.
567  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
568  * @param dest              Output UChar array with ASCII (ACE encoded) label.
569  * @param destCapacity      Size of dest.
570  * @param options           A bit set of options:
571  *
572  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
573  *                              and do not use STD3 ASCII rules.
574  *                              If unassigned code points are found the operation fails with
575  *                              U_IDNA_UNASSIGNED_ERROR error code.
576  *
577  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
578  *                              If this option is set, the unassigned code points in the input
579  *                              are treated as normal Unicode code points.
580  *
581  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
582  *                              If this option is set and the input does not satisfy STD3 rules,
583  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
584  *
585  * @param parseError        Pointer to UParseError struct to receive information on position
586  *                          of error if an error is encountered. Can be NULL.
587  * @param status            ICU in/out error code parameter.
588  *                          U_INVALID_CHAR_FOUND if src contains
589  *                          unmatched single surrogates.
590  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
591  *                          too many code points.
592  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
593  * @return The length of the result string, both if successful and in case of a buffer overflow;
594  *         in the latter case it will be greater than destCapacity.
595  * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
596  */
597 U_DEPRECATED int32_t U_EXPORT2
598 uidna_toASCII(const UChar* src, int32_t srcLength,
599               UChar* dest, int32_t destCapacity,
600               int32_t options,
601               UParseError* parseError,
602               UErrorCode* status);
603 
604 
605 /**
606  * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
607  * This operation is done on <b>single labels</b> before sending it to something that expects
608  * Unicode names. A label is an individual part of a domain name. Labels are usually
609  * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
610  *
611  * @param src               Input UChar array containing ASCII (ACE encoded) label.
612  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
613  * @param dest              Output UChar array containing the converted Unicode equivalent of label.
614  * @param destCapacity      Size of dest.
615  * @param options           A bit set of options:
616  *
617  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
618  *                              and do not use STD3 ASCII rules.
619  *                              If unassigned code points are found the operation fails with
620  *                              U_IDNA_UNASSIGNED_ERROR error code.
621  *
622  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
623  *                              If this option is set, the unassigned code points in the input
624  *                              are treated as normal Unicode code points. <b> Note: </b> This option is
625  *                              required on toUnicode operation because the RFC mandates
626  *                              verification of decoded ACE input by applying toASCII and comparing
627  *                              its output with source.
628  *
629  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
630  *                              If this option is set and the input does not satisfy STD3 rules,
631  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
632  *
633  * @param parseError        Pointer to UParseError struct to receive information on position
634  *                          of error if an error is encountered. Can be NULL.
635  * @param status            ICU in/out error code parameter.
636  *                          U_INVALID_CHAR_FOUND if src contains
637  *                          unmatched single surrogates.
638  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
639  *                          too many code points.
640  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
641  * @return The length of the result string, both if successful and in case of a buffer overflow;
642  *         in the latter case it will be greater than destCapacity.
643  * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
644  */
645 U_DEPRECATED int32_t U_EXPORT2
646 uidna_toUnicode(const UChar* src, int32_t srcLength,
647                 UChar* dest, int32_t destCapacity,
648                 int32_t options,
649                 UParseError* parseError,
650                 UErrorCode* status);
651 
652 
653 /**
654  * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
655  * This operation is done on complete domain names, e.g: "www.example.com".
656  * It is important to note that this operation can fail. If it fails, then the input
657  * domain name cannot be used as an Internationalized Domain Name and the application
658  * should have methods defined to deal with the failure.
659  *
660  * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
661  * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
662  * and then convert. This function does not offer that level of granularity. The options once
663  * set will apply to all labels in the domain name.
664  *
665  * @param src               Input UChar array containing IDN in Unicode.
666  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
667  * @param dest              Output UChar array with ASCII (ACE encoded) IDN.
668  * @param destCapacity      Size of dest.
669  * @param options           A bit set of options:
670  *
671  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
672  *                              and do not use STD3 ASCII rules.
673  *                              If unassigned code points are found the operation fails with
674  *                              U_IDNA_UNASSIGNED_ERROR error code.
675  *
676  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
677  *                              If this option is set, the unassigned code points in the input
678  *                              are treated as normal Unicode code points.
679  *
680  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
681  *                              If this option is set and the input does not satisfy STD3 rules,
682  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
683  *
684  * @param parseError        Pointer to UParseError struct to receive information on position
685  *                          of error if an error is encountered. Can be NULL.
686  * @param status            ICU in/out error code parameter.
687  *                          U_INVALID_CHAR_FOUND if src contains
688  *                          unmatched single surrogates.
689  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
690  *                          too many code points.
691  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
692  * @return The length of the result string, both if successful and in case of a buffer overflow;
693  *         in the latter case it will be greater than destCapacity.
694  * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
695  */
696 U_DEPRECATED int32_t U_EXPORT2
697 uidna_IDNToASCII(  const UChar* src, int32_t srcLength,
698                    UChar* dest, int32_t destCapacity,
699                    int32_t options,
700                    UParseError* parseError,
701                    UErrorCode* status);
702 
703 /**
704  * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
705  * This operation is done on complete domain names, e.g., "www.example.com".
706  *
707  * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain name
708  * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
709  * and then convert. This function does not offer that level of granularity. The options, once
710  * set, apply to all labels in the domain name.
711  *
712  * @param src               Input UChar array containing IDN in ASCII (ACE encoded) form.
713  * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
714  * @param dest              Output UChar array containing Unicode equivalent of source IDN.
715  * @param destCapacity      Size of dest.
716  * @param options           A bit set of options:
717  *
718  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
719  *                              and do not use STD3 ASCII rules.
720  *                              If unassigned code points are found, the operation fails with
721  *                              the U_UNASSIGNED_CODE_POINT_FOUND error code.
722  *
723  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
724  *                              If this option is set, unassigned code points in the input
725  *                              are treated as normal Unicode code points.
726  *
727  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
728  *                              If this option is set and the input does not satisfy STD3 rules,
729  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
730  *
731  * @param parseError        Pointer to UParseError struct to receive information on position
732  *                          of error if an error is encountered. Can be NULL.
733  * @param status            ICU in/out error code parameter.
734  *                          U_INVALID_CHAR_FOUND if src contains
735  *                          unmatched single surrogates.
736  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
737  *                          too many code points.
738  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is too small.
739  * @return The length of the result string, if successful or in case of a buffer overflow;
740  *         in the latter case it will be greater than destCapacity.
741  * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
742  */
743 U_DEPRECATED int32_t U_EXPORT2
744 uidna_IDNToUnicode(  const UChar* src, int32_t srcLength,
745                      UChar* dest, int32_t destCapacity,
746                      int32_t options,
747                      UParseError* parseError,
748                      UErrorCode* status);
749 
750 /**
751  * IDNA2003: Compare two IDN strings for equivalence.
752  * This function splits the domain names into labels and compares them.
753  * According to the IDN RFC, whenever two labels are compared, they are
754  * considered equal if and only if their ASCII forms (obtained by
755  * applying toASCII) match using a case-insensitive ASCII comparison.
756  * Two domain names are considered a match if and only if all labels
757  * match, regardless of whether label separators match.
758  *
759  * @param s1                First source string.
760  * @param length1           Length of first source string, or -1 if NUL-terminated.
761  *
762  * @param s2                Second source string.
763  * @param length2           Length of second source string, or -1 if NUL-terminated.
764  * @param options           A bit set of options:
765  *
766  *  - UIDNA_DEFAULT             Use default options, i.e., do not process unassigned code points
767  *                              and do not use STD3 ASCII rules.
768  *                              If unassigned code points are found, the operation fails with
769  *                              the U_UNASSIGNED_CODE_POINT_FOUND error code.
770  *
771  *  - UIDNA_ALLOW_UNASSIGNED    Unassigned values can be converted to ASCII for query operations.
772  *                              If this option is set, unassigned code points in the input
773  *                              are treated as normal Unicode code points.
774  *
775  *  - UIDNA_USE_STD3_RULES      Use STD3 ASCII rules for host name syntax restrictions.
776  *                              If this option is set and the input does not satisfy STD3 rules,
777  *                              the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
778  *
779  * @param status            ICU error code in/out parameter.
780  *                          Must fulfill U_SUCCESS before the function call.
781  * @return <0, 0, or >0, as usual for string comparisons.
782  * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
783  */
784 U_DEPRECATED int32_t U_EXPORT2
785 uidna_compare(  const UChar *s1, int32_t length1,
786                 const UChar *s2, int32_t length2,
787                 int32_t options,
788                 UErrorCode* status);
789 
790 #endif  /* U_HIDE_DEPRECATED_API */
791 
792 #endif /* #if !UCONFIG_NO_IDNA */
793 
794 #endif
795