• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2002-2003, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  punycode.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2002jan31
16 *   created by: Markus W. Scherer
17 */
18 
19 /* This ICU code derived from: */
20 /*
21 punycode.c 0.4.0 (2001-Nov-17-Sat)
22 http://www.cs.berkeley.edu/~amc/idn/
23 Adam M. Costello
24 http://www.nicemice.net/amc/
25 */
26 
27 #ifndef __PUNYCODE_H__
28 #define __PUNYCODE_H__
29 
30 #include "unicode/utypes.h"
31 
32 #if !UCONFIG_NO_IDNA
33 
34 /**
35  * u_strToPunycode() converts Unicode to Punycode.
36  *
37  * The input string must not contain single, unpaired surrogates.
38  * The output will be represented as an array of ASCII code points.
39  *
40  * The output string is NUL-terminated according to normal ICU
41  * string output rules.
42  *
43  * @param src Input Unicode string.
44  *            This function handles a limited number of code points
45  *            (the limit is >=64).
46  *            U_INDEX_OUTOFBOUNDS_ERROR is set if the limit is exceeded.
47  * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
48  * @param dest Output Punycode array.
49  * @param destCapacity Size of dest.
50  * @param caseFlags Vector of boolean values, one per input UChar,
51  *                  indicating how the corresponding character is to be
52  *                  marked for the decoder, which may optionally
53  *                  uppercase (TRUE) or lowercase (FALSE)
54  *                  the character.
55  *                  ASCII characters are output directly in the case as marked.
56  *                  Flags corresponding to trail surrogates are ignored.
57  *                  If caseFlags==NULL then input characters are not
58  *                  case-mapped.
59  * @param pErrorCode ICU in/out error code parameter.
60  *                   U_INVALID_CHAR_FOUND if src contains
61  *                   unmatched single surrogates.
62  *                   U_INDEX_OUTOFBOUNDS_ERROR if src contains
63  *                   too many code points.
64  * @return Number of ASCII characters in dest.
65  *
66  * @see u_strFromPunycode
67  */
68 U_CFUNC int32_t
69 u_strToPunycode(const UChar *src, int32_t srcLength,
70                 UChar *dest, int32_t destCapacity,
71                 const UBool *caseFlags,
72                 UErrorCode *pErrorCode);
73 
74 /**
75  * u_strFromPunycode() converts Punycode to Unicode.
76  * The Unicode string will be at most as long (in UChars)
77  * as the Punycode string (in chars).
78  *
79  * @param src Input Punycode string.
80  * @param srcLength Length of src, or -1 if NUL-terminated.
81  * @param dest Output Unicode string buffer.
82  * @param destCapacity Size of dest in number of UChars,
83  *                     and of caseFlags in numbers of UBools.
84  * @param caseFlags Output array for case flags as
85  *                  defined by the Punycode string.
86  *                  The caller should uppercase (TRUE) or lowercase (FALSE)
87  *                  the corresponding character in dest.
88  *                  For supplementary characters, only the lead surrogate
89  *                  is marked, and FALSE is stored for the trail surrogate.
90  *                  This is redundant and not necessary for ASCII characters
91  *                  because they are already in the case indicated.
92  *                  Can be NULL if the case flags are not needed.
93  * @param pErrorCode ICU in/out error code parameter.
94  *                   U_INVALID_CHAR_FOUND if a non-ASCII character
95  *                   precedes the last delimiter ('-'),
96  *                   or if an invalid character (not a-zA-Z0-9) is found
97  *                   after the last delimiter.
98  *                   U_ILLEGAL_CHAR_FOUND if the delta sequence is ill-formed.
99  * @return Number of UChars written to dest.
100  *
101  * @see u_strToPunycode
102  */
103 U_CFUNC int32_t
104 u_strFromPunycode(const UChar *src, int32_t srcLength,
105                   UChar *dest, int32_t destCapacity,
106                   UBool *caseFlags,
107                   UErrorCode *pErrorCode);
108 
109 #endif /* #if !UCONFIG_NO_IDNA */
110 
111 #endif
112 
113 /*
114  * Hey, Emacs, please set the following:
115  *
116  * Local Variables:
117  * indent-tabs-mode: nil
118  * End:
119  *
120  */
121