• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *   Copyright (C) 2010, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   file name:  idna.h
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2010mar05
12 *   created by: Markus W. Scherer
13 */
14 
15 #ifndef __IDNA_H__
16 #define __IDNA_H__
17 
18 /**
19  * \file
20  * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
21  */
22 
23 #include "unicode/utypes.h"
24 
25 #if !UCONFIG_NO_IDNA
26 
27 #include "unicode/bytestream.h"
28 #include "unicode/stringpiece.h"
29 #include "unicode/uidna.h"
30 #include "unicode/unistr.h"
31 
32 U_NAMESPACE_BEGIN
33 
34 class U_COMMON_API IDNAInfo;
35 
36 /**
37  * Abstract base class for IDNA processing.
38  * See http://www.unicode.org/reports/tr46/
39  * and http://www.ietf.org/rfc/rfc3490.txt
40  *
41  * The IDNA class is not intended for public subclassing.
42  *
43  * This C++ API currently only implements UTS #46.
44  * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
45  * and IDNA2003 (functions that do not use a service object).
46  * @draft ICU 4.6
47  */
48 class U_COMMON_API IDNA : public UObject {
49 public:
50     /**
51      * Returns an IDNA instance which implements UTS #46.
52      * Returns an unmodifiable instance, owned by the caller.
53      * Cache it for multiple operations, and delete it when done.
54      * The instance is thread-safe, that is, it can be used concurrently.
55      *
56      * UTS #46 defines Unicode IDNA Compatibility Processing,
57      * updated to the latest version of Unicode and compatible with both
58      * IDNA2003 and IDNA2008.
59      *
60      * The worker functions use transitional processing, including deviation mappings,
61      * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
62      * is used in which case the deviation characters are passed through without change.
63      *
64      * Disallowed characters are mapped to U+FFFD.
65      *
66      * For available options see the uidna.h header.
67      * Operations with the UTS #46 instance do not support the
68      * UIDNA_ALLOW_UNASSIGNED option.
69      *
70      * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
71      * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
72      * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
73      *
74      * @param options Bit set to modify the processing and error checking.
75      *                See option bit set values in uidna.h.
76      * @param errorCode Standard ICU error code. Its input value must
77      *                  pass the U_SUCCESS() test, or else the function returns
78      *                  immediately. Check for U_FAILURE() on output or use with
79      *                  function chaining. (See User Guide for details.)
80      * @return the UTS #46 IDNA instance, if successful
81      * @draft ICU 4.6
82      */
83     static IDNA *
84     createUTS46Instance(uint32_t options, UErrorCode &errorCode);
85 
86     /**
87      * Converts a single domain name label into its ASCII form for DNS lookup.
88      * If any processing step fails, then info.hasErrors() will be TRUE and
89      * the result might not be an ASCII string.
90      * The label might be modified according to the types of errors.
91      * Labels with severe errors will be left in (or turned into) their Unicode form.
92      *
93      * The UErrorCode indicates an error only in exceptional cases,
94      * such as a U_MEMORY_ALLOCATION_ERROR.
95      *
96      * @param label Input domain name label
97      * @param dest Destination string object
98      * @param info Output container of IDNA processing details.
99      * @param errorCode Standard ICU error code. Its input value must
100      *                  pass the U_SUCCESS() test, or else the function returns
101      *                  immediately. Check for U_FAILURE() on output or use with
102      *                  function chaining. (See User Guide for details.)
103      * @return dest
104      * @draft ICU 4.6
105      */
106     virtual UnicodeString &
107     labelToASCII(const UnicodeString &label, UnicodeString &dest,
108                  IDNAInfo &info, UErrorCode &errorCode) const = 0;
109 
110     /**
111      * Converts a single domain name label into its Unicode form for human-readable display.
112      * If any processing step fails, then info.hasErrors() will be TRUE.
113      * The label might be modified according to the types of errors.
114      *
115      * The UErrorCode indicates an error only in exceptional cases,
116      * such as a U_MEMORY_ALLOCATION_ERROR.
117      *
118      * @param label Input domain name label
119      * @param dest Destination string object
120      * @param info Output container of IDNA processing details.
121      * @param errorCode Standard ICU error code. Its input value must
122      *                  pass the U_SUCCESS() test, or else the function returns
123      *                  immediately. Check for U_FAILURE() on output or use with
124      *                  function chaining. (See User Guide for details.)
125      * @return dest
126      * @draft ICU 4.6
127      */
128     virtual UnicodeString &
129     labelToUnicode(const UnicodeString &label, UnicodeString &dest,
130                    IDNAInfo &info, UErrorCode &errorCode) const = 0;
131 
132     /**
133      * Converts a whole domain name into its ASCII form for DNS lookup.
134      * If any processing step fails, then info.hasErrors() will be TRUE and
135      * the result might not be an ASCII string.
136      * The domain name might be modified according to the types of errors.
137      * Labels with severe errors will be left in (or turned into) their Unicode form.
138      *
139      * The UErrorCode indicates an error only in exceptional cases,
140      * such as a U_MEMORY_ALLOCATION_ERROR.
141      *
142      * @param name Input domain name
143      * @param dest Destination string object
144      * @param info Output container of IDNA processing details.
145      * @param errorCode Standard ICU error code. Its input value must
146      *                  pass the U_SUCCESS() test, or else the function returns
147      *                  immediately. Check for U_FAILURE() on output or use with
148      *                  function chaining. (See User Guide for details.)
149      * @return dest
150      * @draft ICU 4.6
151      */
152     virtual UnicodeString &
153     nameToASCII(const UnicodeString &name, UnicodeString &dest,
154                 IDNAInfo &info, UErrorCode &errorCode) const = 0;
155 
156     /**
157      * Converts a whole domain name into its Unicode form for human-readable display.
158      * If any processing step fails, then info.hasErrors() will be TRUE.
159      * The domain name might be modified according to the types of errors.
160      *
161      * The UErrorCode indicates an error only in exceptional cases,
162      * such as a U_MEMORY_ALLOCATION_ERROR.
163      *
164      * @param name Input domain name
165      * @param dest Destination string object
166      * @param info Output container of IDNA processing details.
167      * @param errorCode Standard ICU error code. Its input value must
168      *                  pass the U_SUCCESS() test, or else the function returns
169      *                  immediately. Check for U_FAILURE() on output or use with
170      *                  function chaining. (See User Guide for details.)
171      * @return dest
172      * @draft ICU 4.6
173      */
174     virtual UnicodeString &
175     nameToUnicode(const UnicodeString &name, UnicodeString &dest,
176                   IDNAInfo &info, UErrorCode &errorCode) const = 0;
177 
178     // UTF-8 versions of the processing methods ---------------------------- ***
        // Unlike the UnicodeString methods above, these four are declared without "= 0":
        // they are virtual but not pure virtual. Their definitions are not in this header.
        // They take a StringPiece as input, write to a ByteSink, and return void.
179 
180     /**
181      * Converts a single domain name label into its ASCII form for DNS lookup.
182      * UTF-8 version of labelToASCII(), same behavior.
183      *
184      * @param label Input domain name label (UTF-8)
185      * @param dest Destination byte sink; Flush()ed if successful
186      * @param info Output container of IDNA processing details.
187      * @param errorCode Standard ICU error code. Its input value must
188      *                  pass the U_SUCCESS() test, or else the function returns
189      *                  immediately. Check for U_FAILURE() on output or use with
190      *                  function chaining. (See User Guide for details.)
191      * Returns nothing (void); the output is delivered through the dest byte sink.
192      * @draft ICU 4.6
193      */
194     virtual void
195     labelToASCII_UTF8(const StringPiece &label, ByteSink &dest,
196                       IDNAInfo &info, UErrorCode &errorCode) const;
197 
198     /**
199      * Converts a single domain name label into its Unicode form for human-readable display.
200      * UTF-8 version of labelToUnicode(), same behavior.
201      *
202      * @param label Input domain name label (UTF-8)
203      * @param dest Destination byte sink; Flush()ed if successful
204      * @param info Output container of IDNA processing details.
205      * @param errorCode Standard ICU error code. Its input value must
206      *                  pass the U_SUCCESS() test, or else the function returns
207      *                  immediately. Check for U_FAILURE() on output or use with
208      *                  function chaining. (See User Guide for details.)
209      * Returns nothing (void); the output is delivered through the dest byte sink.
210      * @draft ICU 4.6
211      */
212     virtual void
213     labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest,
214                        IDNAInfo &info, UErrorCode &errorCode) const;
215 
216     /**
217      * Converts a whole domain name into its ASCII form for DNS lookup.
218      * UTF-8 version of nameToASCII(), same behavior.
219      *
220      * @param name Input domain name (UTF-8)
221      * @param dest Destination byte sink; Flush()ed if successful
222      * @param info Output container of IDNA processing details.
223      * @param errorCode Standard ICU error code. Its input value must
224      *                  pass the U_SUCCESS() test, or else the function returns
225      *                  immediately. Check for U_FAILURE() on output or use with
226      *                  function chaining. (See User Guide for details.)
227      * Returns nothing (void); the output is delivered through the dest byte sink.
228      * @draft ICU 4.6
229      */
230     virtual void
231     nameToASCII_UTF8(const StringPiece &name, ByteSink &dest,
232                      IDNAInfo &info, UErrorCode &errorCode) const;
233 
234     /**
235      * Converts a whole domain name into its Unicode form for human-readable display.
236      * UTF-8 version of nameToUnicode(), same behavior.
237      *
238      * @param name Input domain name (UTF-8)
239      * @param dest Destination byte sink; Flush()ed if successful
240      * @param info Output container of IDNA processing details.
241      * @param errorCode Standard ICU error code. Its input value must
242      *                  pass the U_SUCCESS() test, or else the function returns
243      *                  immediately. Check for U_FAILURE() on output or use with
244      *                  function chaining. (See User Guide for details.)
245      * Returns nothing (void); the output is delivered through the dest byte sink.
246      * @draft ICU 4.6
247      */
248     virtual void
249     nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest,
250                       IDNAInfo &info, UErrorCode &errorCode) const;
251 
252 private:
253     // No ICU "poor man's RTTI" for this class nor its subclasses.
        // Declared in the private section, so callers cannot invoke it through an
        // IDNA reference or pointer. Its definition is not part of this header.
254     virtual UClassID getDynamicClassID() const;
255 };
256 
257 class UTS46;
258 
259 /**
260  * Output container for IDNA processing errors.
261  * The IDNAInfo class is not suitable for subclassing.
262  * @draft ICU 4.6
263  */
264 class U_COMMON_API IDNAInfo : public UMemory {
265 public:
266     /**
267      * Constructor for stack allocation.
         * Starts with no errors recorded; the initial field values are the same
         * ones that the private reset() assigns.
268      * @draft ICU 4.6
269      */
IDNAInfo()270     IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
271     /**
272      * Were there IDNA processing errors?
273      * @return TRUE if there were processing errors, that is, if the error bit set is nonzero
274      * @draft ICU 4.6
275      */
hasErrors()276     UBool hasErrors() const { return errors!=0; }
277     /**
278      * Returns a bit set indicating IDNA processing errors.
279      * See UIDNA_ERROR_... constants in uidna.h.
280      * @return bit set of processing errors
281      * @draft ICU 4.6
282      */
getErrors()283     uint32_t getErrors() const { return errors; }
284     /**
285      * Returns TRUE if transitional and nontransitional processing produce different results.
286      * This is the case when the input label or domain name contains
287      * one or more deviation characters outside a Punycode label (see UTS #46).
288      * <ul>
289      * <li>With nontransitional processing, such characters are
290      * copied to the destination string.
291      * <li>With transitional processing, such characters are
292      * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
293      * </ul>
294      * @return TRUE if transitional and nontransitional processing produce different results
295      * @draft ICU 4.6
296      */
isTransitionalDifferent()297     UBool isTransitionalDifferent() const { return isTransDiff; }
298 
299 private:
        // UTS46 is granted access to the private reset() and to the fields below.
300     friend class UTS46;
301 
        // Both copy operations are declared private and are not defined in this header.
302     IDNAInfo(const IDNAInfo &other);  // no copying
303     IDNAInfo &operator=(const IDNAInfo &other);  // no copying
304 
        // Puts the object back into the state the constructor establishes:
        // errors and labelErrors zero, isTransDiff and isBiDi FALSE, isOkBiDi TRUE.
reset()305     void reset() {
306         errors=labelErrors=0;
307         isTransDiff=FALSE;
308         isBiDi=FALSE;
309         isOkBiDi=TRUE;
310     }
311 
        // errors: the bit set returned by getErrors(); hasErrors() tests it against zero.
        // labelErrors: never read in this header; presumably error bits for the label
        // currently being processed by UTS46 (TODO confirm against the implementation).
312     uint32_t errors, labelErrors;
        // The value reported by isTransitionalDifferent().
313     UBool isTransDiff;
        // isBiDi / isOkBiDi: never read in this header; presumably bidirectional-text
        // state tracked by UTS46 (TODO confirm against the implementation).
314     UBool isBiDi;
315     UBool isOkBiDi;
316 };
317 
318 U_NAMESPACE_END
319 
320 #endif  // UCONFIG_NO_IDNA
321 #endif  // __IDNA_H__
322