• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *   Copyright (C) 2010-2012, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 *   file name:  idna.h
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2010mar05
14 *   created by: Markus W. Scherer
15 */
16 
17 #ifndef __IDNA_H__
18 #define __IDNA_H__
19 
20 /**
21  * \file
22  * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
23  */
24 
25 #include "unicode/utypes.h"
26 
27 #if U_SHOW_CPLUSPLUS_API
28 
29 #if !UCONFIG_NO_IDNA
30 
31 #include "unicode/bytestream.h"
32 #include "unicode/stringpiece.h"
33 #include "unicode/uidna.h"
34 #include "unicode/unistr.h"
35 
36 U_NAMESPACE_BEGIN
37 
38 class IDNAInfo;  // forward declaration: IDNA's methods take IDNAInfo & before the class is defined further down
39 
40 /**
41  * Abstract base class for IDNA processing.
42  * See http://www.unicode.org/reports/tr46/
43  * and http://www.ietf.org/rfc/rfc3490.txt
44  *
45  * The IDNA class is not intended for public subclassing.
46  *
47  * This C++ API currently only implements UTS #46.
48  * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
49  * and IDNA2003 (functions that do not use a service object).
50  * @stable ICU 4.6
51  */
52 class U_COMMON_API IDNA : public UObject {
53 public:
54     /**
55      * Destructor.
56      * @stable ICU 4.6
57      */
58     ~IDNA();
59 
60     /**
61      * Returns an IDNA instance which implements UTS #46.
62      * Returns an unmodifiable instance, owned by the caller.
63      * Cache it for multiple operations, and delete it when done.
64      * The instance is thread-safe, that is, it can be used concurrently.
65      *
66      * UTS #46 defines Unicode IDNA Compatibility Processing,
67      * updated to the latest version of Unicode and compatible with both
68      * IDNA2003 and IDNA2008.
69      *
70      * The worker functions use transitional processing, including deviation mappings,
71      * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
72      * is used in which case the deviation characters are passed through without change.
73      * <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b>
74      *
75      * Disallowed characters are mapped to U+FFFD.
76      *
77      * For available options see the uidna.h header.
78      * Operations with the UTS #46 instance do not support the
79      * UIDNA_ALLOW_UNASSIGNED option.
80      *
81      * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
82      * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
83      * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
84      *
85      * @param options Bit set to modify the processing and error checking.
86      *                These should include UIDNA_DEFAULT, or
87      *                UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE.
88      *                See option bit set values in uidna.h.
89      * @param errorCode Standard ICU error code. Its input value must
90      *                  pass the U_SUCCESS() test, or else the function returns
91      *                  immediately. Check for U_FAILURE() on output or use with
92      *                  function chaining. (See User Guide for details.)
93      * @return the UTS #46 IDNA instance, if successful
94      * @stable ICU 4.6
95      */
96     static IDNA *
97     createUTS46Instance(uint32_t options, UErrorCode &errorCode);
98 
99     /**
100      * Converts a single domain name label into its ASCII form for DNS lookup.
101      * If any processing step fails, then info.hasErrors() will be true and
102      * the result might not be an ASCII string.
103      * The label might be modified according to the types of errors.
104      * Labels with severe errors will be left in (or turned into) their Unicode form.
105      *
106      * The UErrorCode indicates an error only in exceptional cases,
107      * such as a U_MEMORY_ALLOCATION_ERROR.
108      *
109      * @param label Input domain name label
110      * @param dest Destination string object
111      * @param info Output container of IDNA processing details.
112      * @param errorCode Standard ICU error code. Its input value must
113      *                  pass the U_SUCCESS() test, or else the function returns
114      *                  immediately. Check for U_FAILURE() on output or use with
115      *                  function chaining. (See User Guide for details.)
116      * @return dest
117      * @stable ICU 4.6
118      */
119     virtual UnicodeString &
120     labelToASCII(const UnicodeString &label, UnicodeString &dest,
121                  IDNAInfo &info, UErrorCode &errorCode) const = 0;
122 
123     /**
124      * Converts a single domain name label into its Unicode form for human-readable display.
125      * If any processing step fails, then info.hasErrors() will be true.
126      * The label might be modified according to the types of errors.
127      *
128      * The UErrorCode indicates an error only in exceptional cases,
129      * such as a U_MEMORY_ALLOCATION_ERROR.
130      *
131      * @param label Input domain name label
132      * @param dest Destination string object
133      * @param info Output container of IDNA processing details.
134      * @param errorCode Standard ICU error code. Its input value must
135      *                  pass the U_SUCCESS() test, or else the function returns
136      *                  immediately. Check for U_FAILURE() on output or use with
137      *                  function chaining. (See User Guide for details.)
138      * @return dest
139      * @stable ICU 4.6
140      */
141     virtual UnicodeString &
142     labelToUnicode(const UnicodeString &label, UnicodeString &dest,
143                    IDNAInfo &info, UErrorCode &errorCode) const = 0;
144 
145     /**
146      * Converts a whole domain name into its ASCII form for DNS lookup.
147      * If any processing step fails, then info.hasErrors() will be true and
148      * the result might not be an ASCII string.
149      * The domain name might be modified according to the types of errors.
150      * Labels with severe errors will be left in (or turned into) their Unicode form.
151      *
152      * The UErrorCode indicates an error only in exceptional cases,
153      * such as a U_MEMORY_ALLOCATION_ERROR.
154      *
155      * @param name Input domain name
156      * @param dest Destination string object
157      * @param info Output container of IDNA processing details.
158      * @param errorCode Standard ICU error code. Its input value must
159      *                  pass the U_SUCCESS() test, or else the function returns
160      *                  immediately. Check for U_FAILURE() on output or use with
161      *                  function chaining. (See User Guide for details.)
162      * @return dest
163      * @stable ICU 4.6
164      */
165     virtual UnicodeString &
166     nameToASCII(const UnicodeString &name, UnicodeString &dest,
167                 IDNAInfo &info, UErrorCode &errorCode) const = 0;
168 
169     /**
170      * Converts a whole domain name into its Unicode form for human-readable display.
171      * If any processing step fails, then info.hasErrors() will be true.
172      * The domain name might be modified according to the types of errors.
173      *
174      * The UErrorCode indicates an error only in exceptional cases,
175      * such as a U_MEMORY_ALLOCATION_ERROR.
176      *
177      * @param name Input domain name
178      * @param dest Destination string object
179      * @param info Output container of IDNA processing details.
180      * @param errorCode Standard ICU error code. Its input value must
181      *                  pass the U_SUCCESS() test, or else the function returns
182      *                  immediately. Check for U_FAILURE() on output or use with
183      *                  function chaining. (See User Guide for details.)
184      * @return dest
185      * @stable ICU 4.6
186      */
187     virtual UnicodeString &
188     nameToUnicode(const UnicodeString &name, UnicodeString &dest,
189                   IDNAInfo &info, UErrorCode &errorCode) const = 0;
190 
191     // UTF-8 versions of the processing methods ---------------------------- ***
192     // Note: unlike the UnicodeString methods above, these are declared without "= 0" (not pure virtual).
193     /**
194      * Converts a single domain name label into its ASCII form for DNS lookup.
195      * UTF-8 version of labelToASCII(), same behavior; the output is written to dest and nothing is returned.
196      *
197      * @param label Input domain name label
198      * @param dest Destination byte sink; Flush()ed if successful
199      * @param info Output container of IDNA processing details.
200      * @param errorCode Standard ICU error code. Its input value must
201      *                  pass the U_SUCCESS() test, or else the function returns
202      *                  immediately. Check for U_FAILURE() on output or use with
203      *                  function chaining. (See User Guide for details.)
205      * @stable ICU 4.6
206      */
207     virtual void
208     labelToASCII_UTF8(StringPiece label, ByteSink &dest,
209                       IDNAInfo &info, UErrorCode &errorCode) const;
210 
211     /**
212      * Converts a single domain name label into its Unicode form for human-readable display.
213      * UTF-8 version of labelToUnicode(), same behavior; the output is written to dest and nothing is returned.
214      *
215      * @param label Input domain name label
216      * @param dest Destination byte sink; Flush()ed if successful
217      * @param info Output container of IDNA processing details.
218      * @param errorCode Standard ICU error code. Its input value must
219      *                  pass the U_SUCCESS() test, or else the function returns
220      *                  immediately. Check for U_FAILURE() on output or use with
221      *                  function chaining. (See User Guide for details.)
223      * @stable ICU 4.6
224      */
225     virtual void
226     labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
227                        IDNAInfo &info, UErrorCode &errorCode) const;
228 
229     /**
230      * Converts a whole domain name into its ASCII form for DNS lookup.
231      * UTF-8 version of nameToASCII(), same behavior; the output is written to dest and nothing is returned.
232      *
233      * @param name Input domain name
234      * @param dest Destination byte sink; Flush()ed if successful
235      * @param info Output container of IDNA processing details.
236      * @param errorCode Standard ICU error code. Its input value must
237      *                  pass the U_SUCCESS() test, or else the function returns
238      *                  immediately. Check for U_FAILURE() on output or use with
239      *                  function chaining. (See User Guide for details.)
241      * @stable ICU 4.6
242      */
243     virtual void
244     nameToASCII_UTF8(StringPiece name, ByteSink &dest,
245                      IDNAInfo &info, UErrorCode &errorCode) const;
246 
247     /**
248      * Converts a whole domain name into its Unicode form for human-readable display.
249      * UTF-8 version of nameToUnicode(), same behavior; the output is written to dest and nothing is returned.
250      *
251      * @param name Input domain name
252      * @param dest Destination byte sink; Flush()ed if successful
253      * @param info Output container of IDNA processing details.
254      * @param errorCode Standard ICU error code. Its input value must
255      *                  pass the U_SUCCESS() test, or else the function returns
256      *                  immediately. Check for U_FAILURE() on output or use with
257      *                  function chaining. (See User Guide for details.)
259      * @stable ICU 4.6
260      */
261     virtual void
262     nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
263                       IDNAInfo &info, UErrorCode &errorCode) const;
264 };
265 
266 class UTS46;  // forward declaration so that IDNAInfo (below) can name UTS46 as a friend
267 
268 /**
269  * Output container for IDNA processing errors.
270  * The IDNAInfo class is not suitable for subclassing.
271  * @stable ICU 4.6
272  */
273 class U_COMMON_API IDNAInfo : public UMemory {
274 public:
275     /**
276      * Constructor for stack allocation; starts in the same no-error state that reset() restores.
277      * @stable ICU 4.6
278      */
IDNAInfo()279     IDNAInfo() : errors(0), labelErrors(0), isTransDiff(false), isBiDi(false), isOkBiDi(true) {}
280     /**
281      * Were there IDNA processing errors?
282      * @return true if there were processing errors (that is, the getErrors() bit set is nonzero)
283      * @stable ICU 4.6
284      */
hasErrors()285     UBool hasErrors() const { return errors!=0; }
286     /**
287      * Returns a bit set indicating IDNA processing errors.
288      * See UIDNA_ERROR_... constants in uidna.h.
289      * @return bit set of processing errors
290      * @stable ICU 4.6
291      */
getErrors()292     uint32_t getErrors() const { return errors; }
293     /**
294      * Returns true if transitional and nontransitional processing produce different results.
295      * This is the case when the input label or domain name contains
296      * one or more deviation characters outside a Punycode label (see UTS #46).
297      * <ul>
298      * <li>With nontransitional processing, such characters are
299      * copied to the destination string.
300      * <li>With transitional processing, such characters are
301      * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
302      * </ul>
303      * @return true if transitional and nontransitional processing produce different results
304      * @stable ICU 4.6
305      */
isTransitionalDifferent()306     UBool isTransitionalDifferent() const { return isTransDiff; }
307 
308 private:
309     friend class UTS46;  // gives UTS46 access to reset() and the private fields below
310 
311     IDNAInfo(const IDNAInfo &other) = delete;  // no copying
312     IDNAInfo &operator=(const IDNAInfo &other) = delete;  // no copying
313 
reset()314     void reset() {  // restores exactly the state that the constructor sets up
315         errors=labelErrors=0;
316         isTransDiff=false;
317         isBiDi=false;
318         isOkBiDi=true;
319     }
320 
321     uint32_t errors, labelErrors;  // errors: bit set exposed via getErrors()/hasErrors(); labelErrors: not read in this header
322     UBool isTransDiff;  // value returned by isTransitionalDifferent()
323     UBool isBiDi;  // not read in this header; NOTE(review): presumably maintained by UTS46 -- confirm
324     UBool isOkBiDi;  // initialized/reset to true; not read in this header
325 };
326 
327 U_NAMESPACE_END
328 
329 #endif  // UCONFIG_NO_IDNA
330 
331 #endif /* U_SHOW_CPLUSPLUS_API */
332 
333 #endif  // __IDNA_H__
334