1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2010-2012, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: idna.h 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2010mar05 14 * created by: Markus W. Scherer 15 */ 16 17 #ifndef __IDNA_H__ 18 #define __IDNA_H__ 19 20 /** 21 * \file 22 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) 23 */ 24 25 #include "unicode/utypes.h" 26 27 #if U_SHOW_CPLUSPLUS_API 28 29 #if !UCONFIG_NO_IDNA 30 31 #include "unicode/bytestream.h" 32 #include "unicode/stringpiece.h" 33 #include "unicode/uidna.h" 34 #include "unicode/unistr.h" 35 36 U_NAMESPACE_BEGIN 37 38 class IDNAInfo; 39 40 /** 41 * Abstract base class for IDNA processing. 42 * See http://www.unicode.org/reports/tr46/ 43 * and http://www.ietf.org/rfc/rfc3490.txt 44 * 45 * The IDNA class is not intended for public subclassing. 46 * 47 * This C++ API currently only implements UTS #46. 48 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) 49 * and IDNA2003 (functions that do not use a service object). 50 * @stable ICU 4.6 51 */ 52 class U_COMMON_API IDNA : public UObject { 53 public: 54 /** 55 * Destructor. 56 * @stable ICU 4.6 57 */ 58 ~IDNA(); 59 60 /** 61 * Returns an IDNA instance which implements UTS #46. 62 * Returns an unmodifiable instance, owned by the caller. 63 * Cache it for multiple operations, and delete it when done. 64 * The instance is thread-safe, that is, it can be used concurrently. 65 * 66 * UTS #46 defines Unicode IDNA Compatibility Processing, 67 * updated to the latest version of Unicode and compatible with both 68 * IDNA2003 and IDNA2008. 69 * 70 * The worker functions use transitional processing, including deviation mappings, 71 * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE 72 * is used in which case the deviation characters are passed through without change. 73 * 74 * Disallowed characters are mapped to U+FFFD. 75 * 76 * For available options see the uidna.h header. 77 * Operations with the UTS #46 instance do not support the 78 * UIDNA_ALLOW_UNASSIGNED option. 79 * 80 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). 81 * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than 82 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. 83 * 84 * @param options Bit set to modify the processing and error checking. 85 * See option bit set values in uidna.h. 86 * @param errorCode Standard ICU error code. Its input value must 87 * pass the U_SUCCESS() test, or else the function returns 88 * immediately. Check for U_FAILURE() on output or use with 89 * function chaining. (See User Guide for details.) 90 * @return the UTS #46 IDNA instance, if successful 91 * @stable ICU 4.6 92 */ 93 static IDNA * 94 createUTS46Instance(uint32_t options, UErrorCode &errorCode); 95 96 /** 97 * Converts a single domain name label into its ASCII form for DNS lookup. 98 * If any processing step fails, then info.hasErrors() will be true and 99 * the result might not be an ASCII string. 100 * The label might be modified according to the types of errors. 101 * Labels with severe errors will be left in (or turned into) their Unicode form. 102 * 103 * The UErrorCode indicates an error only in exceptional cases, 104 * such as a U_MEMORY_ALLOCATION_ERROR. 105 * 106 * @param label Input domain name label 107 * @param dest Destination string object 108 * @param info Output container of IDNA processing details. 109 * @param errorCode Standard ICU error code. Its input value must 110 * pass the U_SUCCESS() test, or else the function returns 111 * immediately. Check for U_FAILURE() on output or use with 112 * function chaining. (See User Guide for details.) 113 * @return dest 114 * @stable ICU 4.6 115 */ 116 virtual UnicodeString & 117 labelToASCII(const UnicodeString &label, UnicodeString &dest, 118 IDNAInfo &info, UErrorCode &errorCode) const = 0; 119 120 /** 121 * Converts a single domain name label into its Unicode form for human-readable display. 122 * If any processing step fails, then info.hasErrors() will be true. 123 * The label might be modified according to the types of errors. 124 * 125 * The UErrorCode indicates an error only in exceptional cases, 126 * such as a U_MEMORY_ALLOCATION_ERROR. 127 * 128 * @param label Input domain name label 129 * @param dest Destination string object 130 * @param info Output container of IDNA processing details. 131 * @param errorCode Standard ICU error code. Its input value must 132 * pass the U_SUCCESS() test, or else the function returns 133 * immediately. Check for U_FAILURE() on output or use with 134 * function chaining. (See User Guide for details.) 135 * @return dest 136 * @stable ICU 4.6 137 */ 138 virtual UnicodeString & 139 labelToUnicode(const UnicodeString &label, UnicodeString &dest, 140 IDNAInfo &info, UErrorCode &errorCode) const = 0; 141 142 /** 143 * Converts a whole domain name into its ASCII form for DNS lookup. 144 * If any processing step fails, then info.hasErrors() will be true and 145 * the result might not be an ASCII string. 146 * The domain name might be modified according to the types of errors. 147 * Labels with severe errors will be left in (or turned into) their Unicode form. 148 * 149 * The UErrorCode indicates an error only in exceptional cases, 150 * such as a U_MEMORY_ALLOCATION_ERROR. 151 * 152 * @param name Input domain name 153 * @param dest Destination string object 154 * @param info Output container of IDNA processing details. 155 * @param errorCode Standard ICU error code. Its input value must 156 * pass the U_SUCCESS() test, or else the function returns 157 * immediately. Check for U_FAILURE() on output or use with 158 * function chaining. (See User Guide for details.) 159 * @return dest 160 * @stable ICU 4.6 161 */ 162 virtual UnicodeString & 163 nameToASCII(const UnicodeString &name, UnicodeString &dest, 164 IDNAInfo &info, UErrorCode &errorCode) const = 0; 165 166 /** 167 * Converts a whole domain name into its Unicode form for human-readable display. 168 * If any processing step fails, then info.hasErrors() will be true. 169 * The domain name might be modified according to the types of errors. 170 * 171 * The UErrorCode indicates an error only in exceptional cases, 172 * such as a U_MEMORY_ALLOCATION_ERROR. 173 * 174 * @param name Input domain name 175 * @param dest Destination string object 176 * @param info Output container of IDNA processing details. 177 * @param errorCode Standard ICU error code. Its input value must 178 * pass the U_SUCCESS() test, or else the function returns 179 * immediately. Check for U_FAILURE() on output or use with 180 * function chaining. (See User Guide for details.) 181 * @return dest 182 * @stable ICU 4.6 183 */ 184 virtual UnicodeString & 185 nameToUnicode(const UnicodeString &name, UnicodeString &dest, 186 IDNAInfo &info, UErrorCode &errorCode) const = 0; 187 188 // UTF-8 versions of the processing methods ---------------------------- *** 189 190 /** 191 * Converts a single domain name label into its ASCII form for DNS lookup. 192 * UTF-8 version of labelToASCII(), same behavior. 193 * 194 * @param label Input domain name label 195 * @param dest Destination byte sink; Flush()ed if successful 196 * @param info Output container of IDNA processing details. 197 * @param errorCode Standard ICU error code. Its input value must 198 * pass the U_SUCCESS() test, or else the function returns 199 * immediately. Check for U_FAILURE() on output or use with 200 * function chaining. (See User Guide for details.) 201 * @return dest 202 * @stable ICU 4.6 203 */ 204 virtual void 205 labelToASCII_UTF8(StringPiece label, ByteSink &dest, 206 IDNAInfo &info, UErrorCode &errorCode) const; 207 208 /** 209 * Converts a single domain name label into its Unicode form for human-readable display. 210 * UTF-8 version of labelToUnicode(), same behavior. 211 * 212 * @param label Input domain name label 213 * @param dest Destination byte sink; Flush()ed if successful 214 * @param info Output container of IDNA processing details. 215 * @param errorCode Standard ICU error code. Its input value must 216 * pass the U_SUCCESS() test, or else the function returns 217 * immediately. Check for U_FAILURE() on output or use with 218 * function chaining. (See User Guide for details.) 219 * @return dest 220 * @stable ICU 4.6 221 */ 222 virtual void 223 labelToUnicodeUTF8(StringPiece label, ByteSink &dest, 224 IDNAInfo &info, UErrorCode &errorCode) const; 225 226 /** 227 * Converts a whole domain name into its ASCII form for DNS lookup. 228 * UTF-8 version of nameToASCII(), same behavior. 229 * 230 * @param name Input domain name 231 * @param dest Destination byte sink; Flush()ed if successful 232 * @param info Output container of IDNA processing details. 233 * @param errorCode Standard ICU error code. Its input value must 234 * pass the U_SUCCESS() test, or else the function returns 235 * immediately. Check for U_FAILURE() on output or use with 236 * function chaining. (See User Guide for details.) 237 * @return dest 238 * @stable ICU 4.6 239 */ 240 virtual void 241 nameToASCII_UTF8(StringPiece name, ByteSink &dest, 242 IDNAInfo &info, UErrorCode &errorCode) const; 243 244 /** 245 * Converts a whole domain name into its Unicode form for human-readable display. 246 * UTF-8 version of nameToUnicode(), same behavior. 247 * 248 * @param name Input domain name 249 * @param dest Destination byte sink; Flush()ed if successful 250 * @param info Output container of IDNA processing details. 251 * @param errorCode Standard ICU error code. Its input value must 252 * pass the U_SUCCESS() test, or else the function returns 253 * immediately. Check for U_FAILURE() on output or use with 254 * function chaining. (See User Guide for details.) 255 * @return dest 256 * @stable ICU 4.6 257 */ 258 virtual void 259 nameToUnicodeUTF8(StringPiece name, ByteSink &dest, 260 IDNAInfo &info, UErrorCode &errorCode) const; 261 }; 262 263 class UTS46; 264 265 /** 266 * Output container for IDNA processing errors. 267 * The IDNAInfo class is not suitable for subclassing. 268 * @stable ICU 4.6 269 */ 270 class U_COMMON_API IDNAInfo : public UMemory { 271 public: 272 /** 273 * Constructor for stack allocation. 274 * @stable ICU 4.6 275 */ IDNAInfo()276 IDNAInfo() : errors(0), labelErrors(0), isTransDiff(false), isBiDi(false), isOkBiDi(true) {} 277 /** 278 * Were there IDNA processing errors? 279 * @return true if there were processing errors 280 * @stable ICU 4.6 281 */ hasErrors()282 UBool hasErrors() const { return errors!=0; } 283 /** 284 * Returns a bit set indicating IDNA processing errors. 285 * See UIDNA_ERROR_... constants in uidna.h. 286 * @return bit set of processing errors 287 * @stable ICU 4.6 288 */ getErrors()289 uint32_t getErrors() const { return errors; } 290 /** 291 * Returns true if transitional and nontransitional processing produce different results. 292 * This is the case when the input label or domain name contains 293 * one or more deviation characters outside a Punycode label (see UTS #46). 294 * <ul> 295 * <li>With nontransitional processing, such characters are 296 * copied to the destination string. 297 * <li>With transitional processing, such characters are 298 * mapped (sharp s/sigma) or removed (joiner/nonjoiner). 299 * </ul> 300 * @return true if transitional and nontransitional processing produce different results 301 * @stable ICU 4.6 302 */ isTransitionalDifferent()303 UBool isTransitionalDifferent() const { return isTransDiff; } 304 305 private: 306 friend class UTS46; 307 308 IDNAInfo(const IDNAInfo &other); // no copying 309 IDNAInfo &operator=(const IDNAInfo &other); // no copying 310 reset()311 void reset() { 312 errors=labelErrors=0; 313 isTransDiff=false; 314 isBiDi=false; 315 isOkBiDi=true; 316 } 317 318 uint32_t errors, labelErrors; 319 UBool isTransDiff; 320 UBool isBiDi; 321 UBool isOkBiDi; 322 }; 323 324 U_NAMESPACE_END 325 326 #endif // UCONFIG_NO_IDNA 327 328 #endif /* U_SHOW_CPLUSPLUS_API */ 329 330 #endif // __IDNA_H__ 331