1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2010-2012, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: idna.h 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2010mar05 14 * created by: Markus W. Scherer 15 */ 16 17 #ifndef __IDNA_H__ 18 #define __IDNA_H__ 19 20 /** 21 * \file 22 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) 23 */ 24 25 #include "unicode/utypes.h" 26 27 #if U_SHOW_CPLUSPLUS_API 28 29 #if !UCONFIG_NO_IDNA 30 31 #include "unicode/bytestream.h" 32 #include "unicode/stringpiece.h" 33 #include "unicode/uidna.h" 34 #include "unicode/unistr.h" 35 36 U_NAMESPACE_BEGIN 37 38 class IDNAInfo; 39 40 /** 41 * Abstract base class for IDNA processing. 42 * See http://www.unicode.org/reports/tr46/ 43 * and http://www.ietf.org/rfc/rfc3490.txt 44 * 45 * The IDNA class is not intended for public subclassing. 46 * 47 * This C++ API currently only implements UTS #46. 48 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) 49 * and IDNA2003 (functions that do not use a service object). 50 * @stable ICU 4.6 51 */ 52 class U_COMMON_API IDNA : public UObject { 53 public: 54 /** 55 * Destructor. 56 * @stable ICU 4.6 57 */ 58 ~IDNA(); 59 60 /** 61 * Returns an IDNA instance which implements UTS #46. 62 * Returns an unmodifiable instance, owned by the caller. 63 * Cache it for multiple operations, and delete it when done. 64 * The instance is thread-safe, that is, it can be used concurrently. 65 * 66 * UTS #46 defines Unicode IDNA Compatibility Processing, 67 * updated to the latest version of Unicode and compatible with both 68 * IDNA2003 and IDNA2008. 69 * 70 * The worker functions use transitional processing, including deviation mappings, 71 * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE 72 * is used in which case the deviation characters are passed through without change. 73 * <b>Unicode 15.1 UTS #46 deprecated transitional processing.</b> 74 * 75 * Disallowed characters are mapped to U+FFFD. 76 * 77 * For available options see the uidna.h header. 78 * Operations with the UTS #46 instance do not support the 79 * UIDNA_ALLOW_UNASSIGNED option. 80 * 81 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). 82 * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than 83 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. 84 * 85 * @param options Bit set to modify the processing and error checking. 86 * These should include UIDNA_DEFAULT, or 87 * UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_NONTRANSITIONAL_TO_UNICODE. 88 * See option bit set values in uidna.h. 89 * @param errorCode Standard ICU error code. Its input value must 90 * pass the U_SUCCESS() test, or else the function returns 91 * immediately. Check for U_FAILURE() on output or use with 92 * function chaining. (See User Guide for details.) 93 * @return the UTS #46 IDNA instance, if successful 94 * @stable ICU 4.6 95 */ 96 static IDNA * 97 createUTS46Instance(uint32_t options, UErrorCode &errorCode); 98 99 /** 100 * Converts a single domain name label into its ASCII form for DNS lookup. 101 * If any processing step fails, then info.hasErrors() will be true and 102 * the result might not be an ASCII string. 103 * The label might be modified according to the types of errors. 104 * Labels with severe errors will be left in (or turned into) their Unicode form. 105 * 106 * The UErrorCode indicates an error only in exceptional cases, 107 * such as a U_MEMORY_ALLOCATION_ERROR. 108 * 109 * @param label Input domain name label 110 * @param dest Destination string object 111 * @param info Output container of IDNA processing details. 112 * @param errorCode Standard ICU error code. Its input value must 113 * pass the U_SUCCESS() test, or else the function returns 114 * immediately. Check for U_FAILURE() on output or use with 115 * function chaining. (See User Guide for details.) 116 * @return dest 117 * @stable ICU 4.6 118 */ 119 virtual UnicodeString & 120 labelToASCII(const UnicodeString &label, UnicodeString &dest, 121 IDNAInfo &info, UErrorCode &errorCode) const = 0; 122 123 /** 124 * Converts a single domain name label into its Unicode form for human-readable display. 125 * If any processing step fails, then info.hasErrors() will be true. 126 * The label might be modified according to the types of errors. 127 * 128 * The UErrorCode indicates an error only in exceptional cases, 129 * such as a U_MEMORY_ALLOCATION_ERROR. 130 * 131 * @param label Input domain name label 132 * @param dest Destination string object 133 * @param info Output container of IDNA processing details. 134 * @param errorCode Standard ICU error code. Its input value must 135 * pass the U_SUCCESS() test, or else the function returns 136 * immediately. Check for U_FAILURE() on output or use with 137 * function chaining. (See User Guide for details.) 138 * @return dest 139 * @stable ICU 4.6 140 */ 141 virtual UnicodeString & 142 labelToUnicode(const UnicodeString &label, UnicodeString &dest, 143 IDNAInfo &info, UErrorCode &errorCode) const = 0; 144 145 /** 146 * Converts a whole domain name into its ASCII form for DNS lookup. 147 * If any processing step fails, then info.hasErrors() will be true and 148 * the result might not be an ASCII string. 149 * The domain name might be modified according to the types of errors. 150 * Labels with severe errors will be left in (or turned into) their Unicode form. 151 * 152 * The UErrorCode indicates an error only in exceptional cases, 153 * such as a U_MEMORY_ALLOCATION_ERROR. 154 * 155 * @param name Input domain name 156 * @param dest Destination string object 157 * @param info Output container of IDNA processing details. 158 * @param errorCode Standard ICU error code. Its input value must 159 * pass the U_SUCCESS() test, or else the function returns 160 * immediately. Check for U_FAILURE() on output or use with 161 * function chaining. (See User Guide for details.) 162 * @return dest 163 * @stable ICU 4.6 164 */ 165 virtual UnicodeString & 166 nameToASCII(const UnicodeString &name, UnicodeString &dest, 167 IDNAInfo &info, UErrorCode &errorCode) const = 0; 168 169 /** 170 * Converts a whole domain name into its Unicode form for human-readable display. 171 * If any processing step fails, then info.hasErrors() will be true. 172 * The domain name might be modified according to the types of errors. 173 * 174 * The UErrorCode indicates an error only in exceptional cases, 175 * such as a U_MEMORY_ALLOCATION_ERROR. 176 * 177 * @param name Input domain name 178 * @param dest Destination string object 179 * @param info Output container of IDNA processing details. 180 * @param errorCode Standard ICU error code. Its input value must 181 * pass the U_SUCCESS() test, or else the function returns 182 * immediately. Check for U_FAILURE() on output or use with 183 * function chaining. (See User Guide for details.) 184 * @return dest 185 * @stable ICU 4.6 186 */ 187 virtual UnicodeString & 188 nameToUnicode(const UnicodeString &name, UnicodeString &dest, 189 IDNAInfo &info, UErrorCode &errorCode) const = 0; 190 191 // UTF-8 versions of the processing methods ---------------------------- *** 192 193 /** 194 * Converts a single domain name label into its ASCII form for DNS lookup. 195 * UTF-8 version of labelToASCII(), same behavior. 196 * 197 * @param label Input domain name label 198 * @param dest Destination byte sink; Flush()ed if successful 199 * @param info Output container of IDNA processing details. 200 * @param errorCode Standard ICU error code. Its input value must 201 * pass the U_SUCCESS() test, or else the function returns 202 * immediately. Check for U_FAILURE() on output or use with 203 * function chaining. (See User Guide for details.) 204 * @return dest 205 * @stable ICU 4.6 206 */ 207 virtual void 208 labelToASCII_UTF8(StringPiece label, ByteSink &dest, 209 IDNAInfo &info, UErrorCode &errorCode) const; 210 211 /** 212 * Converts a single domain name label into its Unicode form for human-readable display. 213 * UTF-8 version of labelToUnicode(), same behavior. 214 * 215 * @param label Input domain name label 216 * @param dest Destination byte sink; Flush()ed if successful 217 * @param info Output container of IDNA processing details. 218 * @param errorCode Standard ICU error code. Its input value must 219 * pass the U_SUCCESS() test, or else the function returns 220 * immediately. Check for U_FAILURE() on output or use with 221 * function chaining. (See User Guide for details.) 222 * @return dest 223 * @stable ICU 4.6 224 */ 225 virtual void 226 labelToUnicodeUTF8(StringPiece label, ByteSink &dest, 227 IDNAInfo &info, UErrorCode &errorCode) const; 228 229 /** 230 * Converts a whole domain name into its ASCII form for DNS lookup. 231 * UTF-8 version of nameToASCII(), same behavior. 232 * 233 * @param name Input domain name 234 * @param dest Destination byte sink; Flush()ed if successful 235 * @param info Output container of IDNA processing details. 236 * @param errorCode Standard ICU error code. Its input value must 237 * pass the U_SUCCESS() test, or else the function returns 238 * immediately. Check for U_FAILURE() on output or use with 239 * function chaining. (See User Guide for details.) 240 * @return dest 241 * @stable ICU 4.6 242 */ 243 virtual void 244 nameToASCII_UTF8(StringPiece name, ByteSink &dest, 245 IDNAInfo &info, UErrorCode &errorCode) const; 246 247 /** 248 * Converts a whole domain name into its Unicode form for human-readable display. 249 * UTF-8 version of nameToUnicode(), same behavior. 250 * 251 * @param name Input domain name 252 * @param dest Destination byte sink; Flush()ed if successful 253 * @param info Output container of IDNA processing details. 254 * @param errorCode Standard ICU error code. Its input value must 255 * pass the U_SUCCESS() test, or else the function returns 256 * immediately. Check for U_FAILURE() on output or use with 257 * function chaining. (See User Guide for details.) 258 * @return dest 259 * @stable ICU 4.6 260 */ 261 virtual void 262 nameToUnicodeUTF8(StringPiece name, ByteSink &dest, 263 IDNAInfo &info, UErrorCode &errorCode) const; 264 }; 265 266 class UTS46; 267 268 /** 269 * Output container for IDNA processing errors. 270 * The IDNAInfo class is not suitable for subclassing. 271 * @stable ICU 4.6 272 */ 273 class U_COMMON_API IDNAInfo : public UMemory { 274 public: 275 /** 276 * Constructor for stack allocation. 277 * @stable ICU 4.6 278 */ IDNAInfo()279 IDNAInfo() : errors(0), labelErrors(0), isTransDiff(false), isBiDi(false), isOkBiDi(true) {} 280 /** 281 * Were there IDNA processing errors? 282 * @return true if there were processing errors 283 * @stable ICU 4.6 284 */ hasErrors()285 UBool hasErrors() const { return errors!=0; } 286 /** 287 * Returns a bit set indicating IDNA processing errors. 288 * See UIDNA_ERROR_... constants in uidna.h. 289 * @return bit set of processing errors 290 * @stable ICU 4.6 291 */ getErrors()292 uint32_t getErrors() const { return errors; } 293 /** 294 * Returns true if transitional and nontransitional processing produce different results. 295 * This is the case when the input label or domain name contains 296 * one or more deviation characters outside a Punycode label (see UTS #46). 297 * <ul> 298 * <li>With nontransitional processing, such characters are 299 * copied to the destination string. 300 * <li>With transitional processing, such characters are 301 * mapped (sharp s/sigma) or removed (joiner/nonjoiner). 302 * </ul> 303 * @return true if transitional and nontransitional processing produce different results 304 * @stable ICU 4.6 305 */ isTransitionalDifferent()306 UBool isTransitionalDifferent() const { return isTransDiff; } 307 308 private: 309 friend class UTS46; 310 311 IDNAInfo(const IDNAInfo &other) = delete; // no copying 312 IDNAInfo &operator=(const IDNAInfo &other) = delete; // no copying 313 reset()314 void reset() { 315 errors=labelErrors=0; 316 isTransDiff=false; 317 isBiDi=false; 318 isOkBiDi=true; 319 } 320 321 uint32_t errors, labelErrors; 322 UBool isTransDiff; 323 UBool isBiDi; 324 UBool isOkBiDi; 325 }; 326 327 U_NAMESPACE_END 328 329 #endif // UCONFIG_NO_IDNA 330 331 #endif /* U_SHOW_CPLUSPLUS_API */ 332 333 #endif // __IDNA_H__ 334