1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2010-2012, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: idna.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2010mar05 14 * created by: Markus W. Scherer 15 */ 16 17 #ifndef __IDNA_H__ 18 #define __IDNA_H__ 19 20 /** 21 * \file 22 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) 23 */ 24 25 #include "unicode/utypes.h" 26 27 #if !UCONFIG_NO_IDNA 28 29 #include "unicode/bytestream.h" 30 #include "unicode/stringpiece.h" 31 #include "unicode/uidna.h" 32 #include "unicode/unistr.h" 33 34 U_NAMESPACE_BEGIN 35 36 class IDNAInfo; 37 38 /** 39 * Abstract base class for IDNA processing. 40 * See http://www.unicode.org/reports/tr46/ 41 * and http://www.ietf.org/rfc/rfc3490.txt 42 * 43 * The IDNA class is not intended for public subclassing. 44 * 45 * This C++ API currently only implements UTS #46. 46 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) 47 * and IDNA2003 (functions that do not use a service object). 48 * @stable ICU 4.6 49 */ 50 class U_COMMON_API IDNA : public UObject { 51 public: 52 /** 53 * Destructor. 54 * @stable ICU 4.6 55 */ 56 ~IDNA(); 57 58 /** 59 * Returns an IDNA instance which implements UTS #46. 60 * Returns an unmodifiable instance, owned by the caller. 61 * Cache it for multiple operations, and delete it when done. 62 * The instance is thread-safe, that is, it can be used concurrently. 63 * 64 * UTS #46 defines Unicode IDNA Compatibility Processing, 65 * updated to the latest version of Unicode and compatible with both 66 * IDNA2003 and IDNA2008. 67 * 68 * The worker functions use transitional processing, including deviation mappings, 69 * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE 70 * is used in which case the deviation characters are passed through without change. 71 * 72 * Disallowed characters are mapped to U+FFFD. 73 * 74 * For available options see the uidna.h header. 75 * Operations with the UTS #46 instance do not support the 76 * UIDNA_ALLOW_UNASSIGNED option. 77 * 78 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). 79 * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than 80 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. 81 * 82 * @param options Bit set to modify the processing and error checking. 83 * See option bit set values in uidna.h. 84 * @param errorCode Standard ICU error code. Its input value must 85 * pass the U_SUCCESS() test, or else the function returns 86 * immediately. Check for U_FAILURE() on output or use with 87 * function chaining. (See User Guide for details.) 88 * @return the UTS #46 IDNA instance, if successful 89 * @stable ICU 4.6 90 */ 91 static IDNA * 92 createUTS46Instance(uint32_t options, UErrorCode &errorCode); 93 94 /** 95 * Converts a single domain name label into its ASCII form for DNS lookup. 96 * If any processing step fails, then info.hasErrors() will be TRUE and 97 * the result might not be an ASCII string. 98 * The label might be modified according to the types of errors. 99 * Labels with severe errors will be left in (or turned into) their Unicode form. 100 * 101 * The UErrorCode indicates an error only in exceptional cases, 102 * such as a U_MEMORY_ALLOCATION_ERROR. 103 * 104 * @param label Input domain name label 105 * @param dest Destination string object 106 * @param info Output container of IDNA processing details. 107 * @param errorCode Standard ICU error code. Its input value must 108 * pass the U_SUCCESS() test, or else the function returns 109 * immediately. Check for U_FAILURE() on output or use with 110 * function chaining. (See User Guide for details.) 111 * @return dest 112 * @stable ICU 4.6 113 */ 114 virtual UnicodeString & 115 labelToASCII(const UnicodeString &label, UnicodeString &dest, 116 IDNAInfo &info, UErrorCode &errorCode) const = 0; 117 118 /** 119 * Converts a single domain name label into its Unicode form for human-readable display. 120 * If any processing step fails, then info.hasErrors() will be TRUE. 121 * The label might be modified according to the types of errors. 122 * 123 * The UErrorCode indicates an error only in exceptional cases, 124 * such as a U_MEMORY_ALLOCATION_ERROR. 125 * 126 * @param label Input domain name label 127 * @param dest Destination string object 128 * @param info Output container of IDNA processing details. 129 * @param errorCode Standard ICU error code. Its input value must 130 * pass the U_SUCCESS() test, or else the function returns 131 * immediately. Check for U_FAILURE() on output or use with 132 * function chaining. (See User Guide for details.) 133 * @return dest 134 * @stable ICU 4.6 135 */ 136 virtual UnicodeString & 137 labelToUnicode(const UnicodeString &label, UnicodeString &dest, 138 IDNAInfo &info, UErrorCode &errorCode) const = 0; 139 140 /** 141 * Converts a whole domain name into its ASCII form for DNS lookup. 142 * If any processing step fails, then info.hasErrors() will be TRUE and 143 * the result might not be an ASCII string. 144 * The domain name might be modified according to the types of errors. 145 * Labels with severe errors will be left in (or turned into) their Unicode form. 146 * 147 * The UErrorCode indicates an error only in exceptional cases, 148 * such as a U_MEMORY_ALLOCATION_ERROR. 149 * 150 * @param name Input domain name 151 * @param dest Destination string object 152 * @param info Output container of IDNA processing details. 153 * @param errorCode Standard ICU error code. Its input value must 154 * pass the U_SUCCESS() test, or else the function returns 155 * immediately. Check for U_FAILURE() on output or use with 156 * function chaining. (See User Guide for details.) 157 * @return dest 158 * @stable ICU 4.6 159 */ 160 virtual UnicodeString & 161 nameToASCII(const UnicodeString &name, UnicodeString &dest, 162 IDNAInfo &info, UErrorCode &errorCode) const = 0; 163 164 /** 165 * Converts a whole domain name into its Unicode form for human-readable display. 166 * If any processing step fails, then info.hasErrors() will be TRUE. 167 * The domain name might be modified according to the types of errors. 168 * 169 * The UErrorCode indicates an error only in exceptional cases, 170 * such as a U_MEMORY_ALLOCATION_ERROR. 171 * 172 * @param name Input domain name 173 * @param dest Destination string object 174 * @param info Output container of IDNA processing details. 175 * @param errorCode Standard ICU error code. Its input value must 176 * pass the U_SUCCESS() test, or else the function returns 177 * immediately. Check for U_FAILURE() on output or use with 178 * function chaining. (See User Guide for details.) 179 * @return dest 180 * @stable ICU 4.6 181 */ 182 virtual UnicodeString & 183 nameToUnicode(const UnicodeString &name, UnicodeString &dest, 184 IDNAInfo &info, UErrorCode &errorCode) const = 0; 185 186 // UTF-8 versions of the processing methods ---------------------------- *** 187 188 /** 189 * Converts a single domain name label into its ASCII form for DNS lookup. 190 * UTF-8 version of labelToASCII(), same behavior. 191 * 192 * @param label Input domain name label 193 * @param dest Destination byte sink; Flush()ed if successful 194 * @param info Output container of IDNA processing details. 195 * @param errorCode Standard ICU error code. Its input value must 196 * pass the U_SUCCESS() test, or else the function returns 197 * immediately. Check for U_FAILURE() on output or use with 198 * function chaining. (See User Guide for details.) 199 * @return dest 200 * @stable ICU 4.6 201 */ 202 virtual void 203 labelToASCII_UTF8(StringPiece label, ByteSink &dest, 204 IDNAInfo &info, UErrorCode &errorCode) const; 205 206 /** 207 * Converts a single domain name label into its Unicode form for human-readable display. 208 * UTF-8 version of labelToUnicode(), same behavior. 209 * 210 * @param label Input domain name label 211 * @param dest Destination byte sink; Flush()ed if successful 212 * @param info Output container of IDNA processing details. 213 * @param errorCode Standard ICU error code. Its input value must 214 * pass the U_SUCCESS() test, or else the function returns 215 * immediately. Check for U_FAILURE() on output or use with 216 * function chaining. (See User Guide for details.) 217 * @return dest 218 * @stable ICU 4.6 219 */ 220 virtual void 221 labelToUnicodeUTF8(StringPiece label, ByteSink &dest, 222 IDNAInfo &info, UErrorCode &errorCode) const; 223 224 /** 225 * Converts a whole domain name into its ASCII form for DNS lookup. 226 * UTF-8 version of nameToASCII(), same behavior. 227 * 228 * @param name Input domain name 229 * @param dest Destination byte sink; Flush()ed if successful 230 * @param info Output container of IDNA processing details. 231 * @param errorCode Standard ICU error code. Its input value must 232 * pass the U_SUCCESS() test, or else the function returns 233 * immediately. Check for U_FAILURE() on output or use with 234 * function chaining. (See User Guide for details.) 235 * @return dest 236 * @stable ICU 4.6 237 */ 238 virtual void 239 nameToASCII_UTF8(StringPiece name, ByteSink &dest, 240 IDNAInfo &info, UErrorCode &errorCode) const; 241 242 /** 243 * Converts a whole domain name into its Unicode form for human-readable display. 244 * UTF-8 version of nameToUnicode(), same behavior. 245 * 246 * @param name Input domain name 247 * @param dest Destination byte sink; Flush()ed if successful 248 * @param info Output container of IDNA processing details. 249 * @param errorCode Standard ICU error code. Its input value must 250 * pass the U_SUCCESS() test, or else the function returns 251 * immediately. Check for U_FAILURE() on output or use with 252 * function chaining. (See User Guide for details.) 253 * @return dest 254 * @stable ICU 4.6 255 */ 256 virtual void 257 nameToUnicodeUTF8(StringPiece name, ByteSink &dest, 258 IDNAInfo &info, UErrorCode &errorCode) const; 259 }; 260 261 class UTS46; 262 263 /** 264 * Output container for IDNA processing errors. 265 * The IDNAInfo class is not suitable for subclassing. 266 * @stable ICU 4.6 267 */ 268 class U_COMMON_API IDNAInfo : public UMemory { 269 public: 270 /** 271 * Constructor for stack allocation. 272 * @stable ICU 4.6 273 */ IDNAInfo()274 IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {} 275 /** 276 * Were there IDNA processing errors? 277 * @return TRUE if there were processing errors 278 * @stable ICU 4.6 279 */ hasErrors()280 UBool hasErrors() const { return errors!=0; } 281 /** 282 * Returns a bit set indicating IDNA processing errors. 283 * See UIDNA_ERROR_... constants in uidna.h. 284 * @return bit set of processing errors 285 * @stable ICU 4.6 286 */ getErrors()287 uint32_t getErrors() const { return errors; } 288 /** 289 * Returns TRUE if transitional and nontransitional processing produce different results. 290 * This is the case when the input label or domain name contains 291 * one or more deviation characters outside a Punycode label (see UTS #46). 292 * <ul> 293 * <li>With nontransitional processing, such characters are 294 * copied to the destination string. 295 * <li>With transitional processing, such characters are 296 * mapped (sharp s/sigma) or removed (joiner/nonjoiner). 297 * </ul> 298 * @return TRUE if transitional and nontransitional processing produce different results 299 * @stable ICU 4.6 300 */ isTransitionalDifferent()301 UBool isTransitionalDifferent() const { return isTransDiff; } 302 303 private: 304 friend class UTS46; 305 306 IDNAInfo(const IDNAInfo &other); // no copying 307 IDNAInfo &operator=(const IDNAInfo &other); // no copying 308 reset()309 void reset() { 310 errors=labelErrors=0; 311 isTransDiff=FALSE; 312 isBiDi=FALSE; 313 isOkBiDi=TRUE; 314 } 315 316 uint32_t errors, labelErrors; 317 UBool isTransDiff; 318 UBool isBiDi; 319 UBool isOkBiDi; 320 }; 321 322 U_NAMESPACE_END 323 324 #endif // UCONFIG_NO_IDNA 325 #endif // __IDNA_H__ 326