1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2012, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: idna.h 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2010mar05 12 * created by: Markus W. Scherer 13 */ 14 15 #ifndef __IDNA_H__ 16 #define __IDNA_H__ 17 18 /** 19 * \file 20 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) 21 */ 22 23 #include "unicode/utypes.h" 24 25 #if !UCONFIG_NO_IDNA 26 27 #include "unicode/bytestream.h" 28 #include "unicode/stringpiece.h" 29 #include "unicode/uidna.h" 30 #include "unicode/unistr.h" 31 32 U_NAMESPACE_BEGIN 33 34 class IDNAInfo; 35 36 /** 37 * Abstract base class for IDNA processing. 38 * See http://www.unicode.org/reports/tr46/ 39 * and http://www.ietf.org/rfc/rfc3490.txt 40 * 41 * The IDNA class is not intended for public subclassing. 42 * 43 * This C++ API currently only implements UTS #46. 44 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) 45 * and IDNA2003 (functions that do not use a service object). 46 * @stable ICU 4.6 47 */ 48 class U_COMMON_API IDNA : public UObject { 49 public: 50 /** 51 * Destructor. 52 * @stable ICU 4.6 53 */ 54 ~IDNA(); 55 56 /** 57 * Returns an IDNA instance which implements UTS #46. 58 * Returns an unmodifiable instance, owned by the caller. 59 * Cache it for multiple operations, and delete it when done. 60 * The instance is thread-safe, that is, it can be used concurrently. 61 * 62 * UTS #46 defines Unicode IDNA Compatibility Processing, 63 * updated to the latest version of Unicode and compatible with both 64 * IDNA2003 and IDNA2008. 65 * 66 * The worker functions use transitional processing, including deviation mappings, 67 * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE 68 * is used in which case the deviation characters are passed through without change. 69 * 70 * Disallowed characters are mapped to U+FFFD. 71 * 72 * For available options see the uidna.h header. 73 * Operations with the UTS #46 instance do not support the 74 * UIDNA_ALLOW_UNASSIGNED option. 75 * 76 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). 77 * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than 78 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. 79 * 80 * @param options Bit set to modify the processing and error checking. 81 * See option bit set values in uidna.h. 82 * @param errorCode Standard ICU error code. Its input value must 83 * pass the U_SUCCESS() test, or else the function returns 84 * immediately. Check for U_FAILURE() on output or use with 85 * function chaining. (See User Guide for details.) 86 * @return the UTS #46 IDNA instance, if successful 87 * @stable ICU 4.6 88 */ 89 static IDNA * 90 createUTS46Instance(uint32_t options, UErrorCode &errorCode); 91 92 /** 93 * Converts a single domain name label into its ASCII form for DNS lookup. 94 * If any processing step fails, then info.hasErrors() will be TRUE and 95 * the result might not be an ASCII string. 96 * The label might be modified according to the types of errors. 97 * Labels with severe errors will be left in (or turned into) their Unicode form. 98 * 99 * The UErrorCode indicates an error only in exceptional cases, 100 * such as a U_MEMORY_ALLOCATION_ERROR. 101 * 102 * @param label Input domain name label 103 * @param dest Destination string object 104 * @param info Output container of IDNA processing details. 105 * @param errorCode Standard ICU error code. Its input value must 106 * pass the U_SUCCESS() test, or else the function returns 107 * immediately. Check for U_FAILURE() on output or use with 108 * function chaining. (See User Guide for details.) 109 * @return dest 110 * @stable ICU 4.6 111 */ 112 virtual UnicodeString & 113 labelToASCII(const UnicodeString &label, UnicodeString &dest, 114 IDNAInfo &info, UErrorCode &errorCode) const = 0; 115 116 /** 117 * Converts a single domain name label into its Unicode form for human-readable display. 118 * If any processing step fails, then info.hasErrors() will be TRUE. 119 * The label might be modified according to the types of errors. 120 * 121 * The UErrorCode indicates an error only in exceptional cases, 122 * such as a U_MEMORY_ALLOCATION_ERROR. 123 * 124 * @param label Input domain name label 125 * @param dest Destination string object 126 * @param info Output container of IDNA processing details. 127 * @param errorCode Standard ICU error code. Its input value must 128 * pass the U_SUCCESS() test, or else the function returns 129 * immediately. Check for U_FAILURE() on output or use with 130 * function chaining. (See User Guide for details.) 131 * @return dest 132 * @stable ICU 4.6 133 */ 134 virtual UnicodeString & 135 labelToUnicode(const UnicodeString &label, UnicodeString &dest, 136 IDNAInfo &info, UErrorCode &errorCode) const = 0; 137 138 /** 139 * Converts a whole domain name into its ASCII form for DNS lookup. 140 * If any processing step fails, then info.hasErrors() will be TRUE and 141 * the result might not be an ASCII string. 142 * The domain name might be modified according to the types of errors. 143 * Labels with severe errors will be left in (or turned into) their Unicode form. 144 * 145 * The UErrorCode indicates an error only in exceptional cases, 146 * such as a U_MEMORY_ALLOCATION_ERROR. 147 * 148 * @param name Input domain name 149 * @param dest Destination string object 150 * @param info Output container of IDNA processing details. 151 * @param errorCode Standard ICU error code. Its input value must 152 * pass the U_SUCCESS() test, or else the function returns 153 * immediately. Check for U_FAILURE() on output or use with 154 * function chaining. (See User Guide for details.) 155 * @return dest 156 * @stable ICU 4.6 157 */ 158 virtual UnicodeString & 159 nameToASCII(const UnicodeString &name, UnicodeString &dest, 160 IDNAInfo &info, UErrorCode &errorCode) const = 0; 161 162 /** 163 * Converts a whole domain name into its Unicode form for human-readable display. 164 * If any processing step fails, then info.hasErrors() will be TRUE. 165 * The domain name might be modified according to the types of errors. 166 * 167 * The UErrorCode indicates an error only in exceptional cases, 168 * such as a U_MEMORY_ALLOCATION_ERROR. 169 * 170 * @param name Input domain name 171 * @param dest Destination string object 172 * @param info Output container of IDNA processing details. 173 * @param errorCode Standard ICU error code. Its input value must 174 * pass the U_SUCCESS() test, or else the function returns 175 * immediately. Check for U_FAILURE() on output or use with 176 * function chaining. (See User Guide for details.) 177 * @return dest 178 * @stable ICU 4.6 179 */ 180 virtual UnicodeString & 181 nameToUnicode(const UnicodeString &name, UnicodeString &dest, 182 IDNAInfo &info, UErrorCode &errorCode) const = 0; 183 184 // UTF-8 versions of the processing methods ---------------------------- *** 185 186 /** 187 * Converts a single domain name label into its ASCII form for DNS lookup. 188 * UTF-8 version of labelToASCII(), same behavior. 189 * 190 * @param label Input domain name label 191 * @param dest Destination byte sink; Flush()ed if successful 192 * @param info Output container of IDNA processing details. 193 * @param errorCode Standard ICU error code. Its input value must 194 * pass the U_SUCCESS() test, or else the function returns 195 * immediately. Check for U_FAILURE() on output or use with 196 * function chaining. (See User Guide for details.) 197 * @return dest 198 * @stable ICU 4.6 199 */ 200 virtual void 201 labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, 202 IDNAInfo &info, UErrorCode &errorCode) const; 203 204 /** 205 * Converts a single domain name label into its Unicode form for human-readable display. 206 * UTF-8 version of labelToUnicode(), same behavior. 207 * 208 * @param label Input domain name label 209 * @param dest Destination byte sink; Flush()ed if successful 210 * @param info Output container of IDNA processing details. 211 * @param errorCode Standard ICU error code. Its input value must 212 * pass the U_SUCCESS() test, or else the function returns 213 * immediately. Check for U_FAILURE() on output or use with 214 * function chaining. (See User Guide for details.) 215 * @return dest 216 * @stable ICU 4.6 217 */ 218 virtual void 219 labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, 220 IDNAInfo &info, UErrorCode &errorCode) const; 221 222 /** 223 * Converts a whole domain name into its ASCII form for DNS lookup. 224 * UTF-8 version of nameToASCII(), same behavior. 225 * 226 * @param name Input domain name 227 * @param dest Destination byte sink; Flush()ed if successful 228 * @param info Output container of IDNA processing details. 229 * @param errorCode Standard ICU error code. Its input value must 230 * pass the U_SUCCESS() test, or else the function returns 231 * immediately. Check for U_FAILURE() on output or use with 232 * function chaining. (See User Guide for details.) 233 * @return dest 234 * @stable ICU 4.6 235 */ 236 virtual void 237 nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, 238 IDNAInfo &info, UErrorCode &errorCode) const; 239 240 /** 241 * Converts a whole domain name into its Unicode form for human-readable display. 242 * UTF-8 version of nameToUnicode(), same behavior. 243 * 244 * @param name Input domain name 245 * @param dest Destination byte sink; Flush()ed if successful 246 * @param info Output container of IDNA processing details. 247 * @param errorCode Standard ICU error code. Its input value must 248 * pass the U_SUCCESS() test, or else the function returns 249 * immediately. Check for U_FAILURE() on output or use with 250 * function chaining. (See User Guide for details.) 251 * @return dest 252 * @stable ICU 4.6 253 */ 254 virtual void 255 nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, 256 IDNAInfo &info, UErrorCode &errorCode) const; 257 }; 258 259 class UTS46; 260 261 /** 262 * Output container for IDNA processing errors. 263 * The IDNAInfo class is not suitable for subclassing. 264 * @stable ICU 4.6 265 */ 266 class U_COMMON_API IDNAInfo : public UMemory { 267 public: 268 /** 269 * Constructor for stack allocation. 270 * @stable ICU 4.6 271 */ IDNAInfo()272 IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {} 273 /** 274 * Were there IDNA processing errors? 275 * @return TRUE if there were processing errors 276 * @stable ICU 4.6 277 */ hasErrors()278 UBool hasErrors() const { return errors!=0; } 279 /** 280 * Returns a bit set indicating IDNA processing errors. 281 * See UIDNA_ERROR_... constants in uidna.h. 282 * @return bit set of processing errors 283 * @stable ICU 4.6 284 */ getErrors()285 uint32_t getErrors() const { return errors; } 286 /** 287 * Returns TRUE if transitional and nontransitional processing produce different results. 288 * This is the case when the input label or domain name contains 289 * one or more deviation characters outside a Punycode label (see UTS #46). 290 * <ul> 291 * <li>With nontransitional processing, such characters are 292 * copied to the destination string. 293 * <li>With transitional processing, such characters are 294 * mapped (sharp s/sigma) or removed (joiner/nonjoiner). 295 * </ul> 296 * @return TRUE if transitional and nontransitional processing produce different results 297 * @stable ICU 4.6 298 */ isTransitionalDifferent()299 UBool isTransitionalDifferent() const { return isTransDiff; } 300 301 private: 302 friend class UTS46; 303 304 IDNAInfo(const IDNAInfo &other); // no copying 305 IDNAInfo &operator=(const IDNAInfo &other); // no copying 306 reset()307 void reset() { 308 errors=labelErrors=0; 309 isTransDiff=FALSE; 310 isBiDi=FALSE; 311 isOkBiDi=TRUE; 312 } 313 314 uint32_t errors, labelErrors; 315 UBool isTransDiff; 316 UBool isBiDi; 317 UBool isOkBiDi; 318 }; 319 320 U_NAMESPACE_END 321 322 #endif // UCONFIG_NO_IDNA 323 #endif // __IDNA_H__ 324