1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: idna.h 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2010mar05 12 * created by: Markus W. Scherer 13 */ 14 15 #ifndef __IDNA_H__ 16 #define __IDNA_H__ 17 18 /** 19 * \file 20 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) 21 */ 22 23 #include "unicode/utypes.h" 24 25 #if !UCONFIG_NO_IDNA 26 27 #include "unicode/bytestream.h" 28 #include "unicode/stringpiece.h" 29 #include "unicode/uidna.h" 30 #include "unicode/unistr.h" 31 32 U_NAMESPACE_BEGIN 33 34 class U_COMMON_API IDNAInfo; 35 36 /** 37 * Abstract base class for IDNA processing. 38 * See http://www.unicode.org/reports/tr46/ 39 * and http://www.ietf.org/rfc/rfc3490.txt 40 * 41 * The IDNA class is not intended for public subclassing. 42 * 43 * This C++ API currently only implements UTS #46. 44 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) 45 * and IDNA2003 (functions that do not use a service object). 46 * @draft ICU 4.6 47 */ 48 class U_COMMON_API IDNA : public UObject { 49 public: 50 /** 51 * Returns an IDNA instance which implements UTS #46. 52 * Returns an unmodifiable instance, owned by the caller. 53 * Cache it for multiple operations, and delete it when done. 54 * The instance is thread-safe, that is, it can be used concurrently. 55 * 56 * UTS #46 defines Unicode IDNA Compatibility Processing, 57 * updated to the latest version of Unicode and compatible with both 58 * IDNA2003 and IDNA2008. 59 * 60 * The worker functions use transitional processing, including deviation mappings, 61 * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE 62 * is used in which case the deviation characters are passed through without change. 63 * 64 * Disallowed characters are mapped to U+FFFD. 65 * 66 * For available options see the uidna.h header. 67 * Operations with the UTS #46 instance do not support the 68 * UIDNA_ALLOW_UNASSIGNED option. 69 * 70 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). 71 * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than 72 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. 73 * 74 * @param options Bit set to modify the processing and error checking. 75 * See option bit set values in uidna.h. 76 * @param errorCode Standard ICU error code. Its input value must 77 * pass the U_SUCCESS() test, or else the function returns 78 * immediately. Check for U_FAILURE() on output or use with 79 * function chaining. (See User Guide for details.) 80 * @return the UTS #46 IDNA instance, if successful 81 * @draft ICU 4.6 82 */ 83 static IDNA * 84 createUTS46Instance(uint32_t options, UErrorCode &errorCode); 85 86 /** 87 * Converts a single domain name label into its ASCII form for DNS lookup. 88 * If any processing step fails, then info.hasErrors() will be TRUE and 89 * the result might not be an ASCII string. 90 * The label might be modified according to the types of errors. 91 * Labels with severe errors will be left in (or turned into) their Unicode form. 92 * 93 * The UErrorCode indicates an error only in exceptional cases, 94 * such as a U_MEMORY_ALLOCATION_ERROR. 95 * 96 * @param label Input domain name label 97 * @param dest Destination string object 98 * @param info Output container of IDNA processing details. 99 * @param errorCode Standard ICU error code. Its input value must 100 * pass the U_SUCCESS() test, or else the function returns 101 * immediately. Check for U_FAILURE() on output or use with 102 * function chaining. (See User Guide for details.) 103 * @return dest 104 * @draft ICU 4.6 105 */ 106 virtual UnicodeString & 107 labelToASCII(const UnicodeString &label, UnicodeString &dest, 108 IDNAInfo &info, UErrorCode &errorCode) const = 0; 109 110 /** 111 * Converts a single domain name label into its Unicode form for human-readable display. 112 * If any processing step fails, then info.hasErrors() will be TRUE. 113 * The label might be modified according to the types of errors. 114 * 115 * The UErrorCode indicates an error only in exceptional cases, 116 * such as a U_MEMORY_ALLOCATION_ERROR. 117 * 118 * @param label Input domain name label 119 * @param dest Destination string object 120 * @param info Output container of IDNA processing details. 121 * @param errorCode Standard ICU error code. Its input value must 122 * pass the U_SUCCESS() test, or else the function returns 123 * immediately. Check for U_FAILURE() on output or use with 124 * function chaining. (See User Guide for details.) 125 * @return dest 126 * @draft ICU 4.6 127 */ 128 virtual UnicodeString & 129 labelToUnicode(const UnicodeString &label, UnicodeString &dest, 130 IDNAInfo &info, UErrorCode &errorCode) const = 0; 131 132 /** 133 * Converts a whole domain name into its ASCII form for DNS lookup. 134 * If any processing step fails, then info.hasErrors() will be TRUE and 135 * the result might not be an ASCII string. 136 * The domain name might be modified according to the types of errors. 137 * Labels with severe errors will be left in (or turned into) their Unicode form. 138 * 139 * The UErrorCode indicates an error only in exceptional cases, 140 * such as a U_MEMORY_ALLOCATION_ERROR. 141 * 142 * @param name Input domain name 143 * @param dest Destination string object 144 * @param info Output container of IDNA processing details. 145 * @param errorCode Standard ICU error code. Its input value must 146 * pass the U_SUCCESS() test, or else the function returns 147 * immediately. Check for U_FAILURE() on output or use with 148 * function chaining. (See User Guide for details.) 149 * @return dest 150 * @draft ICU 4.6 151 */ 152 virtual UnicodeString & 153 nameToASCII(const UnicodeString &name, UnicodeString &dest, 154 IDNAInfo &info, UErrorCode &errorCode) const = 0; 155 156 /** 157 * Converts a whole domain name into its Unicode form for human-readable display. 158 * If any processing step fails, then info.hasErrors() will be TRUE. 159 * The domain name might be modified according to the types of errors. 160 * 161 * The UErrorCode indicates an error only in exceptional cases, 162 * such as a U_MEMORY_ALLOCATION_ERROR. 163 * 164 * @param name Input domain name 165 * @param dest Destination string object 166 * @param info Output container of IDNA processing details. 167 * @param errorCode Standard ICU error code. Its input value must 168 * pass the U_SUCCESS() test, or else the function returns 169 * immediately. Check for U_FAILURE() on output or use with 170 * function chaining. (See User Guide for details.) 171 * @return dest 172 * @draft ICU 4.6 173 */ 174 virtual UnicodeString & 175 nameToUnicode(const UnicodeString &name, UnicodeString &dest, 176 IDNAInfo &info, UErrorCode &errorCode) const = 0; 177 178 // UTF-8 versions of the processing methods ---------------------------- *** 179 180 /** 181 * Converts a single domain name label into its ASCII form for DNS lookup. 182 * UTF-8 version of labelToASCII(), same behavior. 183 * 184 * @param label Input domain name label 185 * @param dest Destination byte sink; Flush()ed if successful 186 * @param info Output container of IDNA processing details. 187 * @param errorCode Standard ICU error code. Its input value must 188 * pass the U_SUCCESS() test, or else the function returns 189 * immediately. Check for U_FAILURE() on output or use with 190 * function chaining. (See User Guide for details.) 191 * @return dest 192 * @draft ICU 4.6 193 */ 194 virtual void 195 labelToASCII_UTF8(const StringPiece &label, ByteSink &dest, 196 IDNAInfo &info, UErrorCode &errorCode) const; 197 198 /** 199 * Converts a single domain name label into its Unicode form for human-readable display. 200 * UTF-8 version of labelToUnicode(), same behavior. 201 * 202 * @param label Input domain name label 203 * @param dest Destination byte sink; Flush()ed if successful 204 * @param info Output container of IDNA processing details. 205 * @param errorCode Standard ICU error code. Its input value must 206 * pass the U_SUCCESS() test, or else the function returns 207 * immediately. Check for U_FAILURE() on output or use with 208 * function chaining. (See User Guide for details.) 209 * @return dest 210 * @draft ICU 4.6 211 */ 212 virtual void 213 labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest, 214 IDNAInfo &info, UErrorCode &errorCode) const; 215 216 /** 217 * Converts a whole domain name into its ASCII form for DNS lookup. 218 * UTF-8 version of nameToASCII(), same behavior. 219 * 220 * @param name Input domain name 221 * @param dest Destination byte sink; Flush()ed if successful 222 * @param info Output container of IDNA processing details. 223 * @param errorCode Standard ICU error code. Its input value must 224 * pass the U_SUCCESS() test, or else the function returns 225 * immediately. Check for U_FAILURE() on output or use with 226 * function chaining. (See User Guide for details.) 227 * @return dest 228 * @draft ICU 4.6 229 */ 230 virtual void 231 nameToASCII_UTF8(const StringPiece &name, ByteSink &dest, 232 IDNAInfo &info, UErrorCode &errorCode) const; 233 234 /** 235 * Converts a whole domain name into its Unicode form for human-readable display. 236 * UTF-8 version of nameToUnicode(), same behavior. 237 * 238 * @param name Input domain name 239 * @param dest Destination byte sink; Flush()ed if successful 240 * @param info Output container of IDNA processing details. 241 * @param errorCode Standard ICU error code. Its input value must 242 * pass the U_SUCCESS() test, or else the function returns 243 * immediately. Check for U_FAILURE() on output or use with 244 * function chaining. (See User Guide for details.) 245 * @return dest 246 * @draft ICU 4.6 247 */ 248 virtual void 249 nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest, 250 IDNAInfo &info, UErrorCode &errorCode) const; 251 252 private: 253 // No ICU "poor man's RTTI" for this class nor its subclasses. 254 virtual UClassID getDynamicClassID() const; 255 }; 256 257 class UTS46; 258 259 /** 260 * Output container for IDNA processing errors. 261 * The IDNAInfo class is not suitable for subclassing. 262 * @draft ICU 4.6 263 */ 264 class U_COMMON_API IDNAInfo : public UMemory { 265 public: 266 /** 267 * Constructor for stack allocation. 268 * @draft ICU 4.6 269 */ IDNAInfo()270 IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {} 271 /** 272 * Were there IDNA processing errors? 273 * @return TRUE if there were processing errors 274 * @draft ICU 4.6 275 */ hasErrors()276 UBool hasErrors() const { return errors!=0; } 277 /** 278 * Returns a bit set indicating IDNA processing errors. 279 * See UIDNA_ERROR_... constants in uidna.h. 280 * @return bit set of processing errors 281 * @draft ICU 4.6 282 */ getErrors()283 uint32_t getErrors() const { return errors; } 284 /** 285 * Returns TRUE if transitional and nontransitional processing produce different results. 286 * This is the case when the input label or domain name contains 287 * one or more deviation characters outside a Punycode label (see UTS #46). 288 * <ul> 289 * <li>With nontransitional processing, such characters are 290 * copied to the destination string. 291 * <li>With transitional processing, such characters are 292 * mapped (sharp s/sigma) or removed (joiner/nonjoiner). 293 * </ul> 294 * @return TRUE if transitional and nontransitional processing produce different results 295 * @draft ICU 4.6 296 */ isTransitionalDifferent()297 UBool isTransitionalDifferent() const { return isTransDiff; } 298 299 private: 300 friend class UTS46; 301 302 IDNAInfo(const IDNAInfo &other); // no copying 303 IDNAInfo &operator=(const IDNAInfo &other); // no copying 304 reset()305 void reset() { 306 errors=labelErrors=0; 307 isTransDiff=FALSE; 308 isBiDi=FALSE; 309 isOkBiDi=TRUE; 310 } 311 312 uint32_t errors, labelErrors; 313 UBool isTransDiff; 314 UBool isBiDi; 315 UBool isOkBiDi; 316 }; 317 318 U_NAMESPACE_END 319 320 #endif // UCONFIG_NO_IDNA 321 #endif // __IDNA_H__ 322