1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003-2004, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: idnaref.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003feb1 14 * created by: Ram Viswanadha 15 */ 16 17 #ifndef __IDNAREF_H__ 18 #define __IDNAREF_H__ 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_IDNA 23 24 #include "unicode/parseerr.h" 25 26 #define IDNAREF_DEFAULT 0x0000 27 #define IDNAREF_ALLOW_UNASSIGNED 0x0001 28 #define IDNAREF_USE_STD3_RULES 0x0002 29 30 /** 31 * This function implements the ToASCII operation as defined in the IDNA draft. 32 * This operation is done on <b>single labels</b> before sending it to something that expects 33 * ASCII names. A label is an individual part of a domain name. Labels are usually 34 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 35 * "www","example", and "com". 36 * 37 * 38 * @param src Input Unicode label. 39 * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 40 * @param dest Output Unicode array with ACE encoded ASCII label. 41 * @param destCapacity Size of dest. 42 * @param options A bit set of options: 43 * 44 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 45 * If TRUE unassigned values are treated as normal Unicode code points. 46 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 47 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 48 * If TRUE and the input does not statisfy STD3 rules, the operation 49 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 50 * 51 * @param parseError Pointer to UParseError struct to recieve information on position 52 * of error if an error is encountered. Can be NULL. 53 * @param status ICU in/out error code parameter. 54 * U_INVALID_CHAR_FOUND if src contains 55 * unmatched single surrogates. 56 * U_INDEX_OUTOFBOUNDS_ERROR if src contains 57 * too many code points. 58 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 59 * @return Number of ASCII characters converted. 60 * @draft ICU 2.6 61 */ 62 U_CFUNC int32_t U_EXPORT2 63 idnaref_toASCII(const UChar* src, int32_t srcLength, 64 UChar* dest, int32_t destCapacity, 65 int32_t options, 66 UParseError* parseError, 67 UErrorCode* status); 68 69 70 /** 71 * This function implements the ToUnicode operation as defined in the IDNA draft. 72 * This operation is done on <b>single labels</b> before sending it to something that expects 73 * ASCII names. A label is an individual part of a domain name. Labels are usually 74 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 75 * "www","example", and "com". 76 * 77 * @param src Input ASCII (ACE encoded) label. 78 * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 79 * @param dest Output Converted Unicode array. 80 * @param destCapacity Size of dest. 81 * @param options A bit set of options: 82 * 83 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 84 * If TRUE unassigned values are treated as normal Unicode code points. 85 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 86 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 87 * If TRUE and the input does not statisfy STD3 rules, the operation 88 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 89 * 90 * @param parseError Pointer to UParseError struct to recieve information on position 91 * of error if an error is encountered. Can be NULL. 92 * @param status ICU in/out error code parameter. 93 * U_INVALID_CHAR_FOUND if src contains 94 * unmatched single surrogates. 95 * U_INDEX_OUTOFBOUNDS_ERROR if src contains 96 * too many code points. 97 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 98 * @return Number of Unicode characters converted. 99 * @draft ICU 2.6 100 */ 101 U_CFUNC int32_t U_EXPORT2 102 idnaref_toUnicode(const UChar* src, int32_t srcLength, 103 UChar* dest, int32_t destCapacity, 104 int32_t options, 105 UParseError* parseError, 106 UErrorCode* status); 107 108 109 /** 110 * Convenience function that implements the IDNToASCII operation as defined in the IDNA draft. 111 * This operation is done on complete domain names, e.g: "www.example.com". 112 * It is important to note that this operation can fail. If it fails, then the input 113 * domain name cannot be used as an Internationalized Domain Name and the application 114 * should have methods defined to deal with the failure. 115 * 116 * <b>Note:</b> IDNA draft specifies that a conformant application should divide a domain name 117 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 118 * and then convert. This function does not offer that level of granularity. The options once 119 * set will apply to all labels in the domain name 120 * 121 * @param src Input ASCII IDN. 122 * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 123 * @param dest Output Unicode array. 124 * @param destCapacity Size of dest. 125 * @param options A bit set of options: 126 * 127 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 128 * If TRUE unassigned values are treated as normal Unicode code points. 129 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 130 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 131 * If TRUE and the input does not statisfy STD3 rules, the operation 132 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 133 * 134 * @param parseError Pointer to UParseError struct to recieve information on position 135 * of error if an error is encountered. Can be NULL. 136 * @param status ICU in/out error code parameter. 137 * U_INVALID_CHAR_FOUND if src contains 138 * unmatched single surrogates. 139 * U_INDEX_OUTOFBOUNDS_ERROR if src contains 140 * too many code points. 141 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 142 * @return Number of ASCII characters converted. 143 * @draft ICU 2.6 144 */ 145 U_CFUNC int32_t U_EXPORT2 146 idnaref_IDNToASCII( const UChar* src, int32_t srcLength, 147 UChar* dest, int32_t destCapacity, 148 int32_t options, 149 UParseError* parseError, 150 UErrorCode* status); 151 152 /** 153 * Convenience function that implements the IDNToUnicode operation as defined in the IDNA draft. 154 * This operation is done on complete domain names, e.g: "www.example.com". 155 * 156 * <b>Note:</b> IDNA draft specifies that a conformant application should divide a domain name 157 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 158 * and then convert. This function does not offer that level of granularity. The options once 159 * set will apply to all labels in the domain name 160 * 161 * @param src Input Unicode IDN. 162 * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 163 * @param dest Output ASCII array. 164 * @param destCapacity Size of dest. 165 * @param options A bit set of options: 166 * 167 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 168 * If TRUE unassigned values are treated as normal Unicode code points. 169 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 170 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 171 * If TRUE and the input does not statisfy STD3 rules, the operation 172 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 173 * 174 * @param parseError Pointer to UParseError struct to recieve information on position 175 * of error if an error is encountered. Can be NULL. 176 * @param status ICU in/out error code parameter. 177 * U_INVALID_CHAR_FOUND if src contains 178 * unmatched single surrogates. 179 * U_INDEX_OUTOFBOUNDS_ERROR if src contains 180 * too many code points. 181 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 182 * @return Number of ASCII characters converted. 183 * @draft ICU 2.6 184 */ 185 U_CFUNC int32_t U_EXPORT2 186 idnaref_IDNToUnicode( const UChar* src, int32_t srcLength, 187 UChar* dest, int32_t destCapacity, 188 int32_t options, 189 UParseError* parseError, 190 UErrorCode* status); 191 192 /** 193 * Compare two strings for IDNs for equivalence. 194 * This function splits the domain names into labels and compares them. 195 * According to IDN draft, whenever two labels are compared, they are 196 * considered equal if and only if their ASCII forms (obtained by 197 * applying toASCII) match using an case-insensitive ASCII comparison. 198 * Two domain names are considered a match if and only if all labels 199 * match regardless of whether label separators match. 200 * 201 * @param s1 First source string. 202 * @param length1 Length of first source string, or -1 if NUL-terminated. 203 * 204 * @param s2 Second source string. 205 * @param length2 Length of second source string, or -1 if NUL-terminated. 206 * @param options A bit set of options: 207 * 208 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 209 * If TRUE unassigned values are treated as normal Unicode code points. 210 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 211 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 212 * If TRUE and the input does not statisfy STD3 rules, the operation 213 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 214 * 215 * @param status ICU error code in/out parameter. 216 * Must fulfill U_SUCCESS before the function call. 217 * @return <0 or 0 or >0 as usual for string comparisons 218 * @draft ICU 2.6 219 */ 220 U_CFUNC int32_t U_EXPORT2 221 idnaref_compare( const UChar *s1, int32_t length1, 222 const UChar *s2, int32_t length2, 223 int32_t options, 224 UErrorCode* status); 225 226 #endif /* #if !UCONFIG_NO_IDNA */ 227 228 #endif 229