1 /* 2 ********************************************************************** 3 * Copyright (C) 1999-2004, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ********************************************************************** 6 * 7 * 8 * ucnv_err.h: 9 */ 10 11 /** 12 * \file 13 * \brief C UConverter predefined error callbacks 14 * 15 * <h2>Error Behaviour Functions</h2> 16 * Defines some error behaviour functions called by ucnv_{from,to}Unicode. 17 * These are provided as part of ICU and many are stable, but they 18 * can also be considered only as an example of what can be done with 19 * callbacks. You may of course write your own. 20 * 21 * If you want to write your own, you may also find the functions from 22 * ucnv_cb.h useful when writing your own callbacks. 23 * 24 * These functions, although public, should NEVER be called directly. 25 * They should be used as parameters to the ucnv_setFromUCallBack 26 * and ucnv_setToUCallBack functions, to set the behaviour of a converter 27 * when it encounters ILLEGAL/UNMAPPED/INVALID sequences. 28 * 29 * Usage example: 'STOP' doesn't need any context, but newContext 30 * could be set to something other than 'NULL' if needed. The available 31 * contexts in this header can modify the default behavior of the callback. 32 * 33 * \code 34 * UErrorCode err = U_ZERO_ERROR; 35 * UConverter *myConverter = ucnv_open("ibm-949", &err); 36 * const void *oldContext; 37 * UConverterFromUCallback oldAction; 38 * 39 * 40 * if (U_SUCCESS(err)) 41 * { 42 * ucnv_setFromUCallBack(myConverter, 43 * UCNV_FROM_U_CALLBACK_STOP, 44 * NULL, 45 * &oldAction, 46 * &oldContext, 47 * &err); 48 * } 49 * \endcode 50 * 51 * The code above tells "myConverter" to stop when it encounters an 52 * ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from 53 * Unicode -> Codepage. 
The behavior from Codepage to Unicode is not changed, 54 * and ucnv_setToUCallBack would need to be called in order to change 55 * that behavior too. 56 * 57 * Here is an example with a context: 58 * 59 * \code 60 * UErrorCode err = U_ZERO_ERROR; 61 * UConverter *myConverter = ucnv_open("ibm-949", &err); 62 * const void *oldContext; 63 * UConverterToUCallback oldAction; 64 * 65 * 66 * if (U_SUCCESS(err)) 67 * { 68 * ucnv_setToUCallBack(myConverter, 69 * UCNV_TO_U_CALLBACK_SUBSTITUTE, 70 * UCNV_SUB_STOP_ON_ILLEGAL, 71 * &oldAction, 72 * &oldContext, 73 * &err); 74 * } 75 * \endcode 76 * 77 * The code above tells "myConverter" to stop when it encounters an 78 * ILLEGAL/TRUNCATED/INVALID sequence when it is used to convert from 79 * Codepage -> Unicode. Any legal but unmapped characters will be 80 * replaced with the default substitution character. 81 */ 82 83 #ifndef UCNV_ERR_H 84 #define UCNV_ERR_H 85 86 #include "unicode/utypes.h" 87 88 #if !UCONFIG_NO_CONVERSION 89 90 /** Forward declaring the UConverter structure. 
@stable ICU 2.0 */ 91 struct UConverter; 92 93 /** @stable ICU 2.0 */ 94 typedef struct UConverter UConverter; 95 96 /** 97 * FROM_U, TO_U context option for the substitute callbacks: stop at an 98 * ILLEGAL_SEQUENCE instead of substituting it. @stable ICU 2.0 99 */ 100 #define UCNV_SUB_STOP_ON_ILLEGAL "i" 101 102 /** 103 * FROM_U, TO_U context option for the skip callbacks: stop at an 104 * ILLEGAL_SEQUENCE instead of skipping it. @stable ICU 2.0 105 */ 106 #define UCNV_SKIP_STOP_ON_ILLEGAL "i" 107 108 /** 109 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX) 110 * @stable ICU 2.0 111 */ 112 #define UCNV_ESCAPE_ICU NULL 113 /** 114 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX) 115 * @stable ICU 2.0 116 */ 117 #define UCNV_ESCAPE_JAVA "J" 118 /** 119 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX) 120 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to C (\\xXXXX) 121 * @stable ICU 2.0 122 */ 123 #define UCNV_ESCAPE_C "C" 124 /** 125 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape (&#DDDD;) 126 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape (&#DDDD;) 127 * @stable ICU 2.0 128 */ 129 #define UCNV_ESCAPE_XML_DEC "D" 130 /** 131 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape (&#xXXXX;) 132 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape (&#xXXXX;) 133 * @stable ICU 2.0 134 */ 135 #define UCNV_ESCAPE_XML_HEX "X" 136 /** 137 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX) 138 * @stable ICU 2.0 139 */ 140 #define UCNV_ESCAPE_UNICODE "U" 141 142 /** 143 * The process condition code to be used with the callbacks. 144 * Codes which are greater than UCNV_IRREGULAR should be 145 * passed on to any chained callbacks. 
146 * @stable ICU 2.0 147 */ 148 typedef enum { 149 UCNV_UNASSIGNED = 0, /**< The code point is unassigned. 150 The error code U_INVALID_CHAR_FOUND will be set. */ 151 UCNV_ILLEGAL = 1, /**< The code point is illegal. For example, 152 \\x81\\x2E is illegal in SJIS because \\x2E 153 is not a valid trail byte for the \\x81 154 lead byte. 155 Also, starting with Unicode 3.0.1, non-shortest byte sequences 156 in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061) 157 are also illegal, not just irregular. 158 The error code U_ILLEGAL_CHAR_FOUND will be set. */ 159 UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in 160 the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF 161 are irregular UTF-8 byte sequences for single surrogate 162 code points. 163 The error code U_INVALID_CHAR_FOUND will be set. */ 164 UCNV_RESET = 3, /**< The callback is called with this reason when a 165 'reset' has occurred. Callback should reset all 166 state. */ 167 UCNV_CLOSE = 4, /**< Called when the converter is closed. The 168 callback should release any allocated memory.*/ 169 UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the 170 converter. The pointer available as the 171 'context' is an alias to the original converter's 172 context pointer. If the context must be owned 173 by the new converter, the callback must clone 174 the data and call ucnv_setFromUCallBack 175 (or ucnv_setToUCallBack) with the correct pointer. 176 @stable ICU 2.2 177 */ 178 } UConverterCallbackReason; 179 180 181 /** 182 * The structure for the fromUnicode callback function parameter. 183 * @stable ICU 2.0 184 */ 185 typedef struct { 186 uint16_t size; /**< The size of this struct. @stable ICU 2.0 */ 187 UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ 188 UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. 
@stable ICU 2.0 */ 189 const UChar *source; /**< Pointer to the source buffer. @stable ICU 2.0 */ 190 const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ 191 char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ 192 const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ 193 int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ 194 } UConverterFromUnicodeArgs; 195 196 197 /** 198 * The structure for the toUnicode callback function parameter. 199 * @stable ICU 2.0 200 */ 201 typedef struct { 202 uint16_t size; /**< The size of this struct. @stable ICU 2.0 */ 203 UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */ 204 UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */ 205 const char *source; /**< Pointer to the source buffer. @stable ICU 2.0 */ 206 const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */ 207 UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */ 208 const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */ 209 int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */ 210 } UConverterToUnicodeArgs; 211 212 213 /** 214 * DO NOT CALL THIS FUNCTION DIRECTLY! 215 * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE, 216 * returning the error code back to the caller immediately. 
217 * 218 * @param context Pointer to the callback's private data 219 * @param fromUArgs Information about the conversion in progress 220 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence 221 * @param length Size (in UChars) of the concerned Unicode sequence 222 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint. 223 * @param reason Defines the reason the callback was invoked 224 * @param err This should always be set to a failure status prior to calling. 225 * @stable ICU 2.0 226 */ 227 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP ( 228 const void *context, 229 UConverterFromUnicodeArgs *fromUArgs, 230 const UChar* codeUnits, 231 int32_t length, 232 UChar32 codePoint, 233 UConverterCallbackReason reason, 234 UErrorCode * err); 235 236 237 238 /** 239 * DO NOT CALL THIS FUNCTION DIRECTLY! 240 * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE, 241 * returning the error code back to the caller immediately. 242 * 243 * @param context Pointer to the callback's private data 244 * @param toUArgs Information about the conversion in progress 245 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence 246 * @param length Size (in bytes) of the concerned codepage sequence 247 * @param reason Defines the reason the callback was invoked 248 * @param err This should always be set to a failure status prior to calling. 249 * @stable ICU 2.0 250 */ 251 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP ( 252 const void *context, 253 UConverterToUnicodeArgs *toUArgs, 254 const char* codeUnits, 255 int32_t length, 256 UConverterCallbackReason reason, 257 UErrorCode * err); 258 259 /** 260 * DO NOT CALL THIS FUNCTION DIRECTLY! 261 * This From Unicode callback skips any ILLEGAL_SEQUENCE, or 262 * skips only UNASSIGNED_SEQUENCE depending on the context parameter, 263 * simply ignoring those characters. 
264 * 265 * @param context The function currently recognizes the callback options: 266 * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, 267 * returning the error code back to the caller immediately. 268 * NULL: Skips any ILLEGAL_SEQUENCE 269 * @param fromUArgs Information about the conversion in progress 270 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence 271 * @param length Size (in UChars) of the concerned Unicode sequence 272 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint. 273 * @param reason Defines the reason the callback was invoked 274 * @param err Return value will be set to success if the callback was handled, 275 * otherwise this value will be set to a failure status. 276 * @stable ICU 2.0 277 */ 278 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP ( 279 const void *context, 280 UConverterFromUnicodeArgs *fromUArgs, 281 const UChar* codeUnits, 282 int32_t length, 283 UChar32 codePoint, 284 UConverterCallbackReason reason, 285 UErrorCode * err); 286 287 /** 288 * DO NOT CALL THIS FUNCTION DIRECTLY! 289 * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or 290 * UNASSIGNED_SEQUENCE depending on context parameter, with the 291 * current substitution string for the converter. This is the default 292 * callback. 293 * 294 * @param context The function currently recognizes the callback options: 295 * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, 296 * returning the error code back to the caller immediately. 297 * NULL: Substitutes any ILLEGAL_SEQUENCE 298 * @param fromUArgs Information about the conversion in progress 299 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence 300 * @param length Size (in UChars) of the concerned Unicode sequence 301 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint. 
302 * @param reason Defines the reason the callback was invoked 303 * @param err Return value will be set to success if the callback was handled, 304 * otherwise this value will be set to a failure status. 305 * @see ucnv_setSubstChars 306 * @stable ICU 2.0 307 */ 308 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE ( 309 const void *context, 310 UConverterFromUnicodeArgs *fromUArgs, 311 const UChar* codeUnits, 312 int32_t length, 313 UChar32 codePoint, 314 UConverterCallbackReason reason, 315 UErrorCode * err); 316 317 /** 318 * DO NOT CALL THIS FUNCTION DIRECTLY! 319 * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with an 320 * escaped representation of the illegal codepoints. 321 * 322 * @param context The function currently recognizes the callback options: 323 * <ul> 324 * <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal 325 * representation in the format %UXXXX (e.g. "%UFFFE%U00AC%UC8FE"). 326 * In the event the converter doesn't support the characters {%,U}[A-F][0-9], 327 * it will substitute the illegal sequence with the substitution characters. 328 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 329 * %UD84D%UDC56</li> 330 * <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal 331 * representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE"). 332 * In the event the converter doesn't support the characters {\,u}[A-F][0-9], 333 * it will substitute the illegal sequence with the substitution characters. 334 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 335 * \\uD84D\\uDC56</li> 336 * <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal 337 * representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE"). 338 * In the event the converter doesn't support the characters {\,u,U}[A-F][0-9], 339 * it will substitute the illegal sequence with the substitution characters. 
340 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 341 * \\U00023456</li> 342 * <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal 343 * representation in the format &#DDDDDDDD; (e.g. "&#65534;&#172;&#51454;"). 344 * In the event the converter doesn't support the characters {&,#}[0-9], 345 * it will substitute the illegal sequence with the substitution characters. 346 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 347 * &#144470; and Zero padding is ignored.</li> 348 * <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal 349 * representation in the format &#xXXXX; (e.g. "&#xFFFE;&#x00AC;&#xC8FE;"). 350 * In the event the converter doesn't support the characters {&,#,x}[A-F][0-9], 351 * it will substitute the illegal sequence with the substitution characters. 352 * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as 353 * &#x23456;</li> * <li>UCNV_ESCAPE_UNICODE: Substitutes the ILLEGAL SEQUENCE with a Unicode-style escape (U+XXXXX).</li> 354 * </ul> 355 * @param fromUArgs Information about the conversion in progress 356 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence 357 * @param length Size (in UChars) of the concerned Unicode sequence 358 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint. 359 * @param reason Defines the reason the callback was invoked 360 * @param err Return value will be set to success if the callback was handled, 361 * otherwise this value will be set to a failure status. 362 * @stable ICU 2.0 363 */ 364 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE ( 365 const void *context, 366 UConverterFromUnicodeArgs *fromUArgs, 367 const UChar* codeUnits, 368 int32_t length, 369 UChar32 codePoint, 370 UConverterCallbackReason reason, 371 UErrorCode * err); 372 373 374 /** 375 * DO NOT CALL THIS FUNCTION DIRECTLY! 
376 * This To Unicode callback skips any ILLEGAL_SEQUENCE, or 377 * skips only UNASSIGNED_SEQUENCE depending on the context parameter, 378 * simply ignoring those characters. 379 * 380 * @param context The function currently recognizes the callback options: 381 * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, 382 * returning the error code back to the caller immediately. 383 * NULL: Skips any ILLEGAL_SEQUENCE 384 * @param toUArgs Information about the conversion in progress 385 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence 386 * @param length Size (in bytes) of the concerned codepage sequence 387 * @param reason Defines the reason the callback was invoked 388 * @param err Return value will be set to success if the callback was handled, 389 * otherwise this value will be set to a failure status. 390 * @stable ICU 2.0 391 */ 392 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP ( 393 const void *context, 394 UConverterToUnicodeArgs *toUArgs, 395 const char* codeUnits, 396 int32_t length, 397 UConverterCallbackReason reason, 398 UErrorCode * err); 399 400 /** 401 * DO NOT CALL THIS FUNCTION DIRECTLY! 402 * This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or 403 * UNASSIGNED_SEQUENCE depending on context parameter, with the 404 * Unicode substitution character, U+FFFD. 405 * 406 * @param context The function currently recognizes the callback options: 407 * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE, 408 * returning the error code back to the caller immediately. 
409 * NULL: Substitutes any ILLEGAL_SEQUENCE 410 * @param toUArgs Information about the conversion in progress 411 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence 412 * @param length Size (in bytes) of the concerned codepage sequence 413 * @param reason Defines the reason the callback was invoked 414 * @param err Return value will be set to success if the callback was handled, 415 * otherwise this value will be set to a failure status. 416 * @stable ICU 2.0 417 */ 418 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE ( 419 const void *context, 420 UConverterToUnicodeArgs *toUArgs, 421 const char* codeUnits, 422 int32_t length, 423 UConverterCallbackReason reason, 424 UErrorCode * err); 425 426 /** 427 * DO NOT CALL THIS FUNCTION DIRECTLY! 428 * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the 429 * hexadecimal representation of the illegal bytes 430 * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03"). 431 * 432 * @param context This function currently recognizes the callback options: 433 * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC, 434 * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE. 435 * @param toUArgs Information about the conversion in progress 436 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence 437 * @param length Size (in bytes) of the concerned codepage sequence 438 * @param reason Defines the reason the callback was invoked 439 * @param err Return value will be set to success if the callback was handled, 440 * otherwise this value will be set to a failure status. 441 * @stable ICU 2.0 442 */ 443 444 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE ( 445 const void *context, 446 UConverterToUnicodeArgs *toUArgs, 447 const char* codeUnits, 448 int32_t length, 449 UConverterCallbackReason reason, 450 UErrorCode * err); 451 452 #endif /* !UCONFIG_NO_CONVERSION */ 453 454 #endif 455 456 /*UCNV_ERR_H*/ 457