1 /*
2 *****************************************************************************
3 *
4 * Copyright (C) 1998-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *****************************************************************************
8 *
9 * ucnv_err.c
10 * Implements error behaviour functions called by T_UConverter_{from,to}Unicode
11 *
12 *
13 * Change history:
14 *
15 * 06/29/2000 helena Major rewrite of the callback APIs.
16 */
17
18 #include "unicode/utypes.h"
19
20 #if !UCONFIG_NO_CONVERSION
21
22 #include "unicode/ucnv_err.h"
23 #include "unicode/ucnv_cb.h"
24 #include "ucnv_cnv.h"
25 #include "cmemory.h"
26 #include "unicode/ucnv.h"
27 #include "ustrfmt.h"
28
/*
 * Capacity, in UChars, of the scratch buffers in which the escape callbacks
 * build their replacement text.
 * 48 = 6 (longest escape produced for one source byte: "&#255;" or "&#xFF;")
 *    * 8 (max number of bytes per char for any converter)
 * The previous value of 32 assumed 4 UChars per byte, which is too small for
 * the XML escape forms of the to-Unicode escape callback.
 */
#define VALUE_STRING_LENGTH     48

/* Code points of the ASCII characters used to spell the escape sequences. */
#define UNICODE_PERCENT_SIGN_CODEPOINT  0x0025
#define UNICODE_U_CODEPOINT             0x0055
#define UNICODE_X_CODEPOINT             0x0058
#define UNICODE_RS_CODEPOINT            0x005C
#define UNICODE_U_LOW_CODEPOINT         0x0075
#define UNICODE_X_LOW_CODEPOINT         0x0078
#define UNICODE_AMP_CODEPOINT           0x0026
#define UNICODE_HASH_CODEPOINT          0x0023
#define UNICODE_SEMICOLON_CODEPOINT     0x003B
#define UNICODE_PLUS_CODEPOINT          0x002B
#define UNICODE_LEFT_CURLY_CODEPOINT    0x007B
#define UNICODE_RIGHT_CURLY_CODEPOINT   0x007D

/*
 * Option characters: the callbacks in this file compare the first char of a
 * non-NULL callback context against these values.
 */
#define UCNV_PRV_ESCAPE_ICU      0
#define UCNV_PRV_ESCAPE_C        'C'
#define UCNV_PRV_ESCAPE_XML_DEC  'D'
#define UCNV_PRV_ESCAPE_XML_HEX  'X'
#define UCNV_PRV_ESCAPE_JAVA     'J'
#define UCNV_PRV_ESCAPE_UNICODE  'U'
#define UCNV_PRV_STOP_ON_ILLEGAL 'i'
50
51 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
52 U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_STOP(const void * context,UConverterFromUnicodeArgs * fromUArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)53 UCNV_FROM_U_CALLBACK_STOP (
54 const void *context,
55 UConverterFromUnicodeArgs *fromUArgs,
56 const UChar* codeUnits,
57 int32_t length,
58 UChar32 codePoint,
59 UConverterCallbackReason reason,
60 UErrorCode * err)
61 {
62 /* the caller must have set the error code accordingly */
63 return;
64 }
65
66
67 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
68 U_CAPI void U_EXPORT2
UCNV_TO_U_CALLBACK_STOP(const void * context,UConverterToUnicodeArgs * toUArgs,const char * codePoints,int32_t length,UConverterCallbackReason reason,UErrorCode * err)69 UCNV_TO_U_CALLBACK_STOP (
70 const void *context,
71 UConverterToUnicodeArgs *toUArgs,
72 const char* codePoints,
73 int32_t length,
74 UConverterCallbackReason reason,
75 UErrorCode * err)
76 {
77 /* the caller must have set the error code accordingly */
78 return;
79 }
80
81 U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_SKIP(const void * context,UConverterFromUnicodeArgs * fromUArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)82 UCNV_FROM_U_CALLBACK_SKIP (
83 const void *context,
84 UConverterFromUnicodeArgs *fromUArgs,
85 const UChar* codeUnits,
86 int32_t length,
87 UChar32 codePoint,
88 UConverterCallbackReason reason,
89 UErrorCode * err)
90 {
91 if (reason <= UCNV_IRREGULAR)
92 {
93 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
94 {
95 *err = U_ZERO_ERROR;
96 }
97 /* else the caller must have set the error code accordingly. */
98 }
99 /* else ignore the reset, close and clone calls. */
100 }
101
102 U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_SUBSTITUTE(const void * context,UConverterFromUnicodeArgs * fromArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)103 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
104 const void *context,
105 UConverterFromUnicodeArgs *fromArgs,
106 const UChar* codeUnits,
107 int32_t length,
108 UChar32 codePoint,
109 UConverterCallbackReason reason,
110 UErrorCode * err)
111 {
112 if (reason <= UCNV_IRREGULAR)
113 {
114 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
115 {
116 *err = U_ZERO_ERROR;
117 ucnv_cbFromUWriteSub(fromArgs, 0, err);
118 }
119 /* else the caller must have set the error code accordingly. */
120 }
121 /* else ignore the reset, close and clone calls. */
122 }
123
124 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
125 *uses a clean copy (resetted) of the converter, to convert that unicode
126 *escape sequence to the target codepage (if conversion failure happens then
127 *we revert to substituting with subchar)
128 */
129 U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_ESCAPE(const void * context,UConverterFromUnicodeArgs * fromArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)130 UCNV_FROM_U_CALLBACK_ESCAPE (
131 const void *context,
132 UConverterFromUnicodeArgs *fromArgs,
133 const UChar *codeUnits,
134 int32_t length,
135 UChar32 codePoint,
136 UConverterCallbackReason reason,
137 UErrorCode * err)
138 {
139
140 UChar valueString[VALUE_STRING_LENGTH];
141 int32_t valueStringLength = 0;
142 int32_t i = 0;
143
144 const UChar *myValueSource = NULL;
145 UErrorCode err2 = U_ZERO_ERROR;
146 UConverterFromUCallback original = NULL;
147 const void *originalContext;
148
149 UConverterFromUCallback ignoredCallback = NULL;
150 const void *ignoredContext;
151
152 if (reason > UCNV_IRREGULAR)
153 {
154 return;
155 }
156
157 ucnv_setFromUCallBack (fromArgs->converter,
158 (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
159 NULL,
160 &original,
161 &originalContext,
162 &err2);
163
164 if (U_FAILURE (err2))
165 {
166 *err = err2;
167 return;
168 }
169 if(context==NULL)
170 {
171 while (i < length)
172 {
173 valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
174 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
175 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
176 }
177 }
178 else
179 {
180 switch(*((char*)context))
181 {
182 case UCNV_PRV_ESCAPE_JAVA:
183 while (i < length)
184 {
185 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
186 valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
187 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
188 }
189 break;
190
191 case UCNV_PRV_ESCAPE_C:
192 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
193
194 if(length==2){
195 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
196 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
197
198 }
199 else{
200 valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
201 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
202 }
203 break;
204
205 case UCNV_PRV_ESCAPE_XML_DEC:
206
207 valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
208 valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
209 if(length==2){
210 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
211 }
212 else{
213 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
214 }
215 valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
216 break;
217
218 case UCNV_PRV_ESCAPE_XML_HEX:
219
220 valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
221 valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
222 valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
223 if(length==2){
224 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
225 }
226 else{
227 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
228 }
229 valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
230 break;
231
232 case UCNV_PRV_ESCAPE_UNICODE:
233 valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */
234 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
235 valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
236 if (length == 2) {
237 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
238 } else {
239 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
240 }
241 valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
242 break;
243
244 default:
245 while (i < length)
246 {
247 valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
248 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
249 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
250 }
251 }
252 }
253 myValueSource = valueString;
254
255 /* reset the error */
256 *err = U_ZERO_ERROR;
257
258 ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
259
260 ucnv_setFromUCallBack (fromArgs->converter,
261 original,
262 originalContext,
263 &ignoredCallback,
264 &ignoredContext,
265 &err2);
266 if (U_FAILURE (err2))
267 {
268 *err = err2;
269 return;
270 }
271
272 return;
273 }
274
275
276
277 U_CAPI void U_EXPORT2
UCNV_TO_U_CALLBACK_SKIP(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)278 UCNV_TO_U_CALLBACK_SKIP (
279 const void *context,
280 UConverterToUnicodeArgs *toArgs,
281 const char* codeUnits,
282 int32_t length,
283 UConverterCallbackReason reason,
284 UErrorCode * err)
285 {
286 if (reason <= UCNV_IRREGULAR)
287 {
288 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
289 {
290 *err = U_ZERO_ERROR;
291 }
292 /* else the caller must have set the error code accordingly. */
293 }
294 /* else ignore the reset, close and clone calls. */
295 }
296
297 U_CAPI void U_EXPORT2
UCNV_TO_U_CALLBACK_SUBSTITUTE(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)298 UCNV_TO_U_CALLBACK_SUBSTITUTE (
299 const void *context,
300 UConverterToUnicodeArgs *toArgs,
301 const char* codeUnits,
302 int32_t length,
303 UConverterCallbackReason reason,
304 UErrorCode * err)
305 {
306 if (reason <= UCNV_IRREGULAR)
307 {
308 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
309 {
310 *err = U_ZERO_ERROR;
311 ucnv_cbToUWriteSub(toArgs,0,err);
312 }
313 /* else the caller must have set the error code accordingly. */
314 }
315 /* else ignore the reset, close and clone calls. */
316 }
317
318 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
319 *and uses that as the substitution sequence
320 */
321 U_CAPI void U_EXPORT2
UCNV_TO_U_CALLBACK_ESCAPE(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)322 UCNV_TO_U_CALLBACK_ESCAPE (
323 const void *context,
324 UConverterToUnicodeArgs *toArgs,
325 const char* codeUnits,
326 int32_t length,
327 UConverterCallbackReason reason,
328 UErrorCode * err)
329 {
330 UChar uniValueString[VALUE_STRING_LENGTH];
331 int32_t valueStringLength = 0;
332 int32_t i = 0;
333
334 if (reason > UCNV_IRREGULAR)
335 {
336 return;
337 }
338
339 if(context==NULL)
340 {
341 while (i < length)
342 {
343 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
344 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
345 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
346 }
347 }
348 else
349 {
350 switch(*((char*)context))
351 {
352 case UCNV_PRV_ESCAPE_XML_DEC:
353 while (i < length)
354 {
355 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
356 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
357 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
358 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
359 }
360 break;
361
362 case UCNV_PRV_ESCAPE_XML_HEX:
363 while (i < length)
364 {
365 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
366 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
367 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
368 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
369 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
370 }
371 break;
372 case UCNV_PRV_ESCAPE_C:
373 while (i < length)
374 {
375 uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
376 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
377 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
378 }
379 break;
380 default:
381 while (i < length)
382 {
383 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
384 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
385 uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
386 valueStringLength += 2;
387 }
388 }
389 }
390 /* reset the error */
391 *err = U_ZERO_ERROR;
392
393 ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
394 }
395
396 #endif
397