1 /*
2 *****************************************************************************
3 *
4 * Copyright (C) 1998-2007, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *****************************************************************************
8 *
9 * ucnv_err.c
10 * Implements error behaviour functions called by T_UConverter_{from,to}Unicode
11 *
12 *
13 * Change history:
14 *
15 * 06/29/2000 helena Major rewrite of the callback APIs.
16 */
17
18 #include "unicode/utypes.h"
19
20 #if !UCONFIG_NO_CONVERSION
21
22 #include "unicode/ucnv_err.h"
23 #include "unicode/ucnv_cb.h"
24 #include "ucnv_cnv.h"
25 #include "cmemory.h"
26 #include "unicode/ucnv.h"
27 #include "ustrfmt.h"
28
/*
 * Capacity, in UChars, of the scratch buffers in which the ESCAPE callbacks
 * build their replacement text.
 * 32 = 4 (UChars in the value string for one byte) *
 *      8 (maximum number of bytes per character for any converter).
 */
#define VALUE_STRING_LENGTH             32

/* Code points of the literal characters that make up the escape sequences. */
#define UNICODE_PERCENT_SIGN_CODEPOINT  0x0025  /* % */
#define UNICODE_U_CODEPOINT             0x0055  /* U */
#define UNICODE_X_CODEPOINT             0x0058  /* X */
#define UNICODE_RS_CODEPOINT            0x005C  /* reverse solidus (backslash) */
#define UNICODE_U_LOW_CODEPOINT         0x0075  /* u */
#define UNICODE_X_LOW_CODEPOINT         0x0078  /* x */
#define UNICODE_AMP_CODEPOINT           0x0026  /* & */
#define UNICODE_HASH_CODEPOINT          0x0023  /* # */
#define UNICODE_SEMICOLON_CODEPOINT     0x003B  /* ; */
#define UNICODE_PLUS_CODEPOINT          0x002B  /* + */
#define UNICODE_LEFT_CURLY_CODEPOINT    0x007B  /* { */
#define UNICODE_RIGHT_CURLY_CODEPOINT   0x007D  /* } */
#define UNICODE_SPACE_CODEPOINT         0x0020  /* space */

/*
 * Selector values compared against the first char of a callback's context
 * string. The ESCAPE callbacks switch on them to pick an output format; any
 * value without its own case falls through to the default %U / %X form.
 */
#define UCNV_PRV_ESCAPE_ICU             0
#define UCNV_PRV_ESCAPE_C               'C'
#define UCNV_PRV_ESCAPE_XML_DEC         'D'
#define UCNV_PRV_ESCAPE_XML_HEX         'X'
#define UCNV_PRV_ESCAPE_JAVA            'J'
#define UCNV_PRV_ESCAPE_UNICODE         'U'
#define UCNV_PRV_ESCAPE_CSS2            'S'

/*
 * Selector for the SKIP and SUBSTITUTE callbacks: with this context they
 * clear the error only when the reason is UCNV_UNASSIGNED.
 */
#define UCNV_PRV_STOP_ON_ILLEGAL        'i'
52
53 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
54 U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_STOP(const void * context,UConverterFromUnicodeArgs * fromUArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)55 UCNV_FROM_U_CALLBACK_STOP (
56 const void *context,
57 UConverterFromUnicodeArgs *fromUArgs,
58 const UChar* codeUnits,
59 int32_t length,
60 UChar32 codePoint,
61 UConverterCallbackReason reason,
62 UErrorCode * err)
63 {
64 /* the caller must have set the error code accordingly */
65 return;
66 }
67
68
69 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
70 U_CAPI void U_EXPORT2
UCNV_TO_U_CALLBACK_STOP(const void * context,UConverterToUnicodeArgs * toUArgs,const char * codePoints,int32_t length,UConverterCallbackReason reason,UErrorCode * err)71 UCNV_TO_U_CALLBACK_STOP (
72 const void *context,
73 UConverterToUnicodeArgs *toUArgs,
74 const char* codePoints,
75 int32_t length,
76 UConverterCallbackReason reason,
77 UErrorCode * err)
78 {
79 /* the caller must have set the error code accordingly */
80 return;
81 }
82
83 U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_SKIP(const void * context,UConverterFromUnicodeArgs * fromUArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)84 UCNV_FROM_U_CALLBACK_SKIP (
85 const void *context,
86 UConverterFromUnicodeArgs *fromUArgs,
87 const UChar* codeUnits,
88 int32_t length,
89 UChar32 codePoint,
90 UConverterCallbackReason reason,
91 UErrorCode * err)
92 {
93 if (reason <= UCNV_IRREGULAR)
94 {
95 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
96 {
97 *err = U_ZERO_ERROR;
98 }
99 /* else the caller must have set the error code accordingly. */
100 }
101 /* else ignore the reset, close and clone calls. */
102 }
103
104 U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_SUBSTITUTE(const void * context,UConverterFromUnicodeArgs * fromArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)105 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
106 const void *context,
107 UConverterFromUnicodeArgs *fromArgs,
108 const UChar* codeUnits,
109 int32_t length,
110 UChar32 codePoint,
111 UConverterCallbackReason reason,
112 UErrorCode * err)
113 {
114 if (reason <= UCNV_IRREGULAR)
115 {
116 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
117 {
118 *err = U_ZERO_ERROR;
119 ucnv_cbFromUWriteSub(fromArgs, 0, err);
120 }
121 /* else the caller must have set the error code accordingly. */
122 }
123 /* else ignore the reset, close and clone calls. */
124 }
125
126 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
127 *uses a clean copy (resetted) of the converter, to convert that unicode
128 *escape sequence to the target codepage (if conversion failure happens then
129 *we revert to substituting with subchar)
130 */
131 U_CAPI void U_EXPORT2
UCNV_FROM_U_CALLBACK_ESCAPE(const void * context,UConverterFromUnicodeArgs * fromArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)132 UCNV_FROM_U_CALLBACK_ESCAPE (
133 const void *context,
134 UConverterFromUnicodeArgs *fromArgs,
135 const UChar *codeUnits,
136 int32_t length,
137 UChar32 codePoint,
138 UConverterCallbackReason reason,
139 UErrorCode * err)
140 {
141
142 UChar valueString[VALUE_STRING_LENGTH];
143 int32_t valueStringLength = 0;
144 int32_t i = 0;
145
146 const UChar *myValueSource = NULL;
147 UErrorCode err2 = U_ZERO_ERROR;
148 UConverterFromUCallback original = NULL;
149 const void *originalContext;
150
151 UConverterFromUCallback ignoredCallback = NULL;
152 const void *ignoredContext;
153
154 if (reason > UCNV_IRREGULAR)
155 {
156 return;
157 }
158
159 ucnv_setFromUCallBack (fromArgs->converter,
160 (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
161 NULL,
162 &original,
163 &originalContext,
164 &err2);
165
166 if (U_FAILURE (err2))
167 {
168 *err = err2;
169 return;
170 }
171 if(context==NULL)
172 {
173 while (i < length)
174 {
175 valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
176 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
177 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
178 }
179 }
180 else
181 {
182 switch(*((char*)context))
183 {
184 case UCNV_PRV_ESCAPE_JAVA:
185 while (i < length)
186 {
187 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
188 valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
189 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
190 }
191 break;
192
193 case UCNV_PRV_ESCAPE_C:
194 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
195
196 if(length==2){
197 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
198 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
199
200 }
201 else{
202 valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
203 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
204 }
205 break;
206
207 case UCNV_PRV_ESCAPE_XML_DEC:
208
209 valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
210 valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
211 if(length==2){
212 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
213 }
214 else{
215 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
216 }
217 valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
218 break;
219
220 case UCNV_PRV_ESCAPE_XML_HEX:
221
222 valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
223 valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
224 valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
225 if(length==2){
226 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
227 }
228 else{
229 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
230 }
231 valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
232 break;
233
234 case UCNV_PRV_ESCAPE_UNICODE:
235 valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */
236 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
237 valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
238 if (length == 2) {
239 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
240 } else {
241 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
242 }
243 valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
244 break;
245
246 case UCNV_PRV_ESCAPE_CSS2:
247 valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
248 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
249 /* Always add space character, becase the next character might be whitespace,
250 which would erroneously be considered the termination of the escape sequence. */
251 valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
252 break;
253
254 default:
255 while (i < length)
256 {
257 valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
258 valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
259 valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
260 }
261 }
262 }
263 myValueSource = valueString;
264
265 /* reset the error */
266 *err = U_ZERO_ERROR;
267
268 ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
269
270 ucnv_setFromUCallBack (fromArgs->converter,
271 original,
272 originalContext,
273 &ignoredCallback,
274 &ignoredContext,
275 &err2);
276 if (U_FAILURE (err2))
277 {
278 *err = err2;
279 return;
280 }
281
282 return;
283 }
284
285
286
287 U_CAPI void U_EXPORT2
UCNV_TO_U_CALLBACK_SKIP(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)288 UCNV_TO_U_CALLBACK_SKIP (
289 const void *context,
290 UConverterToUnicodeArgs *toArgs,
291 const char* codeUnits,
292 int32_t length,
293 UConverterCallbackReason reason,
294 UErrorCode * err)
295 {
296 if (reason <= UCNV_IRREGULAR)
297 {
298 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
299 {
300 *err = U_ZERO_ERROR;
301 }
302 /* else the caller must have set the error code accordingly. */
303 }
304 /* else ignore the reset, close and clone calls. */
305 }
306
307 U_CAPI void U_EXPORT2
UCNV_TO_U_CALLBACK_SUBSTITUTE(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)308 UCNV_TO_U_CALLBACK_SUBSTITUTE (
309 const void *context,
310 UConverterToUnicodeArgs *toArgs,
311 const char* codeUnits,
312 int32_t length,
313 UConverterCallbackReason reason,
314 UErrorCode * err)
315 {
316 if (reason <= UCNV_IRREGULAR)
317 {
318 if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
319 {
320 *err = U_ZERO_ERROR;
321 ucnv_cbToUWriteSub(toArgs,0,err);
322 }
323 /* else the caller must have set the error code accordingly. */
324 }
325 /* else ignore the reset, close and clone calls. */
326 }
327
328 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
329 *and uses that as the substitution sequence
330 */
331 U_CAPI void U_EXPORT2
UCNV_TO_U_CALLBACK_ESCAPE(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)332 UCNV_TO_U_CALLBACK_ESCAPE (
333 const void *context,
334 UConverterToUnicodeArgs *toArgs,
335 const char* codeUnits,
336 int32_t length,
337 UConverterCallbackReason reason,
338 UErrorCode * err)
339 {
340 UChar uniValueString[VALUE_STRING_LENGTH];
341 int32_t valueStringLength = 0;
342 int32_t i = 0;
343
344 if (reason > UCNV_IRREGULAR)
345 {
346 return;
347 }
348
349 if(context==NULL)
350 {
351 while (i < length)
352 {
353 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
354 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
355 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
356 }
357 }
358 else
359 {
360 switch(*((char*)context))
361 {
362 case UCNV_PRV_ESCAPE_XML_DEC:
363 while (i < length)
364 {
365 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
366 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
367 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
368 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
369 }
370 break;
371
372 case UCNV_PRV_ESCAPE_XML_HEX:
373 while (i < length)
374 {
375 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT; /* adding & */
376 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT; /* adding # */
377 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
378 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
379 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
380 }
381 break;
382 case UCNV_PRV_ESCAPE_C:
383 while (i < length)
384 {
385 uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT; /* adding \ */
386 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
387 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
388 }
389 break;
390 default:
391 while (i < length)
392 {
393 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
394 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT; /* adding X */
395 uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
396 valueStringLength += 2;
397 }
398 }
399 }
400 /* reset the error */
401 *err = U_ZERO_ERROR;
402
403 ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
404 }
405
406 #endif
407