• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *****************************************************************************
3  *
4  *   Copyright (C) 1998-2007, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  *****************************************************************************
8  *
9  *  ucnv_err.c
10  *  Implements error behaviour functions called by T_UConverter_{from,to}Unicode
11  *
12  *
13 *   Change history:
14 *
15 *   06/29/2000  helena      Major rewrite of the callback APIs.
16 */
17 
18 #include "unicode/utypes.h"
19 
20 #if !UCONFIG_NO_CONVERSION
21 
22 #include "unicode/ucnv_err.h"
23 #include "unicode/ucnv_cb.h"
24 #include "ucnv_cnv.h"
25 #include "cmemory.h"
26 #include "unicode/ucnv.h"
27 #include "ustrfmt.h"
28 
/*
 * Capacity, in UChars, of the scratch buffer that holds a generated escape
 * sequence.
 * 48 = 6 (UChars in the longest escape for one input byte, "&#255;" or
 *         "&#xFF;")
 *    * 8 (max number of bytes per char for any converter).
 * The previous value of 32 only allowed for 4 UChars per byte ("%XNN") and
 * was too small for the XML escape formats.
 */
#define VALUE_STRING_LENGTH 48

/* Code points used to assemble the escape sequences. */
#define UNICODE_PERCENT_SIGN_CODEPOINT  0x0025
#define UNICODE_U_CODEPOINT             0x0055
#define UNICODE_X_CODEPOINT             0x0058
#define UNICODE_RS_CODEPOINT            0x005C
#define UNICODE_U_LOW_CODEPOINT         0x0075
#define UNICODE_X_LOW_CODEPOINT         0x0078
#define UNICODE_AMP_CODEPOINT           0x0026
#define UNICODE_HASH_CODEPOINT          0x0023
#define UNICODE_SEMICOLON_CODEPOINT     0x003B
#define UNICODE_PLUS_CODEPOINT          0x002B
#define UNICODE_LEFT_CURLY_CODEPOINT    0x007B
#define UNICODE_RIGHT_CURLY_CODEPOINT   0x007D
#define UNICODE_SPACE_CODEPOINT         0x0020

/* Option characters compared against the first char of a callback context. */
#define UCNV_PRV_ESCAPE_ICU         0
#define UCNV_PRV_ESCAPE_C           'C'
#define UCNV_PRV_ESCAPE_XML_DEC     'D'
#define UCNV_PRV_ESCAPE_XML_HEX     'X'
#define UCNV_PRV_ESCAPE_JAVA        'J'
#define UCNV_PRV_ESCAPE_UNICODE     'U'
#define UCNV_PRV_ESCAPE_CSS2        'S'
#define UCNV_PRV_STOP_ON_ILLEGAL    'i'
52 
53 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
54 U_CAPI void    U_EXPORT2
UCNV_FROM_U_CALLBACK_STOP(const void * context,UConverterFromUnicodeArgs * fromUArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)55 UCNV_FROM_U_CALLBACK_STOP (
56                   const void *context,
57                   UConverterFromUnicodeArgs *fromUArgs,
58                   const UChar* codeUnits,
59                   int32_t length,
60                   UChar32 codePoint,
61                   UConverterCallbackReason reason,
62                   UErrorCode * err)
63 {
64     /* the caller must have set the error code accordingly */
65     return;
66 }
67 
68 
69 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
70 U_CAPI void    U_EXPORT2
UCNV_TO_U_CALLBACK_STOP(const void * context,UConverterToUnicodeArgs * toUArgs,const char * codePoints,int32_t length,UConverterCallbackReason reason,UErrorCode * err)71 UCNV_TO_U_CALLBACK_STOP (
72                    const void *context,
73                    UConverterToUnicodeArgs *toUArgs,
74                    const char* codePoints,
75                    int32_t length,
76                    UConverterCallbackReason reason,
77                    UErrorCode * err)
78 {
79     /* the caller must have set the error code accordingly */
80     return;
81 }
82 
83 U_CAPI void    U_EXPORT2
UCNV_FROM_U_CALLBACK_SKIP(const void * context,UConverterFromUnicodeArgs * fromUArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)84 UCNV_FROM_U_CALLBACK_SKIP (
85                   const void *context,
86                   UConverterFromUnicodeArgs *fromUArgs,
87                   const UChar* codeUnits,
88                   int32_t length,
89                   UChar32 codePoint,
90                   UConverterCallbackReason reason,
91                   UErrorCode * err)
92 {
93     if (reason <= UCNV_IRREGULAR)
94     {
95         if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
96         {
97             *err = U_ZERO_ERROR;
98         }
99         /* else the caller must have set the error code accordingly. */
100     }
101     /* else ignore the reset, close and clone calls. */
102 }
103 
104 U_CAPI void    U_EXPORT2
UCNV_FROM_U_CALLBACK_SUBSTITUTE(const void * context,UConverterFromUnicodeArgs * fromArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)105 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
106                   const void *context,
107                   UConverterFromUnicodeArgs *fromArgs,
108                   const UChar* codeUnits,
109                   int32_t length,
110                   UChar32 codePoint,
111                   UConverterCallbackReason reason,
112                   UErrorCode * err)
113 {
114     if (reason <= UCNV_IRREGULAR)
115     {
116         if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
117         {
118             *err = U_ZERO_ERROR;
119             ucnv_cbFromUWriteSub(fromArgs, 0, err);
120         }
121         /* else the caller must have set the error code accordingly. */
122     }
123     /* else ignore the reset, close and clone calls. */
124 }
125 
126 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
127  *uses a clean copy (resetted) of the converter, to convert that unicode
128  *escape sequence to the target codepage (if conversion failure happens then
129  *we revert to substituting with subchar)
130  */
131 U_CAPI void    U_EXPORT2
UCNV_FROM_U_CALLBACK_ESCAPE(const void * context,UConverterFromUnicodeArgs * fromArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)132 UCNV_FROM_U_CALLBACK_ESCAPE (
133                          const void *context,
134                          UConverterFromUnicodeArgs *fromArgs,
135                          const UChar *codeUnits,
136                          int32_t length,
137                          UChar32 codePoint,
138                          UConverterCallbackReason reason,
139                          UErrorCode * err)
140 {
141 
142   UChar valueString[VALUE_STRING_LENGTH];
143   int32_t valueStringLength = 0;
144   int32_t i = 0;
145 
146   const UChar *myValueSource = NULL;
147   UErrorCode err2 = U_ZERO_ERROR;
148   UConverterFromUCallback original = NULL;
149   const void *originalContext;
150 
151   UConverterFromUCallback ignoredCallback = NULL;
152   const void *ignoredContext;
153 
154   if (reason > UCNV_IRREGULAR)
155   {
156       return;
157   }
158 
159   ucnv_setFromUCallBack (fromArgs->converter,
160                      (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
161                      NULL,
162                      &original,
163                      &originalContext,
164                      &err2);
165 
166   if (U_FAILURE (err2))
167   {
168     *err = err2;
169     return;
170   }
171   if(context==NULL)
172   {
173       while (i < length)
174       {
175         valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
176         valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
177         valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
178       }
179   }
180   else
181   {
182       switch(*((char*)context))
183       {
184       case UCNV_PRV_ESCAPE_JAVA:
185           while (i < length)
186           {
187               valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
188               valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
189               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
190           }
191           break;
192 
193       case UCNV_PRV_ESCAPE_C:
194           valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
195 
196           if(length==2){
197               valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
198               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
199 
200           }
201           else{
202               valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
203               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
204           }
205           break;
206 
207       case UCNV_PRV_ESCAPE_XML_DEC:
208 
209           valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
210           valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
211           if(length==2){
212               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
213           }
214           else{
215               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
216           }
217           valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
218           break;
219 
220       case UCNV_PRV_ESCAPE_XML_HEX:
221 
222           valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
223           valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
224           valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
225           if(length==2){
226               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
227           }
228           else{
229               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
230           }
231           valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
232           break;
233 
234       case UCNV_PRV_ESCAPE_UNICODE:
235           valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT;    /* adding { */
236           valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;    /* adding U */
237           valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
238           if (length == 2) {
239               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
240           } else {
241               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
242           }
243           valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT;    /* adding } */
244           break;
245 
246       case UCNV_PRV_ESCAPE_CSS2:
247           valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
248           valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
249           /* Always add space character, becase the next character might be whitespace,
250              which would erroneously be considered the termination of the escape sequence. */
251           valueString[valueStringLength++] = (UChar) UNICODE_SPACE_CODEPOINT;
252           break;
253 
254       default:
255           while (i < length)
256           {
257               valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
258               valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;             /* adding U */
259               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
260           }
261       }
262   }
263   myValueSource = valueString;
264 
265   /* reset the error */
266   *err = U_ZERO_ERROR;
267 
268   ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
269 
270   ucnv_setFromUCallBack (fromArgs->converter,
271                          original,
272                          originalContext,
273                          &ignoredCallback,
274                          &ignoredContext,
275                          &err2);
276   if (U_FAILURE (err2))
277   {
278       *err = err2;
279       return;
280   }
281 
282   return;
283 }
284 
285 
286 
287 U_CAPI void  U_EXPORT2
UCNV_TO_U_CALLBACK_SKIP(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)288 UCNV_TO_U_CALLBACK_SKIP (
289                  const void *context,
290                  UConverterToUnicodeArgs *toArgs,
291                  const char* codeUnits,
292                  int32_t length,
293                  UConverterCallbackReason reason,
294                  UErrorCode * err)
295 {
296     if (reason <= UCNV_IRREGULAR)
297     {
298         if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
299         {
300             *err = U_ZERO_ERROR;
301         }
302         /* else the caller must have set the error code accordingly. */
303     }
304     /* else ignore the reset, close and clone calls. */
305 }
306 
307 U_CAPI void    U_EXPORT2
UCNV_TO_U_CALLBACK_SUBSTITUTE(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)308 UCNV_TO_U_CALLBACK_SUBSTITUTE (
309                  const void *context,
310                  UConverterToUnicodeArgs *toArgs,
311                  const char* codeUnits,
312                  int32_t length,
313                  UConverterCallbackReason reason,
314                  UErrorCode * err)
315 {
316     if (reason <= UCNV_IRREGULAR)
317     {
318         if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
319         {
320             *err = U_ZERO_ERROR;
321             ucnv_cbToUWriteSub(toArgs,0,err);
322         }
323         /* else the caller must have set the error code accordingly. */
324     }
325     /* else ignore the reset, close and clone calls. */
326 }
327 
328 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
329  *and uses that as the substitution sequence
330  */
331 U_CAPI void   U_EXPORT2
UCNV_TO_U_CALLBACK_ESCAPE(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)332 UCNV_TO_U_CALLBACK_ESCAPE (
333                  const void *context,
334                  UConverterToUnicodeArgs *toArgs,
335                  const char* codeUnits,
336                  int32_t length,
337                  UConverterCallbackReason reason,
338                  UErrorCode * err)
339 {
340     UChar uniValueString[VALUE_STRING_LENGTH];
341     int32_t valueStringLength = 0;
342     int32_t i = 0;
343 
344     if (reason > UCNV_IRREGULAR)
345     {
346         return;
347     }
348 
349     if(context==NULL)
350     {
351         while (i < length)
352         {
353             uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
354             uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;    /* adding X */
355             valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
356         }
357     }
358     else
359     {
360         switch(*((char*)context))
361         {
362         case UCNV_PRV_ESCAPE_XML_DEC:
363             while (i < length)
364             {
365                 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
366                 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
367                 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
368                 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
369             }
370             break;
371 
372         case UCNV_PRV_ESCAPE_XML_HEX:
373             while (i < length)
374             {
375                 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
376                 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
377                 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
378                 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
379                 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
380             }
381             break;
382         case UCNV_PRV_ESCAPE_C:
383             while (i < length)
384             {
385                 uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
386                 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
387                 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
388             }
389             break;
390         default:
391             while (i < length)
392             {
393                 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
394                 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;    /* adding X */
395                 uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
396                 valueStringLength += 2;
397             }
398         }
399     }
400     /* reset the error */
401     *err = U_ZERO_ERROR;
402 
403     ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
404 }
405 
406 #endif
407