• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *****************************************************************************
3  *
4  *   Copyright (C) 1998-2007, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  *****************************************************************************
8  *
9  *  ucnv_err.c
10  *  Implements error behaviour functions called by T_UConverter_{from,to}Unicode
11  *
12  *
13 *   Change history:
14 *
15 *   06/29/2000  helena      Major rewrite of the callback APIs.
16 */
17 
18 #include "unicode/utypes.h"
19 
20 #if !UCONFIG_NO_CONVERSION
21 
22 #include "unicode/ucnv_err.h"
23 #include "unicode/ucnv_cb.h"
24 #include "ucnv_cnv.h"
25 #include "cmemory.h"
26 #include "unicode/ucnv.h"
27 #include "ustrfmt.h"
28 
/*
 * Capacity, in UChars, of the scratch buffer that holds a generated escape
 * sequence.
 * 48 = 6 (UChars in the longest per-byte escape, "&#255;" or "&#xFF;")
 *    * 8 (max number of bytes per char for any converter)
 * The previous value of 32 only covered the 4-UChar "%XNN" form and was too
 * small for the XML escapes produced by the to-Unicode escape callback.
 */
#define VALUE_STRING_LENGTH 48

/* Code points of the literal characters used to build escape sequences. */
#define UNICODE_PERCENT_SIGN_CODEPOINT  0x0025
#define UNICODE_U_CODEPOINT             0x0055
#define UNICODE_X_CODEPOINT             0x0058
#define UNICODE_RS_CODEPOINT            0x005C
#define UNICODE_U_LOW_CODEPOINT         0x0075
#define UNICODE_X_LOW_CODEPOINT         0x0078
#define UNICODE_AMP_CODEPOINT           0x0026
#define UNICODE_HASH_CODEPOINT          0x0023
#define UNICODE_SEMICOLON_CODEPOINT     0x003B
#define UNICODE_PLUS_CODEPOINT          0x002B
#define UNICODE_LEFT_CURLY_CODEPOINT    0x007B
#define UNICODE_RIGHT_CURLY_CODEPOINT   0x007D

/*
 * Values compared against the first char of a callback's context pointer to
 * select the escape style (escape callbacks) or the stop-on-illegal option
 * (skip/substitute callbacks).
 */
#define UCNV_PRV_ESCAPE_ICU         0
#define UCNV_PRV_ESCAPE_C           'C'
#define UCNV_PRV_ESCAPE_XML_DEC     'D'
#define UCNV_PRV_ESCAPE_XML_HEX     'X'
#define UCNV_PRV_ESCAPE_JAVA        'J'
#define UCNV_PRV_ESCAPE_UNICODE     'U'
#define UCNV_PRV_STOP_ON_ILLEGAL    'i'
50 
51 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
52 U_CAPI void    U_EXPORT2
UCNV_FROM_U_CALLBACK_STOP(const void * context,UConverterFromUnicodeArgs * fromUArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)53 UCNV_FROM_U_CALLBACK_STOP (
54                   const void *context,
55                   UConverterFromUnicodeArgs *fromUArgs,
56                   const UChar* codeUnits,
57                   int32_t length,
58                   UChar32 codePoint,
59                   UConverterCallbackReason reason,
60                   UErrorCode * err)
61 {
62     /* the caller must have set the error code accordingly */
63     return;
64 }
65 
66 
67 /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
68 U_CAPI void    U_EXPORT2
UCNV_TO_U_CALLBACK_STOP(const void * context,UConverterToUnicodeArgs * toUArgs,const char * codePoints,int32_t length,UConverterCallbackReason reason,UErrorCode * err)69 UCNV_TO_U_CALLBACK_STOP (
70                    const void *context,
71                    UConverterToUnicodeArgs *toUArgs,
72                    const char* codePoints,
73                    int32_t length,
74                    UConverterCallbackReason reason,
75                    UErrorCode * err)
76 {
77     /* the caller must have set the error code accordingly */
78     return;
79 }
80 
81 U_CAPI void    U_EXPORT2
UCNV_FROM_U_CALLBACK_SKIP(const void * context,UConverterFromUnicodeArgs * fromUArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)82 UCNV_FROM_U_CALLBACK_SKIP (
83                   const void *context,
84                   UConverterFromUnicodeArgs *fromUArgs,
85                   const UChar* codeUnits,
86                   int32_t length,
87                   UChar32 codePoint,
88                   UConverterCallbackReason reason,
89                   UErrorCode * err)
90 {
91     if (reason <= UCNV_IRREGULAR)
92     {
93         if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
94         {
95             *err = U_ZERO_ERROR;
96         }
97         /* else the caller must have set the error code accordingly. */
98     }
99     /* else ignore the reset, close and clone calls. */
100 }
101 
102 U_CAPI void    U_EXPORT2
UCNV_FROM_U_CALLBACK_SUBSTITUTE(const void * context,UConverterFromUnicodeArgs * fromArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)103 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
104                   const void *context,
105                   UConverterFromUnicodeArgs *fromArgs,
106                   const UChar* codeUnits,
107                   int32_t length,
108                   UChar32 codePoint,
109                   UConverterCallbackReason reason,
110                   UErrorCode * err)
111 {
112     if (reason <= UCNV_IRREGULAR)
113     {
114         if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
115         {
116             *err = U_ZERO_ERROR;
117             ucnv_cbFromUWriteSub(fromArgs, 0, err);
118         }
119         /* else the caller must have set the error code accordingly. */
120     }
121     /* else ignore the reset, close and clone calls. */
122 }
123 
124 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
125  *uses a clean copy (resetted) of the converter, to convert that unicode
126  *escape sequence to the target codepage (if conversion failure happens then
127  *we revert to substituting with subchar)
128  */
129 U_CAPI void    U_EXPORT2
UCNV_FROM_U_CALLBACK_ESCAPE(const void * context,UConverterFromUnicodeArgs * fromArgs,const UChar * codeUnits,int32_t length,UChar32 codePoint,UConverterCallbackReason reason,UErrorCode * err)130 UCNV_FROM_U_CALLBACK_ESCAPE (
131                          const void *context,
132                          UConverterFromUnicodeArgs *fromArgs,
133                          const UChar *codeUnits,
134                          int32_t length,
135                          UChar32 codePoint,
136                          UConverterCallbackReason reason,
137                          UErrorCode * err)
138 {
139 
140   UChar valueString[VALUE_STRING_LENGTH];
141   int32_t valueStringLength = 0;
142   int32_t i = 0;
143 
144   const UChar *myValueSource = NULL;
145   UErrorCode err2 = U_ZERO_ERROR;
146   UConverterFromUCallback original = NULL;
147   const void *originalContext;
148 
149   UConverterFromUCallback ignoredCallback = NULL;
150   const void *ignoredContext;
151 
152   if (reason > UCNV_IRREGULAR)
153   {
154       return;
155   }
156 
157   ucnv_setFromUCallBack (fromArgs->converter,
158                      (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
159                      NULL,
160                      &original,
161                      &originalContext,
162                      &err2);
163 
164   if (U_FAILURE (err2))
165   {
166     *err = err2;
167     return;
168   }
169   if(context==NULL)
170   {
171       while (i < length)
172       {
173         valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
174         valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
175         valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
176       }
177   }
178   else
179   {
180       switch(*((char*)context))
181       {
182       case UCNV_PRV_ESCAPE_JAVA:
183           while (i < length)
184           {
185               valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
186               valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
187               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
188           }
189           break;
190 
191       case UCNV_PRV_ESCAPE_C:
192           valueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
193 
194           if(length==2){
195               valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT; /* adding U */
196               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
197 
198           }
199           else{
200               valueString[valueStringLength++] = (UChar) UNICODE_U_LOW_CODEPOINT; /* adding u */
201               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
202           }
203           break;
204 
205       case UCNV_PRV_ESCAPE_XML_DEC:
206 
207           valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
208           valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
209           if(length==2){
210               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
211           }
212           else{
213               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
214           }
215           valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
216           break;
217 
218       case UCNV_PRV_ESCAPE_XML_HEX:
219 
220           valueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
221           valueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
222           valueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
223           if(length==2){
224               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
225           }
226           else{
227               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
228           }
229           valueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
230           break;
231 
232       case UCNV_PRV_ESCAPE_UNICODE:
233           valueString[valueStringLength++] = (UChar) UNICODE_LEFT_CURLY_CODEPOINT;    /* adding { */
234           valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;    /* adding U */
235           valueString[valueStringLength++] = (UChar) UNICODE_PLUS_CODEPOINT; /* adding + */
236           if (length == 2) {
237               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
238           } else {
239               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
240           }
241           valueString[valueStringLength++] = (UChar) UNICODE_RIGHT_CURLY_CODEPOINT;    /* adding } */
242           break;
243 
244       default:
245           while (i < length)
246           {
247               valueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT;  /* adding % */
248               valueString[valueStringLength++] = (UChar) UNICODE_U_CODEPOINT;             /* adding U */
249               valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
250           }
251       }
252   }
253   myValueSource = valueString;
254 
255   /* reset the error */
256   *err = U_ZERO_ERROR;
257 
258   ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
259 
260   ucnv_setFromUCallBack (fromArgs->converter,
261                          original,
262                          originalContext,
263                          &ignoredCallback,
264                          &ignoredContext,
265                          &err2);
266   if (U_FAILURE (err2))
267   {
268       *err = err2;
269       return;
270   }
271 
272   return;
273 }
274 
275 
276 
277 U_CAPI void  U_EXPORT2
UCNV_TO_U_CALLBACK_SKIP(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)278 UCNV_TO_U_CALLBACK_SKIP (
279                  const void *context,
280                  UConverterToUnicodeArgs *toArgs,
281                  const char* codeUnits,
282                  int32_t length,
283                  UConverterCallbackReason reason,
284                  UErrorCode * err)
285 {
286     if (reason <= UCNV_IRREGULAR)
287     {
288         if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
289         {
290             *err = U_ZERO_ERROR;
291         }
292         /* else the caller must have set the error code accordingly. */
293     }
294     /* else ignore the reset, close and clone calls. */
295 }
296 
297 U_CAPI void    U_EXPORT2
UCNV_TO_U_CALLBACK_SUBSTITUTE(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)298 UCNV_TO_U_CALLBACK_SUBSTITUTE (
299                  const void *context,
300                  UConverterToUnicodeArgs *toArgs,
301                  const char* codeUnits,
302                  int32_t length,
303                  UConverterCallbackReason reason,
304                  UErrorCode * err)
305 {
306     if (reason <= UCNV_IRREGULAR)
307     {
308         if (context == NULL || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
309         {
310             *err = U_ZERO_ERROR;
311             ucnv_cbToUWriteSub(toArgs,0,err);
312         }
313         /* else the caller must have set the error code accordingly. */
314     }
315     /* else ignore the reset, close and clone calls. */
316 }
317 
318 /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
319  *and uses that as the substitution sequence
320  */
321 U_CAPI void   U_EXPORT2
UCNV_TO_U_CALLBACK_ESCAPE(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)322 UCNV_TO_U_CALLBACK_ESCAPE (
323                  const void *context,
324                  UConverterToUnicodeArgs *toArgs,
325                  const char* codeUnits,
326                  int32_t length,
327                  UConverterCallbackReason reason,
328                  UErrorCode * err)
329 {
330     UChar uniValueString[VALUE_STRING_LENGTH];
331     int32_t valueStringLength = 0;
332     int32_t i = 0;
333 
334     if (reason > UCNV_IRREGULAR)
335     {
336         return;
337     }
338 
339     if(context==NULL)
340     {
341         while (i < length)
342         {
343             uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
344             uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;    /* adding X */
345             valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
346         }
347     }
348     else
349     {
350         switch(*((char*)context))
351         {
352         case UCNV_PRV_ESCAPE_XML_DEC:
353             while (i < length)
354             {
355                 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
356                 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
357                 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
358                 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
359             }
360             break;
361 
362         case UCNV_PRV_ESCAPE_XML_HEX:
363             while (i < length)
364             {
365                 uniValueString[valueStringLength++] = (UChar) UNICODE_AMP_CODEPOINT;   /* adding & */
366                 uniValueString[valueStringLength++] = (UChar) UNICODE_HASH_CODEPOINT;  /* adding # */
367                 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
368                 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
369                 uniValueString[valueStringLength++] = (UChar) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
370             }
371             break;
372         case UCNV_PRV_ESCAPE_C:
373             while (i < length)
374             {
375                 uniValueString[valueStringLength++] = (UChar) UNICODE_RS_CODEPOINT;    /* adding \ */
376                 uniValueString[valueStringLength++] = (UChar) UNICODE_X_LOW_CODEPOINT; /* adding x */
377                 valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
378             }
379             break;
380         default:
381             while (i < length)
382             {
383                 uniValueString[valueStringLength++] = (UChar) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
384                 uniValueString[valueStringLength++] = (UChar) UNICODE_X_CODEPOINT;    /* adding X */
385                 uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
386                 valueStringLength += 2;
387             }
388         }
389     }
390     /* reset the error */
391     *err = U_ZERO_ERROR;
392 
393     ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
394 }
395 
396 #endif
397