• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**************************************************************************
*
*   Copyright (C) 2000-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
***************************************************************************
*   file name:  convsamp.c
*   encoding:   ASCII (7-bit)
*
*   created on: 2000may30
*   created by: Steven R. Loomis
*
*   Sample code for the ICU conversion routines.
*
* Note: Nothing special is needed to build this sample. Link with
*       the icu UC and icu I18N libraries.
*
*       I use 'assert' for error checking; you will probably want
*       something more flexible.  '***BEGIN SAMPLE***' and
*       '***END SAMPLE***' mark pieces suitable for stand-alone
*       code snippets.
*
*
*  Each test can define its own BUFFERSIZE
*
*/
27 
28 #define DEBUG_TMI 0  /* define to 1 to enable Too Much Information */
29 
30 #include <stdio.h>
31 #include <ctype.h>            /* for isspace, etc.    */
32 #include <assert.h>
33 #include <string.h>
34 #include <stdlib.h>  /* malloc */
35 
36 #include "unicode/utypes.h"   /* Basic ICU data types */
37 #include "unicode/ucnv.h"     /* C   Converter API    */
38 #include "unicode/ustring.h"  /* some more string fcns*/
39 #include "unicode/uchar.h"    /* char names           */
40 #include "unicode/uloc.h"
41 #include "unicode/unistr.h"
42 
43 #include "flagcb.h"
44 
45 /* Some utility functions */
46 
47 static const UChar kNone[] = { 0x0000 };
48 
49 #define U_ASSERT(x)  { if(U_FAILURE(x)) {fflush(stdout);fflush(stderr); fprintf(stderr, #x " == %s\n", u_errorName(x)); assert(U_SUCCESS(x)); }}
50 
51 /* Print a UChar if possible, in seven characters. */
prettyPrintUChar(UChar c)52 void prettyPrintUChar(UChar c)
53 {
54   if(  (c <= 0x007F) &&
55        (isgraph(c))  ) {
56     printf(" '%c'   ", (char)(0x00FF&c));
57   } else if ( c > 0x007F ) {
58     char buf[1000];
59     UErrorCode status = U_ZERO_ERROR;
60     int32_t o;
61 
62     o = u_charName(c, U_UNICODE_CHAR_NAME, buf, 1000, &status);
63     if(U_SUCCESS(status) && (o>0) ) {
64       buf[6] = 0;
65       printf("%7s", buf);
66     } else {
67       o = u_charName(c, U_UNICODE_10_CHAR_NAME, buf, 1000, &status);
68       if(U_SUCCESS(status) && (o>0)) {
69         buf[5] = 0;
70         printf("~%6s", buf);
71       }
72       else {
73         printf(" ??????");
74       }
75     }
76   } else {
77     switch((char)(c & 0x007F)) {
78     case ' ':
79       printf(" ' '   ");
80       break;
81     case '\t':
82       printf(" \\t    ");
83       break;
84     case '\n':
85       printf(" \\n    ");
86       break;
87     default:
88       printf("  _    ");
89       break;
90     }
91   }
92 }
93 
94 
printUChars(const char * name="?",const UChar * uch=kNone,int32_t len=-1)95 void printUChars(const char  *name = "?",
96                  const UChar *uch  = kNone,
97                  int32_t     len   = -1 )
98 {
99   int32_t i;
100 
101   if( (len == -1) && (uch) ) {
102     len = u_strlen(uch);
103   }
104 
105   printf("%5s: ", name);
106   for( i = 0; i <len; i++) {
107     printf("%-6d ", i);
108   }
109   printf("\n");
110 
111   printf("%5s: ", "uni");
112   for( i = 0; i <len; i++) {
113     printf("\\u%04X ", (int)uch[i]);
114   }
115   printf("\n");
116 
117   printf("%5s:", "ch");
118   for( i = 0; i <len; i++) {
119     prettyPrintUChar(uch[i]);
120   }
121   printf("\n");
122 }
123 
/*
 * Dump a byte string as three aligned rows: the index of each byte,
 * its \xXX value, and the character itself when it is graphic.
 *
 * name - label printed in front of the index row
 * uch  - the bytes to print
 * len  - number of bytes; -1 means "measure with strlen"
 */
void printBytes(const char  *name = "?",
                 const char *uch  = "",
                 int32_t     len   = -1 )
{
  int32_t idx;

  if (uch && (len == -1)) {
    len = strlen(uch);
  }

  /* Row 1: indices */
  printf("%5s: ", name);
  idx = 0;
  while (idx < len) {
    printf("%-4d ", idx);
    ++idx;
  }
  printf("\n");

  /* Row 2: byte values (masked so negative chars print as two hex digits) */
  printf("%5s: ", "uni");
  idx = 0;
  while (idx < len) {
    printf("\\x%02X ", 0x00FF & (int)uch[idx]);
    ++idx;
  }
  printf("\n");

  /* Row 3: the character, or blanks when it is not graphic */
  printf("%5s:", "ch");
  idx = 0;
  while (idx < len) {
    const int byteValue = 0x00FF & (int)uch[idx];
    if (!isgraph(byteValue)) {
      printf("     ");
    } else {
      printf(" '%c' ", (char)uch[idx]);
    }
    ++idx;
  }
  printf("\n");
}
156 
printUChar(UChar32 ch32)157 void printUChar(UChar32 ch32)
158 {
159     if(ch32 > 0xFFFF) {
160       printf("ch: U+%06X\n", ch32);
161     }
162     else {
163       UChar ch = (UChar)ch32;
164       printUChars("C", &ch, 1);
165     }
166 }
167 
168 /*******************************************************************
169   Very simple C sample to convert the word 'Moscow' in Russian in Unicode,
170   followed by an exclamation mark (!) into the KOI8-R Russian code page.
171 
172   This example first creates a UChar String out of the Unicode chars.
173 
174   targetSize must be set to the amount of space available in the target
175   buffer. After fromUChars is called,
176   len will contain the number of bytes in target[] which were
177   used in the resulting codepage.  In this case, there is a 1:1 mapping
178   between the input and output characters. The exclamation mark has the
179   same value in both KOI8-R and Unicode.
180 
181   src: 0      1      2      3      4      5      6
182   uni: \u041C \u043E \u0441 \u043A \u0432 \u0430 \u0021
183    ch: CYRILL CYRILL CYRILL CYRILL CYRILL CYRILL   '!'
184 
185  targ:  0    1    2    3    4    5    6
186   uni: \xED \xCF \xD3 \xCB \xD7 \xC1 \x21
187    ch:                                '!'
188 
189 
190 Converting FROM unicode
191   to koi8-r.
192   You must call ucnv_close to clean up the memory used by the
193   converter.
194 
195   'len' returns the number of OUTPUT bytes resulting from the
196   conversion.
197  */
198 
convsample_02()199 UErrorCode convsample_02()
200 {
201   printf("\n\n==============================================\n"
202          "Sample 02: C: simple Unicode -> koi8-r conversion\n");
203 
204 
205   // **************************** START SAMPLE *******************
206   // "cat<cat>OK"
207   UChar source[] = { 0x041C, 0x043E, 0x0441, 0x043A, 0x0432,
208                      0x0430, 0x0021, 0x0000 };
209   char target[100];
210   UErrorCode status = U_ZERO_ERROR;
211   UConverter *conv;
212   int32_t     len;
213 
214   // set up the converter
215   conv = ucnv_open("koi8-r", &status);
216   assert(U_SUCCESS(status));
217 
218   // convert to koi8-r
219   len = ucnv_fromUChars(conv, target, 100, source, -1, &status);
220   assert(U_SUCCESS(status));
221 
222   // close the converter
223   ucnv_close(conv);
224 
225   // ***************************** END SAMPLE ********************
226 
227   // Print it out
228   printUChars("src", source);
229   printf("\n");
230   printBytes("targ", target, len);
231 
232   return U_ZERO_ERROR;
233 }
234 
235 
convsample_03()236 UErrorCode convsample_03()
237 {
238   printf("\n\n==============================================\n"
239          "Sample 03: C: print out all converters\n");
240 
241   int32_t count;
242   int32_t i;
243 
244   // **************************** START SAMPLE *******************
245   count = ucnv_countAvailable();
246   printf("Available converters: %d\n", count);
247 
248   for(i=0;i<count;i++)
249   {
250     printf("%s ", ucnv_getAvailableName(i));
251   }
252 
253   // ***************************** END SAMPLE ********************
254 
255   printf("\n");
256 
257   return U_ZERO_ERROR;
258 }
259 
260 
261 
262 #define BUFFERSIZE 17 /* make it interesting :) */
263 
/*
  Converting from a codepage to Unicode in bulk..
  What is the best way to determine the buffer size?

     The 'buffersize' is in bytes of input.
    For a given converter, dividing this by the minimum char size
    gives you the maximum number of Unicode characters that could be
    expected for a given number of input bytes.
     see: ucnv_getMinCharSize()

     For example, a single byte codepage like 'Latin-3' has a
    minimum char size of 1. (It takes at least 1 byte to represent
    each Unicode char.) So the unicode buffer has the same number of
    UChars as the input buffer has bytes.

     In a strictly double byte codepage such as cp1362 (Windows
    Korean), the minimum char size is 2. So, only half as many Unicode
    chars as bytes are needed.

     This work to calculate the buffer size is an optimization. Any
    size of input and output buffer can be used, as long as the
    program handles the following cases: If the input buffer is empty,
    the source pointer will be equal to sourceLimit.  If the output
    buffer has overflowed, U_BUFFER_OVERFLOW_ERROR will be returned.
 */
289 
convsample_05()290 UErrorCode convsample_05()
291 {
292   printf("\n\n==============================================\n"
293          "Sample 05: C: count the number of letters in a UTF-8 document\n");
294 
295   FILE *f;
296   int32_t count;
297   char inBuf[BUFFERSIZE];
298   const char *source;
299   const char *sourceLimit;
300   UChar *uBuf;
301   UChar *target;
302   UChar *targetLimit;
303   UChar *p;
304   int32_t uBufSize = 0;
305   UConverter *conv;
306   UErrorCode status = U_ZERO_ERROR;
307   uint32_t letters=0, total=0;
308 
309   f = fopen("data01.txt", "r");
310   if(!f)
311   {
312     fprintf(stderr, "Couldn't open file 'data01.txt' (UTF-8 data file).\n");
313     return U_FILE_ACCESS_ERROR;
314   }
315 
316   // **************************** START SAMPLE *******************
317   conv = ucnv_open("utf-8", &status);
318   assert(U_SUCCESS(status));
319 
320   uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
321   printf("input bytes %d / min chars %d = %d UChars\n",
322          BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
323   uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
324   assert(uBuf!=NULL);
325 
326   // grab another buffer's worth
327   while((!feof(f)) &&
328         ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
329   {
330     // Convert bytes to unicode
331     source = inBuf;
332     sourceLimit = inBuf + count;
333 
334     do
335     {
336         target = uBuf;
337         targetLimit = uBuf + uBufSize;
338 
339         ucnv_toUnicode(conv, &target, targetLimit,
340                        &source, sourceLimit, NULL,
341                        feof(f)?TRUE:FALSE,         /* pass 'flush' when eof */
342                                    /* is true (when no more data will come) */
343                        &status);
344 
345         if(status == U_BUFFER_OVERFLOW_ERROR)
346         {
347           // simply ran out of space - we'll reset the target ptr the next
348           // time through the loop.
349           status = U_ZERO_ERROR;
350         }
351         else
352         {
353           //  Check other errors here.
354           assert(U_SUCCESS(status));
355           // Break out of the loop (by force)
356         }
357 
358         // Process the Unicode
359         // Todo: handle UTF-16/surrogates
360 
361         for(p = uBuf; p<target; p++)
362         {
363           if(u_isalpha(*p))
364             letters++;
365           total++;
366         }
367     } while (source < sourceLimit); // while simply out of space
368   }
369 
370   printf("%d letters out of %d total UChars.\n", letters, total);
371 
372   // ***************************** END SAMPLE ********************
373   ucnv_close(conv);
374 
375   printf("\n");
376 
377   fclose(f);
378 
379   return U_ZERO_ERROR;
380 }
381 #undef BUFFERSIZE
382 
383 #define BUFFERSIZE 1024
384 typedef struct
385 {
386   UChar32  codepoint;
387   uint32_t frequency;
388 } CharFreqInfo;
389 
convsample_06()390 UErrorCode convsample_06()
391 {
392   printf("\n\n==============================================\n"
393          "Sample 06: C: frequency distribution of letters in a UTF-8 document\n");
394 
395   FILE *f;
396   int32_t count;
397   char inBuf[BUFFERSIZE];
398   const char *source;
399   const char *sourceLimit;
400   UChar *uBuf;
401   int32_t uBufSize = 0;
402   UConverter *conv;
403   UErrorCode status = U_ZERO_ERROR;
404   uint32_t letters=0, total=0;
405 
406   CharFreqInfo   *info;
407   UChar32   charCount = 0x10000;  /* increase this if you want to handle non bmp.. todo: automatically bump it.. */
408   UChar32   p;
409 
410   uint32_t ie = 0;
411   uint32_t gh = 0;
412   UChar32 l = 0;
413 
414   f = fopen("data06.txt", "r");
415   if(!f)
416   {
417     fprintf(stderr, "Couldn't open file 'data06.txt' (UTF-8 data file).\n");
418     return U_FILE_ACCESS_ERROR;
419   }
420 
421   info = (CharFreqInfo*)malloc(sizeof(CharFreqInfo) * charCount);
422   if(!info)
423   {
424     fprintf(stderr, " Couldn't allocate %d bytes for freq counter\n", sizeof(CharFreqInfo)*charCount);
425   }
426 
427   /* reset frequencies */
428   for(p=0;p<charCount;p++)
429   {
430     info[p].codepoint = p;
431     info[p].frequency = 0;
432   }
433 
434   // **************************** START SAMPLE *******************
435   conv = ucnv_open("utf-8", &status);
436   assert(U_SUCCESS(status));
437 
438   uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
439   printf("input bytes %d / min chars %d = %d UChars\n",
440          BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
441   uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
442   assert(uBuf!=NULL);
443 
444   // grab another buffer's worth
445   while((!feof(f)) &&
446         ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
447   {
448     // Convert bytes to unicode
449     source = inBuf;
450     sourceLimit = inBuf + count;
451 
452     while(source < sourceLimit)
453     {
454       p = ucnv_getNextUChar(conv, &source, sourceLimit, &status);
455       if(U_FAILURE(status))
456       {
457         fprintf(stderr, "%s @ %d\n", u_errorName(status), total);
458         status = U_ZERO_ERROR;
459         continue;
460       }
461       U_ASSERT(status);
462       total++;
463 
464       if(u_isalpha(p))
465         letters++;
466 
467       if((u_tolower(l) == 'i') && (u_tolower(p) == 'e'))
468         ie++;
469 
470       if((u_tolower(l) == 'g') && (u_tolower(p) == 0x0127))
471         gh++;
472 
473       if(p>charCount)
474       {
475         fprintf(stderr, "U+%06X: oh.., we only handle BMP characters so far.. redesign!\n", p);
476         return U_UNSUPPORTED_ERROR;
477       }
478       info[p].frequency++;
479       l = p;
480     }
481   }
482 
483   fclose(f);
484   ucnv_close(conv);
485 
486   printf("%d letters out of %d total UChars.\n", letters, total);
487   printf("%d ie digraphs, %d gh digraphs.\n", ie, gh);
488 
489   // now, we could sort it..
490 
491   //  qsort(info, charCount, sizeof(info[0]), charfreq_compare);
492 
493   for(p=0;p<charCount;p++)
494   {
495     if(info[p].frequency)
496     {
497       printf("% 5d U+%06X ", info[p].frequency, p);
498       if(p <= 0xFFFF)
499       {
500         prettyPrintUChar((UChar)p);
501       }
502       printf("\n");
503     }
504   }
505   free(info);
506   // ***************************** END SAMPLE ********************
507 
508   printf("\n");
509 
510   return U_ZERO_ERROR;
511 }
512 #undef BUFFERSIZE
513 
514 
515 /******************************************************
516   You must call ucnv_close to clean up the memory used by the
517   converter.
518 
519   'len' returns the number of OUTPUT bytes resulting from the
520   conversion.
521  */
522 
convsample_12()523 UErrorCode convsample_12()
524 {
525   printf("\n\n==============================================\n"
526          "Sample 12: C: simple sjis -> unicode conversion\n");
527 
528 
529   // **************************** START SAMPLE *******************
530 
531   char source[] = { 0x63, 0x61, 0x74, (char)0x94, 0x4C, (char)0x82, 0x6E, (char)0x82, 0x6A, 0x00 };
532   UChar target[100];
533   UErrorCode status = U_ZERO_ERROR;
534   UConverter *conv;
535   int32_t     len;
536 
537   // set up the converter
538   conv = ucnv_open("shift_jis", &status);
539   assert(U_SUCCESS(status));
540 
541   // convert to Unicode
542   // Note: we can use strlen, we know it's an 8 bit null terminated codepage
543   target[6] = 0xFDCA;
544   len = ucnv_toUChars(conv, target, 100, source, strlen(source), &status);
545   U_ASSERT(status);
546   // close the converter
547   ucnv_close(conv);
548 
549   // ***************************** END SAMPLE ********************
550 
551   // Print it out
552   printBytes("src", source, strlen(source) );
553   printf("\n");
554   printUChars("targ", target, len);
555 
556   return U_ZERO_ERROR;
557 }
558 
559 /******************************************************************
560    C: Convert from codepage to Unicode one at a time.
561 */
562 
convsample_13()563 UErrorCode convsample_13()
564 {
565   printf("\n\n==============================================\n"
566          "Sample 13: C: simple Big5 -> unicode conversion, char at a time\n");
567 
568 
569   const char sourceChars[] = { 0x7a, 0x68, 0x3d, (char)0xa4, (char)0xa4, (char)0xa4, (char)0xe5, (char)0x2e };
570   //  const char sourceChars[] = { 0x7a, 0x68, 0x3d, 0xe4, 0xb8, 0xad, 0xe6, 0x96, 0x87, 0x2e };
571   const char *source, *sourceLimit;
572   UChar32 target;
573   UErrorCode status = U_ZERO_ERROR;
574   UConverter *conv = NULL;
575   int32_t srcCount=0;
576   int32_t dstCount=0;
577 
578   srcCount = sizeof(sourceChars);
579 
580   conv = ucnv_open("Big5", &status);
581   U_ASSERT(status);
582 
583   source = sourceChars;
584   sourceLimit = sourceChars + sizeof(sourceChars);
585 
586   // **************************** START SAMPLE *******************
587 
588 
589   printBytes("src",source,sourceLimit-source);
590 
591   while(source < sourceLimit)
592   {
593     puts("");
594     target = ucnv_getNextUChar (conv,
595                                 &source,
596                                 sourceLimit,
597                                 &status);
598 
599     //    printBytes("src",source,sourceLimit-source);
600     U_ASSERT(status);
601     printUChar(target);
602     dstCount++;
603   }
604 
605 
606   // ************************** END SAMPLE *************************
607 
608   printf("src=%d bytes, dst=%d uchars\n", srcCount, dstCount);
609   ucnv_close(conv);
610 
611   return U_ZERO_ERROR;
612 }
613 
614 
615 
616 
convsample_20_didSubstitute(const char * source)617 UBool convsample_20_didSubstitute(const char *source)
618 {
619   UChar uchars[100];
620   char bytes[100];
621   UConverter *conv = NULL;
622   UErrorCode status = U_ZERO_ERROR;
623   uint32_t len, len2;
624   UBool  flagVal;
625 
626   FromUFLAGContext * context = NULL;
627 
628   printf("\n\n==============================================\n"
629          "Sample 20: C: Test for substitution using callbacks\n");
630 
631   /* print out the original source */
632   printBytes("src", source);
633   printf("\n");
634 
635   /* First, convert from UTF8 to unicode */
636   conv = ucnv_open("utf-8", &status);
637   U_ASSERT(status);
638 
639   len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
640   U_ASSERT(status);
641 
642   printUChars("uch", uchars, len);
643   printf("\n");
644 
645   /* Now, close the converter */
646   ucnv_close(conv);
647 
648   /* Now, convert to windows-1252 */
649   conv = ucnv_open("windows-1252", &status);
650   U_ASSERT(status);
651 
652   /* Converter starts out with the SUBSTITUTE callback set. */
653 
654   /* initialize our callback */
655   context = flagCB_fromU_openContext();
656 
657   /* Set our special callback */
658   ucnv_setFromUCallBack(conv,
659                         flagCB_fromU,
660                         context,
661                         &(context->subCallback),
662                         &(context->subContext),
663                         &status);
664 
665   U_ASSERT(status);
666 
667   len2 = ucnv_fromUChars(conv, bytes, 100, uchars, len, &status);
668   U_ASSERT(status);
669 
670   flagVal = context->flag;  /* it's about to go away when we close the cnv */
671 
672   ucnv_close(conv);
673 
674   /* print out the original source */
675   printBytes("bytes", bytes, len2);
676 
677   return flagVal; /* true if callback was called */
678 }
679 
convsample_20()680 UErrorCode convsample_20()
681 {
682   const char *sample1 = "abc\xdf\xbf";
683   const char *sample2 = "abc_def";
684 
685 
686   if(convsample_20_didSubstitute(sample1))
687   {
688     printf("DID substitute.\n******\n");
689   }
690   else
691   {
692     printf("Did NOT substitute.\n*****\n");
693   }
694 
695   if(convsample_20_didSubstitute(sample2))
696   {
697     printf("DID substitute.\n******\n");
698   }
699   else
700   {
701     printf("Did NOT substitute.\n*****\n");
702   }
703 
704   return U_ZERO_ERROR;
705 }
706 
707 // 21  - C, callback, with clone and debug
708 
709 
710 
convsample_21_didSubstitute(const char * source)711 UBool convsample_21_didSubstitute(const char *source)
712 {
713   UChar uchars[100];
714   char bytes[100];
715   UConverter *conv = NULL, *cloneCnv = NULL;
716   UErrorCode status = U_ZERO_ERROR;
717   uint32_t len, len2;
718   int32_t  cloneLen;
719   UBool  flagVal = FALSE;
720   UConverterFromUCallback junkCB;
721 
722   FromUFLAGContext *flagCtx = NULL,
723                    *cloneFlagCtx = NULL;
724 
725   debugCBContext   *debugCtx1 = NULL,
726                    *debugCtx2 = NULL,
727                    *cloneDebugCtx = NULL;
728 
729   printf("\n\n==============================================\n"
730          "Sample 21: C: Test for substitution w/ callbacks & clones \n");
731 
732   /* print out the original source */
733   printBytes("src", source);
734   printf("\n");
735 
736   /* First, convert from UTF8 to unicode */
737   conv = ucnv_open("utf-8", &status);
738   U_ASSERT(status);
739 
740   len = ucnv_toUChars(conv, uchars, 100, source, strlen(source), &status);
741   U_ASSERT(status);
742 
743   printUChars("uch", uchars, len);
744   printf("\n");
745 
746   /* Now, close the converter */
747   ucnv_close(conv);
748 
749   /* Now, convert to windows-1252 */
750   conv = ucnv_open("windows-1252", &status);
751   U_ASSERT(status);
752 
753   /* Converter starts out with the SUBSTITUTE callback set. */
754 
755   /* initialize our callback */
756   /* from the 'bottom' innermost, out
757    *   CNV ->  debugCtx1[debug]  ->  flagCtx[flag] -> debugCtx2[debug]  */
758 
759 #if DEBUG_TMI
760   printf("flagCB_fromU = %p\n", &flagCB_fromU);
761   printf("debugCB_fromU = %p\n", &debugCB_fromU);
762 #endif
763 
764   debugCtx1 = debugCB_openContext();
765    flagCtx  = flagCB_fromU_openContext();
766   debugCtx2 = debugCB_openContext();
767 
768   debugCtx1->subCallback =  flagCB_fromU;  /* debug1 -> flag */
769   debugCtx1->subContext  =  flagCtx;
770 
771   flagCtx->subCallback   =  debugCB_fromU; /*  flag -> debug2 */
772   flagCtx->subContext    =  debugCtx2;
773 
774   debugCtx2->subCallback =  UCNV_FROM_U_CALLBACK_SUBSTITUTE;
775   debugCtx2->subContext  = NULL;
776 
777   /* Set our special callback */
778 
779   ucnv_setFromUCallBack(conv,
780                         debugCB_fromU,
781                         debugCtx1,
782                         &(debugCtx2->subCallback),
783                         &(debugCtx2->subContext),
784                         &status);
785 
786   U_ASSERT(status);
787 
788 #if DEBUG_TMI
789   printf("Callback chain now: Converter %p -> debug1:%p-> (%p:%p)==flag:%p -> debug2:%p -> cb %p\n",
790          conv, debugCtx1, debugCtx1->subCallback,
791          debugCtx1->subContext, flagCtx, debugCtx2, debugCtx2->subCallback);
792 #endif
793 
794   cloneLen = 1; /* but passing in null so it will clone */
795   cloneCnv = ucnv_safeClone(conv,  NULL,  &cloneLen, &status);
796 
797   U_ASSERT(status);
798 
799 #if DEBUG_TMI
800   printf("Cloned converter from %p -> %p.  Closing %p.\n", conv, cloneCnv, conv);
801 #endif
802 
803   ucnv_close(conv);
804 
805 #if DEBUG_TMI
806   printf("%p closed.\n", conv);
807 #endif
808 
809   U_ASSERT(status);
810   /* Now, we have to extract the context */
811   cloneDebugCtx = NULL;
812   cloneFlagCtx  = NULL;
813 
814   ucnv_getFromUCallBack(cloneCnv, &junkCB, (const void **)&cloneDebugCtx);
815   if(cloneDebugCtx != NULL) {
816       cloneFlagCtx = (FromUFLAGContext*) cloneDebugCtx -> subContext;
817   }
818 
819   printf("Cloned converter chain: %p -> %p[debug1] -> %p[flag] -> %p[debug2] -> substitute\n",
820          cloneCnv, cloneDebugCtx, cloneFlagCtx, cloneFlagCtx?cloneFlagCtx->subContext:NULL );
821 
822   len2 = ucnv_fromUChars(cloneCnv, bytes, 100, uchars, len, &status);
823   U_ASSERT(status);
824 
825   if(cloneFlagCtx != NULL) {
826       flagVal = cloneFlagCtx->flag;  /* it's about to go away when we close the cnv */
827   } else {
828       printf("** Warning, couldn't get the subcallback \n");
829   }
830 
831   ucnv_close(cloneCnv);
832 
833   /* print out the original source */
834   printBytes("bytes", bytes, len2);
835 
836   return flagVal; /* true if callback was called */
837 }
838 
convsample_21()839 UErrorCode convsample_21()
840 {
841   const char *sample1 = "abc\xdf\xbf";
842   const char *sample2 = "abc_def";
843 
844   if(convsample_21_didSubstitute(sample1))
845   {
846     printf("DID substitute.\n******\n");
847   }
848   else
849   {
850     printf("Did NOT substitute.\n*****\n");
851   }
852 
853   if(convsample_21_didSubstitute(sample2))
854   {
855     printf("DID substitute.\n******\n");
856   }
857   else
858   {
859     printf("Did NOT substitute.\n*****\n");
860   }
861 
862   return U_ZERO_ERROR;
863 }
864 
865 
866 //  40-  C, cp37 -> UTF16 [data02.bin -> data40.utf16]
867 
868 #define BUFFERSIZE 17 /* make it interesting :) */
869 
convsample_40()870 UErrorCode convsample_40()
871 {
872   printf("\n\n==============================================\n"
873     "Sample 40: C: convert data02.bin from cp37 to UTF16 [data40.utf16]\n");
874 
875   FILE *f;
876   FILE *out;
877   int32_t count;
878   char inBuf[BUFFERSIZE];
879   const char *source;
880   const char *sourceLimit;
881   UChar *uBuf;
882   UChar *target;
883   UChar *targetLimit;
884   int32_t uBufSize = 0;
885   UConverter *conv = NULL;
886   UErrorCode status = U_ZERO_ERROR;
887   uint32_t inbytes=0, total=0;
888 
889   f = fopen("data02.bin", "rb");
890   if(!f)
891   {
892     fprintf(stderr, "Couldn't open file 'data02.bin' (cp37 data file).\n");
893     return U_FILE_ACCESS_ERROR;
894   }
895 
896   out = fopen("data40.utf16", "wb");
897   if(!out)
898   {
899     fprintf(stderr, "Couldn't create file 'data40.utf16'.\n");
900     fclose(f);
901     return U_FILE_ACCESS_ERROR;
902   }
903 
904   // **************************** START SAMPLE *******************
905   conv = ucnv_openCCSID(37, UCNV_IBM, &status);
906   assert(U_SUCCESS(status));
907 
908   uBufSize = (BUFFERSIZE/ucnv_getMinCharSize(conv));
909   printf("input bytes %d / min chars %d = %d UChars\n",
910          BUFFERSIZE, ucnv_getMinCharSize(conv), uBufSize);
911   uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
912   assert(uBuf!=NULL);
913 
914   // grab another buffer's worth
915   while((!feof(f)) &&
916         ((count=fread(inBuf, 1, BUFFERSIZE , f)) > 0) )
917   {
918     inbytes += count;
919 
920     // Convert bytes to unicode
921     source = inBuf;
922     sourceLimit = inBuf + count;
923 
924     do
925     {
926         target = uBuf;
927         targetLimit = uBuf + uBufSize;
928 
929         ucnv_toUnicode( conv, &target, targetLimit,
930                        &source, sourceLimit, NULL,
931                        feof(f)?TRUE:FALSE,         /* pass 'flush' when eof */
932                                    /* is true (when no more data will come) */
933                          &status);
934 
935         if(status == U_BUFFER_OVERFLOW_ERROR)
936         {
937           // simply ran out of space - we'll reset the target ptr the next
938           // time through the loop.
939           status = U_ZERO_ERROR;
940         }
941         else
942         {
943           //  Check other errors here.
944           assert(U_SUCCESS(status));
945           // Break out of the loop (by force)
946         }
947 
948         // Process the Unicode
949         // Todo: handle UTF-16/surrogates
950         assert(fwrite(uBuf, sizeof(uBuf[0]), (target-uBuf), out) ==
951                (size_t)(target-uBuf));
952         total += (target-uBuf);
953     } while (source < sourceLimit); // while simply out of space
954   }
955 
956   printf("%d bytes in,  %d UChars out.\n", inbytes, total);
957 
958   // ***************************** END SAMPLE ********************
959   ucnv_close(conv);
960 
961   fclose(f);
962   fclose(out);
963   printf("\n");
964 
965   return U_ZERO_ERROR;
966 }
967 #undef BUFFERSIZE
968 
969 
970 
971 //  46-  C, UTF16 -> latin2 [data40.utf16 -> data46.out]
972 
973 #define BUFFERSIZE 24 /* make it interesting :) */
974 
convsample_46()975 UErrorCode convsample_46()
976 {
977   printf("\n\n==============================================\n"
978     "Sample 46: C: convert data40.utf16 from UTF16 to latin2 [data46.out]\n");
979 
980   FILE *f;
981   FILE *out;
982   int32_t count;
983   UChar inBuf[BUFFERSIZE];
984   const UChar *source;
985   const UChar *sourceLimit;
986   char *buf;
987   char *target;
988   char *targetLimit;
989 
990   int32_t bufSize = 0;
991   UConverter *conv = NULL;
992   UErrorCode status = U_ZERO_ERROR;
993   uint32_t inchars=0, total=0;
994 
995   f = fopen("data40.utf16", "rb");
996   if(!f)
997   {
998     fprintf(stderr, "Couldn't open file 'data40.utf16' (did you run convsample_40() ?)\n");
999     return U_FILE_ACCESS_ERROR;
1000   }
1001 
1002   out = fopen("data46.out", "wb");
1003   if(!out)
1004   {
1005     fprintf(stderr, "Couldn't create file 'data46.out'.\n");
1006     fclose(f);
1007     return U_FILE_ACCESS_ERROR;
1008   }
1009 
1010   // **************************** START SAMPLE *******************
1011   conv = ucnv_open( "iso-8859-2", &status);
1012   assert(U_SUCCESS(status));
1013 
1014   bufSize = (BUFFERSIZE*ucnv_getMaxCharSize(conv));
1015   printf("input UChars[16] %d * max charsize %d = %d bytes output buffer\n",
1016          BUFFERSIZE, ucnv_getMaxCharSize(conv), bufSize);
1017   buf = (char*)malloc(bufSize * sizeof(char));
1018   assert(buf!=NULL);
1019 
1020   // grab another buffer's worth
1021   while((!feof(f)) &&
1022         ((count=fread(inBuf, sizeof(UChar), BUFFERSIZE , f)) > 0) )
1023   {
1024     inchars += count;
1025 
1026     // Convert bytes to unicode
1027     source = inBuf;
1028     sourceLimit = inBuf + count;
1029 
1030     do
1031     {
1032         target = buf;
1033         targetLimit = buf + bufSize;
1034 
1035         ucnv_fromUnicode( conv, &target, targetLimit,
1036                        &source, sourceLimit, NULL,
1037                        feof(f)?TRUE:FALSE,         /* pass 'flush' when eof */
1038                                    /* is true (when no more data will come) */
1039                          &status);
1040 
1041         if(status == U_BUFFER_OVERFLOW_ERROR)
1042         {
1043           // simply ran out of space - we'll reset the target ptr the next
1044           // time through the loop.
1045           status = U_ZERO_ERROR;
1046         }
1047         else
1048         {
1049           //  Check other errors here.
1050           assert(U_SUCCESS(status));
1051           // Break out of the loop (by force)
1052         }
1053 
1054         // Process the Unicode
1055         assert(fwrite(buf, sizeof(buf[0]), (target-buf), out) ==
1056                (size_t)(target-buf));
1057         total += (target-buf);
1058     } while (source < sourceLimit); // while simply out of space
1059   }
1060 
1061   printf("%d Uchars (%d bytes) in, %d chars out.\n", inchars, inchars * sizeof(UChar), total);
1062 
1063   // ***************************** END SAMPLE ********************
1064   ucnv_close(conv);
1065 
1066   fclose(f);
1067   fclose(out);
1068   printf("\n");
1069 
1070   return U_ZERO_ERROR;
1071 }
1072 #undef BUFFERSIZE
1073 
1074 #define BUFFERSIZE 219
1075 
1076 
1077 /* main */
1078 
main()1079 int main()
1080 {
1081 
1082   printf("Default Converter=%s\n", ucnv_getDefaultName() );
1083 
1084   convsample_02();  // C  , u->koi8r, conv
1085   convsample_03();  // C,   iterate
1086 
1087   convsample_05();  // C,  utf8->u, getNextUChar
1088   convsample_06(); // C freq counter thingy
1089 
1090   convsample_12();  // C,  sjis->u, conv
1091   convsample_13();  // C,  big5->u, getNextU
1092 
1093   convsample_20();  // C, callback
1094   convsample_21();  // C, callback debug
1095 
1096   convsample_40();  // C,   cp37 -> UTF16 [data02.bin -> data40.utf16]
1097 
1098   convsample_46();  // C,  UTF16 -> latin3 [data41.utf16 -> data46.out]
1099 
1100   printf("End of converter samples.\n");
1101 
1102   fflush(stdout);
1103   fflush(stderr);
1104 
1105   return 0;
1106 }
1107