• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*****************************************************************************
2 *
3 *   Copyright (C) 1999-2010, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *
6 ******************************************************************************/
7 
8 /*
9  * uconv(1): an iconv(1)-like converter using ICU.
10  *
11  * Original code by Jonas Utterstr&#x00F6;m <jonas.utterstrom@vittran.norrnod.se>
12  * contributed in 1999.
13  *
14  * Conversion to the C conversion API and many improvements by
15  * Yves Arrouye <yves@realnames.com>, current maintainer.
16  *
17  * Markus Scherer maintainer from 2003.
18  * See source code repository history for changes.
19  */
20 
21 #include <unicode/utypes.h>
22 #include <unicode/putil.h>
23 #include <unicode/ucnv.h>
24 #include <unicode/uenum.h>
25 #include <unicode/unistr.h>
26 #include <unicode/translit.h>
27 #include <unicode/uset.h>
28 #include <unicode/uclean.h>
29 
30 #include <stdio.h>
31 #include <errno.h>
32 #include <string.h>
33 #include <stdlib.h>
34 
35 #include "cmemory.h"
36 #include "cstring.h"
37 #include "ustrfmt.h"
38 
39 #include "unicode/uwmsg.h"
40 
41 U_NAMESPACE_USE
42 
43 #if (defined(U_WINDOWS) || defined(U_CYGWIN)) && !defined(__STRICT_ANSI__)
44 #include <io.h>
45 #include <fcntl.h>
46 #if defined(U_WINDOWS)
47 #define USE_FILENO_BINARY_MODE 1
48 /* Windows likes to rename Unix-like functions */
49 #ifndef fileno
50 #define fileno _fileno
51 #endif
52 #ifndef setmode
53 #define setmode _setmode
54 #endif
55 #ifndef O_BINARY
56 #define O_BINARY _O_BINARY
57 #endif
58 #endif
59 #endif
60 
61 #ifdef UCONVMSG_LINK
62 /* below from the README */
63 #include "unicode/utypes.h"
64 #include "unicode/udata.h"
65 U_CFUNC char uconvmsg_dat[];
66 #endif
67 
/* Number of elements of a true array (not a pointer), as an int32_t.
   The whole expansion is parenthesized so that it acts as one operand
   in any surrounding expression (e.g. after sizeof). */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))

/* Default buffer size; NOTE(review): its use is outside this part of the file. */
#define DEFAULT_BUFSZ   4096
/* Name under which initMsg() installs and opens the message bundle. */
#define UCONVMSG "uconvmsg"
72 
73 static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
74 
75 /*
76  * Initialize the message bundle so that message strings can be fetched
77  * by u_wmsg().
78  *
79  */
80 
initMsg(const char * pname)81 static void initMsg(const char *pname) {
82     static int ps = 0;
83 
84     if (!ps) {
85         char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
86         UErrorCode err = U_ZERO_ERROR;
87 
88         ps = 1;
89 
90         /* Set up our static data - if any */
91 #ifdef UCONVMSG_LINK
92         udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
93         if (U_FAILURE(err)) {
94           fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
95                   pname, u_errorName(err));
96           err = U_ZERO_ERROR; /* It may still fail */
97         }
98 #endif
99 
100         /* Get messages. */
101         gBundle = u_wmsg_setPath(UCONVMSG, &err);
102         if (U_FAILURE(err)) {
103             fprintf(stderr,
104                     "%s: warning: couldn't open bundle %s: %s\n",
105                     pname, UCONVMSG, u_errorName(err));
106 #ifdef UCONVMSG_LINK
107             fprintf(stderr,
108                     "%s: setAppData was called, internal data %s failed to load\n",
109                         pname, UCONVMSG);
110 #endif
111 
112             err = U_ZERO_ERROR;
113             /* that was try #1, try again with a path */
114             uprv_strcpy(dataPath, u_getDataDirectory());
115             uprv_strcat(dataPath, U_FILE_SEP_STRING);
116             uprv_strcat(dataPath, UCONVMSG);
117 
118             gBundle = u_wmsg_setPath(dataPath, &err);
119             if (U_FAILURE(err)) {
120                 fprintf(stderr,
121                     "%s: warning: still couldn't open bundle %s: %s\n",
122                     pname, dataPath, u_errorName(err));
123                 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
124             }
125         }
126     }
127 }
128 
129 /* Mapping of callback names to the callbacks passed to the converter
130    API. */
131 
132 static struct callback_ent {
133     const char *name;
134     UConverterFromUCallback fromu;
135     const void *fromuctxt;
136     UConverterToUCallback tou;
137     const void *touctxt;
138 } transcode_callbacks[] = {
139     { "substitute",
140       UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
141       UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
142     { "skip",
143       UCNV_FROM_U_CALLBACK_SKIP, 0,
144       UCNV_TO_U_CALLBACK_SKIP, 0 },
145     { "stop",
146       UCNV_FROM_U_CALLBACK_STOP, 0,
147       UCNV_TO_U_CALLBACK_STOP, 0 },
148     { "escape",
149       UCNV_FROM_U_CALLBACK_ESCAPE, 0,
150       UCNV_TO_U_CALLBACK_ESCAPE, 0},
151     { "escape-icu",
152       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
153       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
154     { "escape-java",
155       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
156       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
157     { "escape-c",
158       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
159       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
160     { "escape-xml",
161       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
162       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
163     { "escape-xml-hex",
164       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
165       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
166     { "escape-xml-dec",
167       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
168       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
169     { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
170       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
171 };
172 
173 /* Return a pointer to a callback record given its name. */
174 
findCallback(const char * name)175 static const struct callback_ent *findCallback(const char *name) {
176     int i, count =
177         sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
178 
179     /* We'll do a linear search, there aren't many of them and bsearch()
180        may not be that portable. */
181 
182     for (i = 0; i < count; ++i) {
183         if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
184             return &transcode_callbacks[i];
185         }
186     }
187 
188     return 0;
189 }
190 
191 /* Print converter information. If lookfor is set, only that converter will
192    be printed, otherwise all converters will be printed. If canon is non
193    zero, tags and aliases for each converter are printed too, in the format
194    expected for convrters.txt(5). */
195 
printConverters(const char * pname,const char * lookfor,UBool canon)196 static int printConverters(const char *pname, const char *lookfor,
197     UBool canon)
198 {
199     UErrorCode err = U_ZERO_ERROR;
200     int32_t num;
201     uint16_t num_stds;
202     const char **stds;
203 
204     /* If there is a specified name, just handle that now. */
205 
206     if (lookfor) {
207         if (!canon) {
208             printf("%s\n", lookfor);
209             return 0;
210         } else {
211         /*  Because we are printing a canonical name, we need the
212             true converter name. We've done that already except for
213             the default name (because we want to print the exact
214             name one would get when calling ucnv_getDefaultName()
215             in non-canon mode). But since we do not know at this
216             point if we have the default name or something else, we
217             need to normalize again to the canonical converter
218             name. */
219 
220             const char *truename = ucnv_getAlias(lookfor, 0, &err);
221             if (U_SUCCESS(err)) {
222                 lookfor = truename;
223             } else {
224                 err = U_ZERO_ERROR;
225             }
226         }
227     }
228 
229     /* Print converter names. We come here for one of two reasons: we
230        are printing all the names (lookfor was null), or we have a
231        single converter to print but in canon mode, hence we need to
232        get to it in order to print everything. */
233 
234     num = ucnv_countAvailable();
235     if (num <= 0) {
236         initMsg(pname);
237         u_wmsg(stderr, "cantGetNames");
238         return -1;
239     }
240     if (lookfor) {
241         num = 1;                /* We know where we want to be. */
242     }
243 
244     num_stds = ucnv_countStandards();
245     stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
246     if (!stds) {
247         u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
248         return -1;
249     } else {
250         uint16_t s;
251 
252         if (canon) {
253             printf("{ ");
254         }
255         for (s = 0; s < num_stds; ++s) {
256             stds[s] = ucnv_getStandard(s, &err);
257             if (canon) {
258                 printf("%s ", stds[s]);
259             }
260             if (U_FAILURE(err)) {
261                 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
262                 goto error_cleanup;
263             }
264         }
265         if (canon) {
266             puts("}");
267         }
268     }
269 
270     for (int32_t i = 0; i < num; i++) {
271         const char *name;
272         uint16_t num_aliases;
273 
274         /* Set the name either to what we are looking for, or
275         to the current converter name. */
276 
277         if (lookfor) {
278             name = lookfor;
279         } else {
280             name = ucnv_getAvailableName(i);
281         }
282 
283         /* Get all the aliases associated to the name. */
284 
285         err = U_ZERO_ERROR;
286         num_aliases = ucnv_countAliases(name, &err);
287         if (U_FAILURE(err)) {
288             printf("%s", name);
289 
290             UnicodeString str(name, "");
291             putchar('\t');
292             u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
293                 u_wmsg_errorName(err));
294             goto error_cleanup;
295         } else {
296             uint16_t a, s, t;
297 
298             /* Write all the aliases and their tags. */
299 
300             for (a = 0; a < num_aliases; ++a) {
301                 const char *alias = ucnv_getAlias(name, a, &err);
302 
303                 if (U_FAILURE(err)) {
304                     UnicodeString str(name, "");
305                     putchar('\t');
306                     u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
307                         u_wmsg_errorName(err));
308                     goto error_cleanup;
309                 }
310 
311                 /* Print the current alias so that it looks right. */
312                 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
313                                  alias,
314                                  (canon ? "" : " "));
315 
316                 /* Look (slowly, linear searching) for a tag. */
317 
318                 if (canon) {
319                     /* -1 to skip the last standard */
320                     for (s = t = 0; s < num_stds-1; ++s) {
321                         UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
322                         if (U_SUCCESS(err)) {
323                             /* List the standard tags */
324                             const char *standardName;
325                             UBool isFirst = TRUE;
326                             UErrorCode enumError = U_ZERO_ERROR;
327                             while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
328                                 /* See if this alias is supported by this standard. */
329                                 if (!strcmp(standardName, alias)) {
330                                     if (!t) {
331                                         printf(" {");
332                                         t = 1;
333                                     }
334                                     /* Print a * after the default standard name */
335                                     printf(" %s%s", stds[s], (isFirst ? "*" : ""));
336                                 }
337                                 isFirst = FALSE;
338                             }
339                         }
340                     }
341                     if (t) {
342                         printf(" }");
343                     }
344                 }
345                 /* Terminate this entry. */
346                 if (canon) {
347                     puts("");
348                 }
349 
350                 /* Move on. */
351             }
352             /* Terminate this entry. */
353             if (!canon) {
354                 puts("");
355             }
356         }
357     }
358 
359     /* Free temporary data. */
360 
361     uprv_free(stds);
362 
363     /* Success. */
364 
365     return 0;
366 error_cleanup:
367     uprv_free(stds);
368     return -1;
369 }
370 
371 /* Print all available transliterators. If canon is non zero, print
372    one transliterator per line. */
373 
printTransliterators(UBool canon)374 static int printTransliterators(UBool canon)
375 {
376 #if UCONFIG_NO_TRANSLITERATION
377     printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
378     return 1;
379 #else
380     UErrorCode status = U_ZERO_ERROR;
381     UEnumeration *ids = utrans_openIDs(&status);
382     int32_t i, numtrans = uenum_count(ids, &status);
383 
384     char sepchar = canon ? '\n' : ' ';
385 
386     for (i = 0; U_SUCCESS(status)&& (i < numtrans); ++i) {
387     	int32_t len;
388     	const char *nextTrans = uenum_next(ids, &len, &status);
389 
390         printf("%s", nextTrans);
391         if (i < numtrans - 1) {
392             putchar(sepchar);
393         }
394     }
395 
396     uenum_close(ids);
397 
398     /* Add a terminating newline if needed. */
399 
400     if (sepchar != '\n') {
401         putchar('\n');
402     }
403 
404     /* Success. */
405 
406     return 0;
407 #endif
408 }
409 
// Code unit values of the characters that this file tests for by value,
// listed in code point order.
enum {
    uLF  = 0x000a,      // line feed
    uCR  = 0x000d,      // carriage return
    uSP  = 0x0020,      // space
    uNL  = 0x0085,      // newline
    uLS  = 0x2028,      // line separator
    uPS  = 0x2029,      // paragraph separator
    uSig = 0xfeff       // signature/BOM character
};
419 
420 static inline int32_t
getChunkLimit(const UnicodeString & prev,const UnicodeString & s)421 getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
422     // find one of
423     // CR, LF, CRLF, NL, LS, PS
424     // for paragraph ends (see UAX #13/Unicode 4)
425     // and include it in the chunk
426     // all of these characters are on the BMP
427     // do not include FF or VT in case they are part of a paragraph
428     // (important for bidi contexts)
429     static const UChar paraEnds[] = {
430         0xd, 0xa, 0x85, 0x2028, 0x2029
431     };
432     enum {
433         iCR, iLF, iNL, iLS, iPS, iCount
434     };
435 
436     // first, see if there is a CRLF split between prev and s
437     if (prev.endsWith(paraEnds + iCR, 1)) {
438         if (s.startsWith(paraEnds + iLF, 1)) {
439             return 1; // split CRLF, include the LF
440         } else if (!s.isEmpty()) {
441             return 0; // complete the last chunk
442         } else {
443             return -1; // wait for actual further contents to arrive
444         }
445     }
446 
447     const UChar *u = s.getBuffer(), *limit = u + s.length();
448     UChar c;
449 
450     while (u < limit) {
451         c = *u++;
452         if (
453             ((c < uSP) && (c == uCR || c == uLF)) ||
454             (c == uNL) ||
455             ((c & uLS) == uLS)
456         ) {
457             if (c == uCR) {
458                 // check for CRLF
459                 if (u == limit) {
460                     return -1; // LF may be in the next chunk
461                 } else if (*u == uLF) {
462                     ++u; // include the LF in this chunk
463                 }
464             }
465             return (int32_t)(u - s.getBuffer());
466         }
467     }
468 
469     return -1; // continue collecting the chunk
470 }
471 
// Result values of cnvSigType(): how a converter treats U+FEFF.
enum {
    CNV_NO_FEFF   = 0,  // cannot convert the U+FEFF Unicode signature character (BOM)
    CNV_WITH_FEFF = 1,  // can convert the U+FEFF signature character
    CNV_ADDS_FEFF = 2   // automatically adds/detects the U+FEFF signature character
};
477 
478 static inline UChar
nibbleToHex(uint8_t n)479 nibbleToHex(uint8_t n) {
480     n &= 0xf;
481     return
482         n <= 9 ?
483             (UChar)(0x30 + n) :
484             (UChar)((0x61 - 10) + n);
485 }
486 
487 // check the converter's Unicode signature properties;
488 // the fromUnicode side of the converter must be in its initial state
489 // and will be reset again if it was used
490 static int32_t
cnvSigType(UConverter * cnv)491 cnvSigType(UConverter *cnv) {
492     UErrorCode err;
493     int32_t result;
494 
495     // test if the output charset can convert U+FEFF
496     USet *set = uset_open(1, 0);
497     err = U_ZERO_ERROR;
498     ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
499     if (U_SUCCESS(err) && uset_contains(set, uSig)) {
500         result = CNV_WITH_FEFF;
501     } else {
502         result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
503     }
504     uset_close(set);
505 
506     if (result == CNV_WITH_FEFF) {
507         // test if the output charset emits a signature anyway
508         const UChar a[1] = { 0x61 }; // "a"
509         const UChar *in;
510 
511         char buffer[20];
512         char *out;
513 
514         in = a;
515         out = buffer;
516         err = U_ZERO_ERROR;
517         ucnv_fromUnicode(cnv,
518             &out, buffer + sizeof(buffer),
519             &in, a + 1,
520             NULL, TRUE, &err);
521         ucnv_resetFromUnicode(cnv);
522 
523         if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
524             U_SUCCESS(err)
525         ) {
526             result = CNV_ADDS_FEFF;
527         }
528     }
529 
530     return result;
531 }
532 
533 class ConvertFile {
534 public:
ConvertFile()535     ConvertFile() :
536         buf(NULL), outbuf(NULL), fromoffsets(NULL),
537         bufsz(0), signature(0) {}
538 
539     void
setBufferSize(size_t bufferSize)540     setBufferSize(size_t bufferSize) {
541         bufsz = bufferSize;
542 
543         buf = new char[2 * bufsz];
544         outbuf = buf + bufsz;
545 
546         // +1 for an added U+FEFF in the intermediate Unicode buffer
547         fromoffsets = new int32_t[bufsz + 1];
548     }
549 
~ConvertFile()550     ~ConvertFile() {
551         delete [] buf;
552         delete [] fromoffsets;
553     }
554 
555     UBool convertFile(const char *pname,
556                       const char *fromcpage,
557                       UConverterToUCallback toucallback,
558                       const void *touctxt,
559                       const char *tocpage,
560                       UConverterFromUCallback fromucallback,
561                       const void *fromuctxt,
562                       UBool fallback,
563                       const char *translit,
564                       const char *infilestr,
565                       FILE * outfile, int verbose);
566 private:
567     friend int main(int argc, char **argv);
568 
569     char *buf, *outbuf;
570     int32_t *fromoffsets;
571 
572     size_t bufsz;
573     int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
574 };
575 
576 // Convert a file from one encoding to another
577 UBool
convertFile(const char * pname,const char * fromcpage,UConverterToUCallback toucallback,const void * touctxt,const char * tocpage,UConverterFromUCallback fromucallback,const void * fromuctxt,UBool fallback,const char * translit,const char * infilestr,FILE * outfile,int verbose)578 ConvertFile::convertFile(const char *pname,
579                          const char *fromcpage,
580                          UConverterToUCallback toucallback,
581                          const void *touctxt,
582                          const char *tocpage,
583                          UConverterFromUCallback fromucallback,
584                          const void *fromuctxt,
585                          UBool fallback,
586                          const char *translit,
587                          const char *infilestr,
588                          FILE * outfile, int verbose)
589 {
590     FILE *infile;
591     UBool ret = TRUE;
592     UConverter *convfrom = 0;
593     UConverter *convto = 0;
594     UErrorCode err = U_ZERO_ERROR;
595     UBool flush;
596     const char *cbufp, *prevbufp;
597     char *bufp;
598 
599     uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
600 
601     const UChar *unibuf, *unibufbp;
602     UChar *unibufp;
603 
604     size_t rd, wr;
605 
606 #if !UCONFIG_NO_TRANSLITERATION
607     Transliterator *t = 0;      // Transliterator acting on Unicode data.
608     UnicodeString chunk;        // One chunk of the text being collected for transformation.
609 #endif
610     UnicodeString u;            // String to do the transliteration.
611     int32_t ulen;
612 
613     // use conversion offsets for error messages
614     // unless a transliterator is used -
615     // a text transformation will reorder characters in unpredictable ways
616     UBool useOffsets = TRUE;
617 
618     // Open the correct input file or connect to stdin for reading input
619 
620     if (infilestr != 0 && strcmp(infilestr, "-")) {
621         infile = fopen(infilestr, "rb");
622         if (infile == 0) {
623             UnicodeString str1(infilestr, "");
624             str1.append((UChar32) 0);
625             UnicodeString str2(strerror(errno), "");
626             str2.append((UChar32) 0);
627             initMsg(pname);
628             u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
629             return FALSE;
630         }
631     } else {
632         infilestr = "-";
633         infile = stdin;
634 #ifdef USE_FILENO_BINARY_MODE
635         if (setmode(fileno(stdin), O_BINARY) == -1) {
636             initMsg(pname);
637             u_wmsg(stderr, "cantSetInBinMode");
638             return FALSE;
639         }
640 #endif
641     }
642 
643     if (verbose) {
644         fprintf(stderr, "%s:\n", infilestr);
645     }
646 
647 #if !UCONFIG_NO_TRANSLITERATION
648     // Create transliterator as needed.
649 
650     if (translit != NULL && *translit) {
651         UParseError parse;
652         UnicodeString str(translit), pestr;
653 
654         /* Create from rules or by ID as needed. */
655 
656         parse.line = -1;
657 
658         if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
659             t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
660         } else {
661             t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
662         }
663 
664         if (U_FAILURE(err)) {
665             str.append((UChar32) 0);
666             initMsg(pname);
667 
668             if (parse.line >= 0) {
669                 UChar linebuf[20], offsetbuf[20];
670                 uprv_itou(linebuf, 20, parse.line, 10, 0);
671                 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
672                 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
673                     u_wmsg_errorName(err), linebuf, offsetbuf);
674             } else {
675                 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
676                     u_wmsg_errorName(err));
677             }
678 
679             if (t) {
680                 delete t;
681                 t = 0;
682             }
683             goto error_exit;
684         }
685 
686         useOffsets = FALSE;
687     }
688 #endif
689 
690     // Create codepage converter. If the codepage or its aliases weren't
691     // available, it returns NULL and a failure code. We also set the
692     // callbacks, and return errors in the same way.
693 
694     convfrom = ucnv_open(fromcpage, &err);
695     if (U_FAILURE(err)) {
696         UnicodeString str(fromcpage, "");
697         initMsg(pname);
698         u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
699             u_wmsg_errorName(err));
700         goto error_exit;
701     }
702     ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
703     if (U_FAILURE(err)) {
704         initMsg(pname);
705         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
706         goto error_exit;
707     }
708 
709     convto = ucnv_open(tocpage, &err);
710     if (U_FAILURE(err)) {
711         UnicodeString str(tocpage, "");
712         initMsg(pname);
713         u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
714             u_wmsg_errorName(err));
715         goto error_exit;
716     }
717     ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
718     if (U_FAILURE(err)) {
719         initMsg(pname);
720         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
721         goto error_exit;
722     }
723     ucnv_setFallback(convto, fallback);
724 
725     UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
726     int8_t sig;
727 
728     // OK, we can convert now.
729     sig = signature;
730     rd = 0;
731 
732     do {
733         willexit = FALSE;
734 
735         // input file offset at the beginning of the next buffer
736         infoffset += rd;
737 
738         rd = fread(buf, 1, bufsz, infile);
739         if (ferror(infile) != 0) {
740             UnicodeString str(strerror(errno));
741             initMsg(pname);
742             u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
743             goto error_exit;
744         }
745 
746         // Convert the read buffer into the new encoding via Unicode.
747         // After the call 'unibufp' will be placed behind the last
748         // character that was converted in the 'unibuf'.
749         // Also the 'cbufp' is positioned behind the last converted
750         // character.
751         // At the last conversion in the file, flush should be set to
752         // true so that we get all characters converted.
753         //
754         // The converter must be flushed at the end of conversion so
755         // that characters on hold also will be written.
756 
757         cbufp = buf;
758         flush = (UBool)(rd != bufsz);
759 
760         // convert until the input is consumed
761         do {
762             // remember the start of the current byte-to-Unicode conversion
763             prevbufp = cbufp;
764 
765             unibuf = unibufp = u.getBuffer((int32_t)bufsz);
766 
767             // Use bufsz instead of u.getCapacity() for the targetLimit
768             // so that we don't overflow fromoffsets[].
769             ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
770                 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
771 
772             ulen = (int32_t)(unibufp - unibuf);
773             u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
774 
775             // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
776             // converting all of the input bytes.
777             // It works like this because ucnv_toUnicode() returns only under the
778             // following conditions:
779             // - an error occurred during conversion (an error code is set)
780             // - the target buffer is filled (the error code indicates an overflow)
781             // - the source is consumed
782             // That is, if the error code does not indicate a failure,
783             // not even an overflow, then the source must be consumed entirely.
784             fromSawEndOfBytes = (UBool)U_SUCCESS(err);
785 
786             if (err == U_BUFFER_OVERFLOW_ERROR) {
787                 err = U_ZERO_ERROR;
788             } else if (U_FAILURE(err)) {
789                 char pos[32], errorBytes[32];
790                 int8_t i, length, errorLength;
791 
792                 UErrorCode localError = U_ZERO_ERROR;
793                 errorLength = (int8_t)sizeof(errorBytes);
794                 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
795                 if (U_FAILURE(localError) || errorLength == 0) {
796                     errorLength = 1;
797                 }
798 
799                 // print the input file offset of the start of the error bytes:
800                 // input file offset of the current byte buffer +
801                 // length of the just consumed bytes -
802                 // length of the error bytes
803                 length =
804                     (int8_t)sprintf(pos, "%d",
805                         (int)(infoffset + (cbufp - buf) - errorLength));
806 
807                 // output the bytes that caused the error
808                 UnicodeString str;
809                 for (i = 0; i < errorLength; ++i) {
810                     if (i > 0) {
811                         str.append((UChar)uSP);
812                     }
813                     str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
814                     str.append(nibbleToHex((uint8_t)errorBytes[i]));
815                 }
816 
817                 initMsg(pname);
818                 u_wmsg(stderr, "problemCvtToU",
819                         UnicodeString(pos, length, "").getTerminatedBuffer(),
820                         str.getTerminatedBuffer(),
821                         u_wmsg_errorName(err));
822 
823                 willexit = TRUE;
824                 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
825             }
826 
827             // Replaced a check for whether the input was consumed by
828             // looping until it is; message key "premEndInput" now obsolete.
829 
830             if (ulen == 0) {
831                 continue;
832             }
833 
834             // remove a U+FEFF Unicode signature character if requested
835             if (sig < 0) {
836                 if (u.charAt(0) == uSig) {
837                     u.remove(0, 1);
838 
839                     // account for the removed UChar and offset
840                     --ulen;
841 
842                     if (useOffsets) {
843                         // remove an offset from fromoffsets[] as well
844                         // to keep the array parallel with the UChars
845                         memmove(fromoffsets, fromoffsets + 1, ulen * 4);
846                     }
847 
848                 }
849                 sig = 0;
850             }
851 
852 #if !UCONFIG_NO_TRANSLITERATION
853             // Transliterate/transform if needed.
854 
855             // For transformation, we use chunking code -
856             // collect Unicode input until, for example, an end-of-line,
857             // then transform and output-convert that and continue collecting.
858             // This makes the transformation result independent of the buffer size
859             // while avoiding the slower keyboard mode.
860             // The end-of-chunk characters are completely included in the
861             // transformed string in case they are to be transformed themselves.
862             if (t != NULL) {
863                 UnicodeString out;
864                 int32_t chunkLimit;
865 
866                 do {
867                     chunkLimit = getChunkLimit(chunk, u);
868                     if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
869                         // use all of the rest at the end of the text
870                         chunkLimit = u.length();
871                     }
872                     if (chunkLimit >= 0) {
873                         // complete the chunk and transform it
874                         chunk.append(u, 0, chunkLimit);
875                         u.remove(0, chunkLimit);
876                         t->transliterate(chunk);
877 
878                         // append the transformation result to the result and empty the chunk
879                         out.append(chunk);
880                         chunk.remove();
881                     } else {
882                         // continue collecting the chunk
883                         chunk.append(u);
884                         break;
885                     }
886                 } while (!u.isEmpty());
887 
888                 u = out;
889                 ulen = u.length();
890             }
891 #endif
892 
893             // add a U+FEFF Unicode signature character if requested
894             // and possible/necessary
895             if (sig > 0) {
896                 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
897                     u.insert(0, (UChar)uSig);
898 
899                     if (useOffsets) {
900                         // insert a pseudo-offset into fromoffsets[] as well
901                         // to keep the array parallel with the UChars
902                         memmove(fromoffsets + 1, fromoffsets, ulen * 4);
903                         fromoffsets[0] = -1;
904                     }
905 
906                     // account for the additional UChar and offset
907                     ++ulen;
908                 }
909                 sig = 0;
910             }
911 
912             // Convert the Unicode buffer into the destination codepage
913             // Again 'bufp' will be placed behind the last converted character
914             // And 'unibufp' will be placed behind the last converted unicode character
915             // At the last conversion flush should be set to true to ensure that
916             // all characters left get converted
917 
918             unibuf = unibufbp = u.getBuffer();
919 
920             do {
921                 bufp = outbuf;
922 
923                 // Use fromSawEndOfBytes in addition to the flush flag -
924                 // it indicates whether the intermediate Unicode string
925                 // contains the very last UChars for the very last input bytes.
926                 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
927                                  &unibufbp,
928                                  unibuf + ulen,
929                                  NULL, (UBool)(flush && fromSawEndOfBytes), &err);
930 
931                 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
932                 // converting all of the intermediate UChars.
933                 // See comment for fromSawEndOfBytes.
934                 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
935 
936                 if (err == U_BUFFER_OVERFLOW_ERROR) {
937                     err = U_ZERO_ERROR;
938                 } else if (U_FAILURE(err)) {
939                     UChar errorUChars[4];
940                     const char *errtag;
941                     char pos[32];
942                     UChar32 c;
943                     int8_t i, length, errorLength;
944 
945                     UErrorCode localError = U_ZERO_ERROR;
946                     errorLength = (int8_t)LENGTHOF(errorUChars);
947                     ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
948                     if (U_FAILURE(localError) || errorLength == 0) {
949                         // need at least 1 so that we don't access beyond the length of fromoffsets[]
950                         errorLength = 1;
951                     }
952 
953                     int32_t ferroffset;
954 
955                     if (useOffsets) {
956                         // Unicode buffer offset of the start of the error UChars
957                         ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
958                         if (ferroffset < 0) {
959                             // approximation - the character started in the previous Unicode buffer
960                             ferroffset = 0;
961                         }
962 
963                         // get the corresponding byte offset out of fromoffsets[]
964                         // go back if the offset is not known for some of the UChars
965                         int32_t fromoffset;
966                         do {
967                             fromoffset = fromoffsets[ferroffset];
968                         } while (fromoffset < 0 && --ferroffset >= 0);
969 
970                         // total input file offset =
971                         // input file offset of the current byte buffer +
972                         // byte buffer offset of where the current Unicode buffer is converted from +
973                         // fromoffsets[Unicode offset]
974                         ferroffset = infoffset + (prevbufp - buf) + fromoffset;
975                         errtag = "problemCvtFromU";
976                     } else {
977                         // Do not use fromoffsets if (t != NULL) because the Unicode text may
978                         // be different from what the offsets refer to.
979 
980                         // output file offset
981                         ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
982                         errtag = "problemCvtFromUOut";
983                     }
984 
985                     length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
986 
987                     // output the code points that caused the error
988                     UnicodeString str;
989                     for (i = 0; i < errorLength;) {
990                         if (i > 0) {
991                             str.append((UChar)uSP);
992                         }
993                         U16_NEXT(errorUChars, i, errorLength, c);
994                         if (c >= 0x100000) {
995                             str.append(nibbleToHex((uint8_t)(c >> 20)));
996                         }
997                         if (c >= 0x10000) {
998                             str.append(nibbleToHex((uint8_t)(c >> 16)));
999                         }
1000                         str.append(nibbleToHex((uint8_t)(c >> 12)));
1001                         str.append(nibbleToHex((uint8_t)(c >> 8)));
1002                         str.append(nibbleToHex((uint8_t)(c >> 4)));
1003                         str.append(nibbleToHex((uint8_t)c));
1004                     }
1005 
1006                     initMsg(pname);
1007                     u_wmsg(stderr, errtag,
1008                             UnicodeString(pos, length, "").getTerminatedBuffer(),
1009                             str.getTerminatedBuffer(),
1010                            u_wmsg_errorName(err));
1011                     u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
1012 
1013                     willexit = TRUE;
1014                     err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
1015                 }
1016 
1017                 // Replaced a check for whether the intermediate Unicode characters were all consumed by
1018                 // looping until they are; message key "premEnd" now obsolete.
1019 
1020                 // Finally, write the converted buffer to the output file
1021                 size_t outlen = (size_t) (bufp - outbuf);
1022                 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
1023                 if (wr != outlen) {
1024                     UnicodeString str(strerror(errno));
1025                     initMsg(pname);
1026                     u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
1027                     willexit = TRUE;
1028                 }
1029 
1030                 if (willexit) {
1031                     goto error_exit;
1032                 }
1033             } while (!toSawEndOfUnicode);
1034         } while (!fromSawEndOfBytes);
1035     } while (!flush);           // Stop when we have flushed the
1036                                 // converters (this means that it's
1037                                 // the end of output)
1038 
1039     goto normal_exit;
1040 
1041 error_exit:
1042     ret = FALSE;
1043 
1044 normal_exit:
1045     // Cleanup.
1046 
1047     ucnv_close(convfrom);
1048     ucnv_close(convto);
1049 
1050 #if !UCONFIG_NO_TRANSLITERATION
1051     delete t;
1052 #endif
1053 
1054     if (infile != stdin) {
1055         fclose(infile);
1056     }
1057 
1058     return ret;
1059 }
1060 
usage(const char * pname,int ecode)1061 static void usage(const char *pname, int ecode) {
1062     const UChar *msg;
1063     int32_t msgLen;
1064     UErrorCode err = U_ZERO_ERROR;
1065     FILE *fp = ecode ? stderr : stdout;
1066     int res;
1067 
1068     initMsg(pname);
1069     msg =
1070         ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
1071                             &msgLen, &err);
1072     UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
1073     UnicodeString mname(msg, msgLen + 1);
1074 
1075     res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
1076     if (!ecode) {
1077         if (!res) {
1078             fputc('\n', fp);
1079         }
1080         if (!u_wmsg(fp, "help")) {
1081             /* Now dump callbacks and finish. */
1082 
1083             int i, count =
1084                 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
1085             for (i = 0; i < count; ++i) {
1086                 fprintf(fp, " %s", transcode_callbacks[i].name);
1087             }
1088             fputc('\n', fp);
1089         }
1090     }
1091 
1092     exit(ecode);
1093 }
1094 
1095 extern int
main(int argc,char ** argv)1096 main(int argc, char **argv)
1097 {
1098     FILE *outfile;
1099     int ret = 0;
1100 
1101     size_t bufsz = DEFAULT_BUFSZ;
1102 
1103     const char *fromcpage = 0;
1104     const char *tocpage = 0;
1105     const char *translit = 0;
1106     const char *outfilestr = 0;
1107     UBool fallback = FALSE;
1108 
1109     UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
1110     const void *fromuctxt = 0;
1111     UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
1112     const void *touctxt = 0;
1113 
1114     char **iter, **remainArgv, **remainArgvLimit;
1115     char **end = argv + argc;
1116 
1117     const char *pname;
1118 
1119     UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
1120     const char *printName = 0;
1121 
1122     UBool verbose = FALSE;
1123     UErrorCode status = U_ZERO_ERROR;
1124 
1125     ConvertFile cf;
1126 
1127     /* Initialize ICU */
1128     u_init(&status);
1129     if (U_FAILURE(status)) {
1130         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
1131             argv[0], u_errorName(status));
1132         exit(1);
1133     }
1134 
1135     // Get and prettify pname.
1136     pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
1137 #ifdef U_WINDOWS
1138     if (!pname) {
1139         pname = uprv_strrchr(*argv, '/');
1140     }
1141 #endif
1142     if (!pname) {
1143         pname = *argv;
1144     } else {
1145         ++pname;
1146     }
1147 
1148     // First, get the arguments from command-line
1149     // to know the codepages to convert between
1150 
1151     remainArgv = remainArgvLimit = argv + 1;
1152     for (iter = argv + 1; iter != end; iter++) {
1153         // Check for from charset
1154         if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
1155             iter++;
1156             if (iter != end)
1157                 fromcpage = *iter;
1158             else
1159                 usage(pname, 1);
1160         } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
1161             iter++;
1162             if (iter != end)
1163                 tocpage = *iter;
1164             else
1165                 usage(pname, 1);
1166         } else if (strcmp("-x", *iter) == 0) {
1167             iter++;
1168             if (iter != end)
1169                 translit = *iter;
1170             else
1171                 usage(pname, 1);
1172         } else if (!strcmp("--fallback", *iter)) {
1173             fallback = TRUE;
1174         } else if (!strcmp("--no-fallback", *iter)) {
1175             fallback = FALSE;
1176         } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
1177             iter++;
1178             if (iter != end) {
1179                 bufsz = atoi(*iter);
1180                 if ((int) bufsz <= 0) {
1181                     initMsg(pname);
1182                     UnicodeString str(*iter);
1183                     initMsg(pname);
1184                     u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
1185                     return 3;
1186                 }
1187             } else {
1188                 usage(pname, 1);
1189             }
1190         } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
1191             if (printTranslits) {
1192                 usage(pname, 1);
1193             }
1194             printConvs = TRUE;
1195         } else if (strcmp("--default-code", *iter) == 0) {
1196             if (printTranslits) {
1197                 usage(pname, 1);
1198             }
1199             printName = ucnv_getDefaultName();
1200         } else if (strcmp("--list-code", *iter) == 0) {
1201             if (printTranslits) {
1202                 usage(pname, 1);
1203             }
1204 
1205             iter++;
1206             if (iter != end) {
1207                 UErrorCode e = U_ZERO_ERROR;
1208                 printName = ucnv_getAlias(*iter, 0, &e);
1209                 if (U_FAILURE(e) || !printName) {
1210                     UnicodeString str(*iter);
1211                     initMsg(pname);
1212                     u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
1213                     return 2;
1214                 }
1215             } else
1216                 usage(pname, 1);
1217         } else if (strcmp("--canon", *iter) == 0) {
1218             printCanon = TRUE;
1219         } else if (strcmp("-L", *iter) == 0
1220             || !strcmp("--list-transliterators", *iter)) {
1221             if (printConvs) {
1222                 usage(pname, 1);
1223             }
1224             printTranslits = TRUE;
1225         } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
1226             || !strcmp("--help", *iter)) {
1227             usage(pname, 0);
1228         } else if (!strcmp("-c", *iter)) {
1229             fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
1230         } else if (!strcmp("--to-callback", *iter)) {
1231             iter++;
1232             if (iter != end) {
1233                 const struct callback_ent *cbe = findCallback(*iter);
1234                 if (cbe) {
1235                     fromucallback = cbe->fromu;
1236                     fromuctxt = cbe->fromuctxt;
1237                 } else {
1238                     UnicodeString str(*iter);
1239                     initMsg(pname);
1240                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1241                     return 4;
1242                 }
1243             } else {
1244                 usage(pname, 1);
1245             }
1246         } else if (!strcmp("--from-callback", *iter)) {
1247             iter++;
1248             if (iter != end) {
1249                 const struct callback_ent *cbe = findCallback(*iter);
1250                 if (cbe) {
1251                     toucallback = cbe->tou;
1252                     touctxt = cbe->touctxt;
1253                 } else {
1254                     UnicodeString str(*iter);
1255                     initMsg(pname);
1256                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1257                     return 4;
1258                 }
1259             } else {
1260                 usage(pname, 1);
1261             }
1262         } else if (!strcmp("-i", *iter)) {
1263             toucallback = UCNV_TO_U_CALLBACK_SKIP;
1264         } else if (!strcmp("--callback", *iter)) {
1265             iter++;
1266             if (iter != end) {
1267                 const struct callback_ent *cbe = findCallback(*iter);
1268                 if (cbe) {
1269                     fromucallback = cbe->fromu;
1270                     fromuctxt = cbe->fromuctxt;
1271                     toucallback = cbe->tou;
1272                     touctxt = cbe->touctxt;
1273                 } else {
1274                     UnicodeString str(*iter);
1275                     initMsg(pname);
1276                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1277                     return 4;
1278                 }
1279             } else {
1280                 usage(pname, 1);
1281             }
1282         } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
1283             verbose = FALSE;
1284         } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
1285             verbose = TRUE;
1286         } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
1287             printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
1288             return 0;
1289         } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
1290             ++iter;
1291             if (iter != end && !outfilestr) {
1292                 outfilestr = *iter;
1293             } else {
1294                 usage(pname, 1);
1295             }
1296         } else if (0 == strcmp("--add-signature", *iter)) {
1297             cf.signature = 1;
1298         } else if (0 == strcmp("--remove-signature", *iter)) {
1299             cf.signature = -1;
1300         } else if (**iter == '-' && (*iter)[1]) {
1301             usage(pname, 1);
1302         } else {
1303             // move a non-option up in argv[]
1304             *remainArgvLimit++ = *iter;
1305         }
1306     }
1307 
1308     if (printConvs || printName) {
1309         return printConverters(pname, printName, printCanon) ? 2 : 0;
1310     } else if (printTranslits) {
1311         return printTransliterators(printCanon) ? 3 : 0;
1312     }
1313 
1314     if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
1315         fromcpage = ucnv_getDefaultName();
1316     }
1317     if (!tocpage || !uprv_strcmp(tocpage, "-")) {
1318         tocpage = ucnv_getDefaultName();
1319     }
1320 
1321     // Open the correct output file or connect to stdout for reading input
1322     if (outfilestr != 0 && strcmp(outfilestr, "-")) {
1323         outfile = fopen(outfilestr, "wb");
1324         if (outfile == 0) {
1325             UnicodeString str1(outfilestr, "");
1326             UnicodeString str2(strerror(errno), "");
1327             initMsg(pname);
1328             u_wmsg(stderr, "cantCreateOutputF",
1329                 str1.getBuffer(), str2.getBuffer());
1330             return 1;
1331         }
1332     } else {
1333         outfilestr = "-";
1334         outfile = stdout;
1335 #ifdef USE_FILENO_BINARY_MODE
1336         if (setmode(fileno(outfile), O_BINARY) == -1) {
1337             u_wmsg(stderr, "cantSetOutBinMode");
1338             exit(-1);
1339         }
1340 #endif
1341     }
1342 
1343     /* Loop again on the arguments to find all the input files, and
1344     convert them. */
1345 
1346     cf.setBufferSize(bufsz);
1347 
1348     if(remainArgv < remainArgvLimit) {
1349         for (iter = remainArgv; iter != remainArgvLimit; iter++) {
1350             if (!cf.convertFile(
1351                     pname, fromcpage, toucallback, touctxt, tocpage,
1352                     fromucallback, fromuctxt, fallback, translit, *iter,
1353                     outfile, verbose)
1354             ) {
1355                 goto error_exit;
1356             }
1357         }
1358     } else {
1359         if (!cf.convertFile(
1360                 pname, fromcpage, toucallback, touctxt, tocpage,
1361                 fromucallback, fromuctxt, fallback, translit, 0,
1362                 outfile, verbose)
1363         ) {
1364             goto error_exit;
1365         }
1366     }
1367 
1368     goto normal_exit;
1369 error_exit:
1370 #if !UCONFIG_NO_LEGACY_CONVERSION
1371     ret = 1;
1372 #else
1373     fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n");
1374 #endif
1375 normal_exit:
1376 
1377     if (outfile != stdout) {
1378         fclose(outfile);
1379     }
1380 
1381     return ret;
1382 }
1383 
1384 
1385 /*
1386  * Hey, Emacs, please set the following:
1387  *
1388  * Local Variables:
1389  * indent-tabs-mode: nil
1390  * End:
1391  *
1392  */
1393