• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*****************************************************************************
2 *
3 *   Copyright (C) 1999-2009, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *
6 ******************************************************************************/
7 
8 /*
9  * uconv(1): an iconv(1)-like converter using ICU.
10  *
11  * Original code by Jonas Utterstr&#x00F6;m <jonas.utterstrom@vittran.norrnod.se>
12  * contributed in 1999.
13  *
14  * Conversion to the C conversion API and many improvements by
15  * Yves Arrouye <yves@realnames.com>, current maintainer.
16  *
17  * Markus Scherer maintainer from 2003.
18  * See source code repository history for changes.
19  */
20 
21 #include <unicode/utypes.h>
22 #include <unicode/putil.h>
23 #include <unicode/ucnv.h>
24 #include <unicode/uenum.h>
25 #include <unicode/unistr.h>
26 #include <unicode/translit.h>
27 #include <unicode/uset.h>
28 #include <unicode/uclean.h>
29 
30 #include <stdio.h>
31 #include <errno.h>
32 #include <string.h>
33 #include <stdlib.h>
34 
35 #include "cmemory.h"
36 #include "cstring.h"
37 #include "ustrfmt.h"
38 
39 #include "unicode/uwmsg.h"
40 
41 U_NAMESPACE_USE
42 
43 #if (defined(U_WINDOWS) || defined(U_CYGWIN)) && !defined(__STRICT_ANSI__)
44 #include <io.h>
45 #include <fcntl.h>
46 #if defined(U_WINDOWS)
47 #define USE_FILENO_BINARY_MODE 1
48 /* Windows likes to rename Unix-like functions */
49 #ifndef fileno
50 #define fileno _fileno
51 #endif
52 #ifndef setmode
53 #define setmode _setmode
54 #endif
55 #ifndef O_BINARY
56 #define O_BINARY _O_BINARY
57 #endif
58 #endif
59 #endif
60 
61 #ifdef UCONVMSG_LINK
62 /* below from the README */
63 #include "unicode/utypes.h"
64 #include "unicode/udata.h"
65 U_CFUNC char uconvmsg_dat[];
66 #endif
67 
68 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
69 
70 #define DEFAULT_BUFSZ   4096
71 #define UCONVMSG "uconvmsg"
72 
73 static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
74 
75 /*
76  * Initialize the message bundle so that message strings can be fetched
77  * by u_wmsg().
78  *
79  */
80 
initMsg(const char * pname)81 static void initMsg(const char *pname) {
82     static int ps = 0;
83 
84     if (!ps) {
85         char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
86         UErrorCode err = U_ZERO_ERROR;
87 
88         ps = 1;
89 
90         /* Set up our static data - if any */
91 #ifdef UCONVMSG_LINK
92         udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
93         if (U_FAILURE(err)) {
94           fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
95                   pname, u_errorName(err));
96           err = U_ZERO_ERROR; /* It may still fail */
97         }
98 #endif
99 
100         /* Get messages. */
101         gBundle = u_wmsg_setPath(UCONVMSG, &err);
102         if (U_FAILURE(err)) {
103             fprintf(stderr,
104                     "%s: warning: couldn't open bundle %s: %s\n",
105                     pname, UCONVMSG, u_errorName(err));
106 #ifdef UCONVMSG_LINK
107             fprintf(stderr,
108                     "%s: setAppData was called, internal data %s failed to load\n",
109                         pname, UCONVMSG);
110 #endif
111 
112             err = U_ZERO_ERROR;
113             /* that was try #1, try again with a path */
114             uprv_strcpy(dataPath, u_getDataDirectory());
115             uprv_strcat(dataPath, U_FILE_SEP_STRING);
116             uprv_strcat(dataPath, UCONVMSG);
117 
118             gBundle = u_wmsg_setPath(dataPath, &err);
119             if (U_FAILURE(err)) {
120                 fprintf(stderr,
121                     "%s: warning: still couldn't open bundle %s: %s\n",
122                     pname, dataPath, u_errorName(err));
123                 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
124             }
125         }
126     }
127 }
128 
129 /* Mapping of callback names to the callbacks passed to the converter
130    API. */
131 
132 static struct callback_ent {
133     const char *name;
134     UConverterFromUCallback fromu;
135     const void *fromuctxt;
136     UConverterToUCallback tou;
137     const void *touctxt;
138 } transcode_callbacks[] = {
139     { "substitute",
140       UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
141       UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
142     { "skip",
143       UCNV_FROM_U_CALLBACK_SKIP, 0,
144       UCNV_TO_U_CALLBACK_SKIP, 0 },
145     { "stop",
146       UCNV_FROM_U_CALLBACK_STOP, 0,
147       UCNV_TO_U_CALLBACK_STOP, 0 },
148     { "escape",
149       UCNV_FROM_U_CALLBACK_ESCAPE, 0,
150       UCNV_TO_U_CALLBACK_ESCAPE, 0},
151     { "escape-icu",
152       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
153       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
154     { "escape-java",
155       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
156       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
157     { "escape-c",
158       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
159       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
160     { "escape-xml",
161       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
162       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
163     { "escape-xml-hex",
164       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
165       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
166     { "escape-xml-dec",
167       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
168       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
169     { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
170       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
171 };
172 
173 /* Return a pointer to a callback record given its name. */
174 
findCallback(const char * name)175 static const struct callback_ent *findCallback(const char *name) {
176     int i, count =
177         sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
178 
179     /* We'll do a linear search, there aren't many of them and bsearch()
180        may not be that portable. */
181 
182     for (i = 0; i < count; ++i) {
183         if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
184             return &transcode_callbacks[i];
185         }
186     }
187 
188     return 0;
189 }
190 
191 /* Print converter information. If lookfor is set, only that converter will
192    be printed, otherwise all converters will be printed. If canon is non
193    zero, tags and aliases for each converter are printed too, in the format
194    expected for convrters.txt(5). */
195 
printConverters(const char * pname,const char * lookfor,UBool canon)196 static int printConverters(const char *pname, const char *lookfor,
197     UBool canon)
198 {
199     UErrorCode err = U_ZERO_ERROR;
200     int32_t num;
201     uint16_t num_stds;
202     const char **stds;
203 
204     /* If there is a specified name, just handle that now. */
205 
206     if (lookfor) {
207         if (!canon) {
208             printf("%s\n", lookfor);
209             return 0;
210         } else {
211         /*  Because we are printing a canonical name, we need the
212             true converter name. We've done that already except for
213             the default name (because we want to print the exact
214             name one would get when calling ucnv_getDefaultName()
215             in non-canon mode). But since we do not know at this
216             point if we have the default name or something else, we
217             need to normalize again to the canonical converter
218             name. */
219 
220             const char *truename = ucnv_getAlias(lookfor, 0, &err);
221             if (U_SUCCESS(err)) {
222                 lookfor = truename;
223             } else {
224                 err = U_ZERO_ERROR;
225             }
226         }
227     }
228 
229     /* Print converter names. We come here for one of two reasons: we
230        are printing all the names (lookfor was null), or we have a
231        single converter to print but in canon mode, hence we need to
232        get to it in order to print everything. */
233 
234     num = ucnv_countAvailable();
235     if (num <= 0) {
236         initMsg(pname);
237         u_wmsg(stderr, "cantGetNames");
238         return -1;
239     }
240     if (lookfor) {
241         num = 1;                /* We know where we want to be. */
242     }
243 
244     num_stds = ucnv_countStandards();
245     stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
246     if (!stds) {
247         u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
248         return -1;
249     } else {
250         uint16_t s;
251 
252         if (canon) {
253             printf("{ ");
254         }
255         for (s = 0; s < num_stds; ++s) {
256             stds[s] = ucnv_getStandard(s, &err);
257             if (canon) {
258                 printf("%s ", stds[s]);
259             }
260             if (U_FAILURE(err)) {
261                 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
262                 goto error_cleanup;
263             }
264         }
265         if (canon) {
266             puts("}");
267         }
268     }
269 
270     for (int32_t i = 0; i < num; i++) {
271         const char *name;
272         uint16_t num_aliases;
273 
274         /* Set the name either to what we are looking for, or
275         to the current converter name. */
276 
277         if (lookfor) {
278             name = lookfor;
279         } else {
280             name = ucnv_getAvailableName(i);
281         }
282 
283         /* Get all the aliases associated to the name. */
284 
285         err = U_ZERO_ERROR;
286         num_aliases = ucnv_countAliases(name, &err);
287         if (U_FAILURE(err)) {
288             printf("%s", name);
289 
290             UnicodeString str(name, "");
291             putchar('\t');
292             u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
293                 u_wmsg_errorName(err));
294             goto error_cleanup;
295         } else {
296             uint16_t a, s, t;
297 
298             /* Write all the aliases and their tags. */
299 
300             for (a = 0; a < num_aliases; ++a) {
301                 const char *alias = ucnv_getAlias(name, a, &err);
302 
303                 if (U_FAILURE(err)) {
304                     UnicodeString str(name, "");
305                     putchar('\t');
306                     u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
307                         u_wmsg_errorName(err));
308                     goto error_cleanup;
309                 }
310 
311                 /* Print the current alias so that it looks right. */
312                 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
313                                  alias,
314                                  (canon ? "" : " "));
315 
316                 /* Look (slowly, linear searching) for a tag. */
317 
318                 if (canon) {
319                     /* -1 to skip the last standard */
320                     for (s = t = 0; s < num_stds-1; ++s) {
321                         UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
322                         if (U_SUCCESS(err)) {
323                             /* List the standard tags */
324                             const char *standardName;
325                             UBool isFirst = TRUE;
326                             UErrorCode enumError = U_ZERO_ERROR;
327                             while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
328                                 /* See if this alias is supported by this standard. */
329                                 if (!strcmp(standardName, alias)) {
330                                     if (!t) {
331                                         printf(" {");
332                                         t = 1;
333                                     }
334                                     /* Print a * after the default standard name */
335                                     printf(" %s%s", stds[s], (isFirst ? "*" : ""));
336                                 }
337                                 isFirst = FALSE;
338                             }
339                         }
340                     }
341                     if (t) {
342                         printf(" }");
343                     }
344                 }
345                 /* Terminate this entry. */
346                 if (canon) {
347                     puts("");
348                 }
349 
350                 /* Move on. */
351             }
352             /* Terminate this entry. */
353             if (!canon) {
354                 puts("");
355             }
356         }
357     }
358 
359     /* Free temporary data. */
360 
361     uprv_free(stds);
362 
363     /* Success. */
364 
365     return 0;
366 error_cleanup:
367     uprv_free(stds);
368     return -1;
369 }
370 
371 /* Print all available transliterators. If canon is non zero, print
372    one transliterator per line. */
373 
printTransliterators(UBool canon)374 static int printTransliterators(UBool canon)
375 {
376 #if UCONFIG_NO_TRANSLITERATION
377     printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
378     return 1;
379 #else
380     int32_t numtrans = utrans_countAvailableIDs(), i;
381     int buflen = 512;
382     char *buf = (char *) uprv_malloc(buflen);
383     char staticbuf[512];
384 
385     char sepchar = canon ? '\n' : ' ';
386 
387     if (!buf) {
388         buf = staticbuf;
389         buflen = sizeof(staticbuf);
390     }
391 
392     for (i = 0; i < numtrans; ++i) {
393         int32_t len = utrans_getAvailableID(i, buf, buflen);
394         if (len >= buflen - 1) {
395             if (buf != staticbuf) {
396                 buflen <<= 1;
397                 if (buflen < len) {
398                     buflen = len + 64;
399                 }
400                 buf = (char *) uprv_realloc(buf, buflen);
401                 if (!buf) {
402                     buf = staticbuf;
403                     buflen = sizeof(staticbuf);
404                 }
405             }
406             utrans_getAvailableID(i, buf, buflen);
407             if (len >= buflen) {
408                 uprv_strcpy(buf + buflen - 4, "..."); /* Truncate the name. */
409             }
410         }
411 
412         printf("%s", buf);
413         if (i < numtrans - 1) {
414             putchar(sepchar);
415         }
416     }
417 
418     /* Add a terminating newline if needed. */
419 
420     if (sepchar != '\n') {
421         putchar('\n');
422     }
423 
424     /* Free temporary data. */
425 
426     if (buf != staticbuf) {
427         uprv_free(buf);
428     }
429 
430     /* Success. */
431 
432     return 0;
433 #endif
434 }
435 
// Named constants for the code points this file compares against.
enum {
    uSP  = 0x0020,      // U+0020 space
    uCR  = 0x000D,      // U+000D carriage return
    uLF  = 0x000A,      // U+000A line feed
    uNL  = 0x0085,      // U+0085 newline
    uLS  = 0x2028,      // U+2028 line separator
    uPS  = 0x2029,      // U+2029 paragraph separator
    uSig = 0xFEFF       // U+FEFF signature/BOM character
};
445 
446 static inline int32_t
getChunkLimit(const UnicodeString & prev,const UnicodeString & s)447 getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
448     // find one of
449     // CR, LF, CRLF, NL, LS, PS
450     // for paragraph ends (see UAX #13/Unicode 4)
451     // and include it in the chunk
452     // all of these characters are on the BMP
453     // do not include FF or VT in case they are part of a paragraph
454     // (important for bidi contexts)
455     static const UChar paraEnds[] = {
456         0xd, 0xa, 0x85, 0x2028, 0x2029
457     };
458     enum {
459         iCR, iLF, iNL, iLS, iPS, iCount
460     };
461 
462     // first, see if there is a CRLF split between prev and s
463     if (prev.endsWith(paraEnds + iCR, 1)) {
464         if (s.startsWith(paraEnds + iLF, 1)) {
465             return 1; // split CRLF, include the LF
466         } else if (!s.isEmpty()) {
467             return 0; // complete the last chunk
468         } else {
469             return -1; // wait for actual further contents to arrive
470         }
471     }
472 
473     const UChar *u = s.getBuffer(), *limit = u + s.length();
474     UChar c;
475 
476     while (u < limit) {
477         c = *u++;
478         if (
479             ((c < uSP) && (c == uCR || c == uLF)) ||
480             (c == uNL) ||
481             ((c & uLS) == uLS)
482         ) {
483             if (c == uCR) {
484                 // check for CRLF
485                 if (u == limit) {
486                     return -1; // LF may be in the next chunk
487                 } else if (*u == uLF) {
488                     ++u; // include the LF in this chunk
489                 }
490             }
491             return (int32_t)(u - s.getBuffer());
492         }
493     }
494 
495     return -1; // continue collecting the chunk
496 }
497 
// How a converter relates to the U+FEFF Unicode signature character (BOM).
enum {
    CNV_NO_FEFF   = 0,  // U+FEFF cannot be converted
    CNV_WITH_FEFF = 1,  // U+FEFF can be converted
    CNV_ADDS_FEFF = 2   // the converter adds/detects U+FEFF by itself
};
503 
504 static inline UChar
nibbleToHex(uint8_t n)505 nibbleToHex(uint8_t n) {
506     n &= 0xf;
507     return
508         n <= 9 ?
509             (UChar)(0x30 + n) :
510             (UChar)((0x61 - 10) + n);
511 }
512 
513 // check the converter's Unicode signature properties;
514 // the fromUnicode side of the converter must be in its initial state
515 // and will be reset again if it was used
516 static int32_t
cnvSigType(UConverter * cnv)517 cnvSigType(UConverter *cnv) {
518     UErrorCode err;
519     int32_t result;
520 
521     // test if the output charset can convert U+FEFF
522     USet *set = uset_open(1, 0);
523     err = U_ZERO_ERROR;
524     ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
525     if (U_SUCCESS(err) && uset_contains(set, uSig)) {
526         result = CNV_WITH_FEFF;
527     } else {
528         result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
529     }
530     uset_close(set);
531 
532     if (result == CNV_WITH_FEFF) {
533         // test if the output charset emits a signature anyway
534         const UChar a[1] = { 0x61 }; // "a"
535         const UChar *in;
536 
537         char buffer[20];
538         char *out;
539 
540         in = a;
541         out = buffer;
542         err = U_ZERO_ERROR;
543         ucnv_fromUnicode(cnv,
544             &out, buffer + sizeof(buffer),
545             &in, a + 1,
546             NULL, TRUE, &err);
547         ucnv_resetFromUnicode(cnv);
548 
549         if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
550             U_SUCCESS(err)
551         ) {
552             result = CNV_ADDS_FEFF;
553         }
554     }
555 
556     return result;
557 }
558 
559 class ConvertFile {
560 public:
ConvertFile()561     ConvertFile() :
562         buf(NULL), outbuf(NULL), fromoffsets(NULL),
563         bufsz(0), signature(0) {}
564 
565     void
setBufferSize(size_t bufferSize)566     setBufferSize(size_t bufferSize) {
567         bufsz = bufferSize;
568 
569         buf = new char[2 * bufsz];
570         outbuf = buf + bufsz;
571 
572         // +1 for an added U+FEFF in the intermediate Unicode buffer
573         fromoffsets = new int32_t[bufsz + 1];
574     }
575 
~ConvertFile()576     ~ConvertFile() {
577         delete [] buf;
578         delete [] fromoffsets;
579     }
580 
581     UBool convertFile(const char *pname,
582                       const char *fromcpage,
583                       UConverterToUCallback toucallback,
584                       const void *touctxt,
585                       const char *tocpage,
586                       UConverterFromUCallback fromucallback,
587                       const void *fromuctxt,
588                       UBool fallback,
589                       const char *translit,
590                       const char *infilestr,
591                       FILE * outfile, int verbose);
592 private:
593     friend int main(int argc, char **argv);
594 
595     char *buf, *outbuf;
596     int32_t *fromoffsets;
597 
598     size_t bufsz;
599     int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
600 };
601 
602 // Convert a file from one encoding to another
603 UBool
convertFile(const char * pname,const char * fromcpage,UConverterToUCallback toucallback,const void * touctxt,const char * tocpage,UConverterFromUCallback fromucallback,const void * fromuctxt,UBool fallback,const char * translit,const char * infilestr,FILE * outfile,int verbose)604 ConvertFile::convertFile(const char *pname,
605                          const char *fromcpage,
606                          UConverterToUCallback toucallback,
607                          const void *touctxt,
608                          const char *tocpage,
609                          UConverterFromUCallback fromucallback,
610                          const void *fromuctxt,
611                          UBool fallback,
612                          const char *translit,
613                          const char *infilestr,
614                          FILE * outfile, int verbose)
615 {
616     FILE *infile;
617     UBool ret = TRUE;
618     UConverter *convfrom = 0;
619     UConverter *convto = 0;
620     UErrorCode err = U_ZERO_ERROR;
621     UBool flush;
622     const char *cbufp, *prevbufp;
623     char *bufp;
624 
625     uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
626 
627     const UChar *unibuf, *unibufbp;
628     UChar *unibufp;
629 
630     size_t rd, wr;
631 
632 #if !UCONFIG_NO_TRANSLITERATION
633     Transliterator *t = 0;      // Transliterator acting on Unicode data.
634     UnicodeString chunk;        // One chunk of the text being collected for transformation.
635 #endif
636     UnicodeString u;            // String to do the transliteration.
637     int32_t ulen;
638 
639     // use conversion offsets for error messages
640     // unless a transliterator is used -
641     // a text transformation will reorder characters in unpredictable ways
642     UBool useOffsets = TRUE;
643 
644     // Open the correct input file or connect to stdin for reading input
645 
646     if (infilestr != 0 && strcmp(infilestr, "-")) {
647         infile = fopen(infilestr, "rb");
648         if (infile == 0) {
649             UnicodeString str1(infilestr, "");
650             str1.append((UChar32) 0);
651             UnicodeString str2(strerror(errno), "");
652             str2.append((UChar32) 0);
653             initMsg(pname);
654             u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
655             return FALSE;
656         }
657     } else {
658         infilestr = "-";
659         infile = stdin;
660 #ifdef USE_FILENO_BINARY_MODE
661         if (setmode(fileno(stdin), O_BINARY) == -1) {
662             initMsg(pname);
663             u_wmsg(stderr, "cantSetInBinMode");
664             return FALSE;
665         }
666 #endif
667     }
668 
669     if (verbose) {
670         fprintf(stderr, "%s:\n", infilestr);
671     }
672 
673 #if !UCONFIG_NO_TRANSLITERATION
674     // Create transliterator as needed.
675 
676     if (translit != NULL && *translit) {
677         UParseError parse;
678         UnicodeString str(translit), pestr;
679 
680         /* Create from rules or by ID as needed. */
681 
682         parse.line = -1;
683 
684         if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
685             t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
686         } else {
687             t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
688         }
689 
690         if (U_FAILURE(err)) {
691             str.append((UChar32) 0);
692             initMsg(pname);
693 
694             if (parse.line >= 0) {
695                 UChar linebuf[20], offsetbuf[20];
696                 uprv_itou(linebuf, 20, parse.line, 10, 0);
697                 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
698                 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
699                     u_wmsg_errorName(err), linebuf, offsetbuf);
700             } else {
701                 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
702                     u_wmsg_errorName(err));
703             }
704 
705             if (t) {
706                 delete t;
707                 t = 0;
708             }
709             goto error_exit;
710         }
711 
712         useOffsets = FALSE;
713     }
714 #endif
715 
716     // Create codepage converter. If the codepage or its aliases weren't
717     // available, it returns NULL and a failure code. We also set the
718     // callbacks, and return errors in the same way.
719 
720     convfrom = ucnv_open(fromcpage, &err);
721     if (U_FAILURE(err)) {
722         UnicodeString str(fromcpage, "");
723         initMsg(pname);
724         u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
725             u_wmsg_errorName(err));
726         goto error_exit;
727     }
728     ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
729     if (U_FAILURE(err)) {
730         initMsg(pname);
731         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
732         goto error_exit;
733     }
734 
735     convto = ucnv_open(tocpage, &err);
736     if (U_FAILURE(err)) {
737         UnicodeString str(tocpage, "");
738         initMsg(pname);
739         u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
740             u_wmsg_errorName(err));
741         goto error_exit;
742     }
743     ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
744     if (U_FAILURE(err)) {
745         initMsg(pname);
746         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
747         goto error_exit;
748     }
749     ucnv_setFallback(convto, fallback);
750 
751     UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
752     int8_t sig;
753 
754     // OK, we can convert now.
755     sig = signature;
756     rd = 0;
757 
758     do {
759         willexit = FALSE;
760 
761         // input file offset at the beginning of the next buffer
762         infoffset += rd;
763 
764         rd = fread(buf, 1, bufsz, infile);
765         if (ferror(infile) != 0) {
766             UnicodeString str(strerror(errno));
767             initMsg(pname);
768             u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
769             goto error_exit;
770         }
771 
772         // Convert the read buffer into the new encoding via Unicode.
773         // After the call 'unibufp' will be placed behind the last
774         // character that was converted in the 'unibuf'.
775         // Also the 'cbufp' is positioned behind the last converted
776         // character.
777         // At the last conversion in the file, flush should be set to
778         // true so that we get all characters converted.
779         //
780         // The converter must be flushed at the end of conversion so
781         // that characters on hold also will be written.
782 
783         cbufp = buf;
784         flush = (UBool)(rd != bufsz);
785 
786         // convert until the input is consumed
787         do {
788             // remember the start of the current byte-to-Unicode conversion
789             prevbufp = cbufp;
790 
791             unibuf = unibufp = u.getBuffer((int32_t)bufsz);
792 
793             // Use bufsz instead of u.getCapacity() for the targetLimit
794             // so that we don't overflow fromoffsets[].
795             ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
796                 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
797 
798             ulen = (int32_t)(unibufp - unibuf);
799             u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
800 
801             // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
802             // converting all of the input bytes.
803             // It works like this because ucnv_toUnicode() returns only under the
804             // following conditions:
805             // - an error occurred during conversion (an error code is set)
806             // - the target buffer is filled (the error code indicates an overflow)
807             // - the source is consumed
808             // That is, if the error code does not indicate a failure,
809             // not even an overflow, then the source must be consumed entirely.
810             fromSawEndOfBytes = (UBool)U_SUCCESS(err);
811 
812             if (err == U_BUFFER_OVERFLOW_ERROR) {
813                 err = U_ZERO_ERROR;
814             } else if (U_FAILURE(err)) {
815                 char pos[32], errorBytes[32];
816                 int8_t i, length, errorLength;
817 
818                 UErrorCode localError = U_ZERO_ERROR;
819                 errorLength = (int8_t)sizeof(errorBytes);
820                 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
821                 if (U_FAILURE(localError) || errorLength == 0) {
822                     errorLength = 1;
823                 }
824 
825                 // print the input file offset of the start of the error bytes:
826                 // input file offset of the current byte buffer +
827                 // length of the just consumed bytes -
828                 // length of the error bytes
829                 length =
830                     (int8_t)sprintf(pos, "%d",
831                         (int)(infoffset + (cbufp - buf) - errorLength));
832 
833                 // output the bytes that caused the error
834                 UnicodeString str;
835                 for (i = 0; i < errorLength; ++i) {
836                     if (i > 0) {
837                         str.append((UChar)uSP);
838                     }
839                     str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
840                     str.append(nibbleToHex((uint8_t)errorBytes[i]));
841                 }
842 
843                 initMsg(pname);
844                 u_wmsg(stderr, "problemCvtToU",
845                         UnicodeString(pos, length, "").getTerminatedBuffer(),
846                         str.getTerminatedBuffer(),
847                         u_wmsg_errorName(err));
848 
849                 willexit = TRUE;
850                 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
851             }
852 
853             // Replaced a check for whether the input was consumed by
854             // looping until it is; message key "premEndInput" now obsolete.
855 
856             if (ulen == 0) {
857                 continue;
858             }
859 
860             // remove a U+FEFF Unicode signature character if requested
861             if (sig < 0) {
862                 if (u.charAt(0) == uSig) {
863                     u.remove(0, 1);
864 
865                     // account for the removed UChar and offset
866                     --ulen;
867 
868                     if (useOffsets) {
869                         // remove an offset from fromoffsets[] as well
870                         // to keep the array parallel with the UChars
871                         memmove(fromoffsets, fromoffsets + 1, ulen * 4);
872                     }
873 
874                 }
875                 sig = 0;
876             }
877 
878 #if !UCONFIG_NO_TRANSLITERATION
879             // Transliterate/transform if needed.
880 
881             // For transformation, we use chunking code -
882             // collect Unicode input until, for example, an end-of-line,
883             // then transform and output-convert that and continue collecting.
884             // This makes the transformation result independent of the buffer size
885             // while avoiding the slower keyboard mode.
886             // The end-of-chunk characters are completely included in the
887             // transformed string in case they are to be transformed themselves.
888             if (t != NULL) {
889                 UnicodeString out;
890                 int32_t chunkLimit;
891 
892                 do {
893                     chunkLimit = getChunkLimit(chunk, u);
894                     if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
895                         // use all of the rest at the end of the text
896                         chunkLimit = u.length();
897                     }
898                     if (chunkLimit >= 0) {
899                         // complete the chunk and transform it
900                         chunk.append(u, 0, chunkLimit);
901                         u.remove(0, chunkLimit);
902                         t->transliterate(chunk);
903 
904                         // append the transformation result to the result and empty the chunk
905                         out.append(chunk);
906                         chunk.remove();
907                     } else {
908                         // continue collecting the chunk
909                         chunk.append(u);
910                         break;
911                     }
912                 } while (!u.isEmpty());
913 
914                 u = out;
915                 ulen = u.length();
916             }
917 #endif
918 
919             // add a U+FEFF Unicode signature character if requested
920             // and possible/necessary
921             if (sig > 0) {
922                 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
923                     u.insert(0, (UChar)uSig);
924 
925                     if (useOffsets) {
926                         // insert a pseudo-offset into fromoffsets[] as well
927                         // to keep the array parallel with the UChars
928                         memmove(fromoffsets + 1, fromoffsets, ulen * 4);
929                         fromoffsets[0] = -1;
930                     }
931 
932                     // account for the additional UChar and offset
933                     ++ulen;
934                 }
935                 sig = 0;
936             }
937 
938             // Convert the Unicode buffer into the destination codepage
939             // Again 'bufp' will be placed behind the last converted character
940             // And 'unibufp' will be placed behind the last converted unicode character
941             // At the last conversion flush should be set to true to ensure that
942             // all characters left get converted
943 
944             unibuf = unibufbp = u.getBuffer();
945 
946             do {
947                 bufp = outbuf;
948 
949                 // Use fromSawEndOfBytes in addition to the flush flag -
950                 // it indicates whether the intermediate Unicode string
951                 // contains the very last UChars for the very last input bytes.
952                 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
953                                  &unibufbp,
954                                  unibuf + ulen,
955                                  NULL, (UBool)(flush && fromSawEndOfBytes), &err);
956 
957                 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
958                 // converting all of the intermediate UChars.
959                 // See comment for fromSawEndOfBytes.
960                 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
961 
962                 if (err == U_BUFFER_OVERFLOW_ERROR) {
963                     err = U_ZERO_ERROR;
964                 } else if (U_FAILURE(err)) {
965                     UChar errorUChars[4];
966                     const char *errtag;
967                     char pos[32];
968                     UChar32 c;
969                     int8_t i, length, errorLength;
970 
971                     UErrorCode localError = U_ZERO_ERROR;
972                     errorLength = (int8_t)LENGTHOF(errorUChars);
973                     ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
974                     if (U_FAILURE(localError) || errorLength == 0) {
975                         // need at least 1 so that we don't access beyond the length of fromoffsets[]
976                         errorLength = 1;
977                     }
978 
979                     int32_t ferroffset;
980 
981                     if (useOffsets) {
982                         // Unicode buffer offset of the start of the error UChars
983                         ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
984                         if (ferroffset < 0) {
985                             // approximation - the character started in the previous Unicode buffer
986                             ferroffset = 0;
987                         }
988 
989                         // get the corresponding byte offset out of fromoffsets[]
990                         // go back if the offset is not known for some of the UChars
991                         int32_t fromoffset;
992                         do {
993                             fromoffset = fromoffsets[ferroffset];
994                         } while (fromoffset < 0 && --ferroffset >= 0);
995 
996                         // total input file offset =
997                         // input file offset of the current byte buffer +
998                         // byte buffer offset of where the current Unicode buffer is converted from +
999                         // fromoffsets[Unicode offset]
1000                         ferroffset = infoffset + (prevbufp - buf) + fromoffset;
1001                         errtag = "problemCvtFromU";
1002                     } else {
1003                         // Do not use fromoffsets if (t != NULL) because the Unicode text may
1004                         // be different from what the offsets refer to.
1005 
1006                         // output file offset
1007                         ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
1008                         errtag = "problemCvtFromUOut";
1009                     }
1010 
1011                     length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
1012 
1013                     // output the code points that caused the error
1014                     UnicodeString str;
1015                     for (i = 0; i < errorLength;) {
1016                         if (i > 0) {
1017                             str.append((UChar)uSP);
1018                         }
1019                         U16_NEXT(errorUChars, i, errorLength, c);
1020                         if (c >= 0x100000) {
1021                             str.append(nibbleToHex((uint8_t)(c >> 20)));
1022                         }
1023                         if (c >= 0x10000) {
1024                             str.append(nibbleToHex((uint8_t)(c >> 16)));
1025                         }
1026                         str.append(nibbleToHex((uint8_t)(c >> 12)));
1027                         str.append(nibbleToHex((uint8_t)(c >> 8)));
1028                         str.append(nibbleToHex((uint8_t)(c >> 4)));
1029                         str.append(nibbleToHex((uint8_t)c));
1030                     }
1031 
1032                     initMsg(pname);
1033                     u_wmsg(stderr, errtag,
1034                             UnicodeString(pos, length, "").getTerminatedBuffer(),
1035                             str.getTerminatedBuffer(),
1036                            u_wmsg_errorName(err));
1037                     u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
1038 
1039                     willexit = TRUE;
1040                     err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
1041                 }
1042 
1043                 // Replaced a check for whether the intermediate Unicode characters were all consumed by
1044                 // looping until they are; message key "premEnd" now obsolete.
1045 
1046                 // Finally, write the converted buffer to the output file
1047                 size_t outlen = (size_t) (bufp - outbuf);
1048                 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
1049                 if (wr != outlen) {
1050                     UnicodeString str(strerror(errno));
1051                     initMsg(pname);
1052                     u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
1053                     willexit = TRUE;
1054                 }
1055 
1056                 if (willexit) {
1057                     goto error_exit;
1058                 }
1059             } while (!toSawEndOfUnicode);
1060         } while (!fromSawEndOfBytes);
1061     } while (!flush);           // Stop when we have flushed the
1062                                 // converters (this means that it's
1063                                 // the end of output)
1064 
1065     goto normal_exit;
1066 
1067 error_exit:
1068     ret = FALSE;
1069 
1070 normal_exit:
1071     // Cleanup.
1072 
1073     ucnv_close(convfrom);
1074     ucnv_close(convto);
1075 
1076 #if !UCONFIG_NO_TRANSLITERATION
1077     delete t;
1078 #endif
1079 
1080     if (infile != stdin) {
1081         fclose(infile);
1082     }
1083 
1084     return ret;
1085 }
1086 
usage(const char * pname,int ecode)1087 static void usage(const char *pname, int ecode) {
1088     const UChar *msg;
1089     int32_t msgLen;
1090     UErrorCode err = U_ZERO_ERROR;
1091     FILE *fp = ecode ? stderr : stdout;
1092     int res;
1093 
1094     initMsg(pname);
1095     msg =
1096         ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
1097                             &msgLen, &err);
1098     UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
1099     UnicodeString mname(msg, msgLen + 1);
1100 
1101     res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
1102     if (!ecode) {
1103         if (!res) {
1104             fputc('\n', fp);
1105         }
1106         if (!u_wmsg(fp, "help")) {
1107             /* Now dump callbacks and finish. */
1108 
1109             int i, count =
1110                 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
1111             for (i = 0; i < count; ++i) {
1112                 fprintf(fp, " %s", transcode_callbacks[i].name);
1113             }
1114             fputc('\n', fp);
1115         }
1116     }
1117 
1118     exit(ecode);
1119 }
1120 
1121 extern int
main(int argc,char ** argv)1122 main(int argc, char **argv)
1123 {
1124     FILE *outfile;
1125     int ret = 0;
1126 
1127     size_t bufsz = DEFAULT_BUFSZ;
1128 
1129     const char *fromcpage = 0;
1130     const char *tocpage = 0;
1131     const char *translit = 0;
1132     const char *outfilestr = 0;
1133     UBool fallback = FALSE;
1134 
1135     UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
1136     const void *fromuctxt = 0;
1137     UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
1138     const void *touctxt = 0;
1139 
1140     char **iter, **remainArgv, **remainArgvLimit;
1141     char **end = argv + argc;
1142 
1143     const char *pname;
1144 
1145     UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
1146     const char *printName = 0;
1147 
1148     UBool verbose = FALSE;
1149     UErrorCode status = U_ZERO_ERROR;
1150 
1151     ConvertFile cf;
1152 
1153     /* Initialize ICU */
1154     u_init(&status);
1155     if (U_FAILURE(status)) {
1156         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
1157             argv[0], u_errorName(status));
1158         exit(1);
1159     }
1160 
1161     // Get and prettify pname.
1162     pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
1163 #ifdef U_WINDOWS
1164     if (!pname) {
1165         pname = uprv_strrchr(*argv, '/');
1166     }
1167 #endif
1168     if (!pname) {
1169         pname = *argv;
1170     } else {
1171         ++pname;
1172     }
1173 
1174     // First, get the arguments from command-line
1175     // to know the codepages to convert between
1176 
1177     remainArgv = remainArgvLimit = argv + 1;
1178     for (iter = argv + 1; iter != end; iter++) {
1179         // Check for from charset
1180         if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
1181             iter++;
1182             if (iter != end)
1183                 fromcpage = *iter;
1184             else
1185                 usage(pname, 1);
1186         } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
1187             iter++;
1188             if (iter != end)
1189                 tocpage = *iter;
1190             else
1191                 usage(pname, 1);
1192         } else if (strcmp("-x", *iter) == 0) {
1193             iter++;
1194             if (iter != end)
1195                 translit = *iter;
1196             else
1197                 usage(pname, 1);
1198         } else if (!strcmp("--fallback", *iter)) {
1199             fallback = TRUE;
1200         } else if (!strcmp("--no-fallback", *iter)) {
1201             fallback = FALSE;
1202         } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
1203             iter++;
1204             if (iter != end) {
1205                 bufsz = atoi(*iter);
1206                 if ((int) bufsz <= 0) {
1207                     initMsg(pname);
1208                     UnicodeString str(*iter);
1209                     initMsg(pname);
1210                     u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
1211                     return 3;
1212                 }
1213             } else {
1214                 usage(pname, 1);
1215             }
1216         } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
1217             if (printTranslits) {
1218                 usage(pname, 1);
1219             }
1220             printConvs = TRUE;
1221         } else if (strcmp("--default-code", *iter) == 0) {
1222             if (printTranslits) {
1223                 usage(pname, 1);
1224             }
1225             printName = ucnv_getDefaultName();
1226         } else if (strcmp("--list-code", *iter) == 0) {
1227             if (printTranslits) {
1228                 usage(pname, 1);
1229             }
1230 
1231             iter++;
1232             if (iter != end) {
1233                 UErrorCode e = U_ZERO_ERROR;
1234                 printName = ucnv_getAlias(*iter, 0, &e);
1235                 if (U_FAILURE(e) || !printName) {
1236                     UnicodeString str(*iter);
1237                     initMsg(pname);
1238                     u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
1239                     return 2;
1240                 }
1241             } else
1242                 usage(pname, 1);
1243         } else if (strcmp("--canon", *iter) == 0) {
1244             printCanon = TRUE;
1245         } else if (strcmp("-L", *iter) == 0
1246             || !strcmp("--list-transliterators", *iter)) {
1247             if (printConvs) {
1248                 usage(pname, 1);
1249             }
1250             printTranslits = TRUE;
1251         } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
1252             || !strcmp("--help", *iter)) {
1253             usage(pname, 0);
1254         } else if (!strcmp("-c", *iter)) {
1255             fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
1256         } else if (!strcmp("--to-callback", *iter)) {
1257             iter++;
1258             if (iter != end) {
1259                 const struct callback_ent *cbe = findCallback(*iter);
1260                 if (cbe) {
1261                     fromucallback = cbe->fromu;
1262                     fromuctxt = cbe->fromuctxt;
1263                 } else {
1264                     UnicodeString str(*iter);
1265                     initMsg(pname);
1266                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1267                     return 4;
1268                 }
1269             } else {
1270                 usage(pname, 1);
1271             }
1272         } else if (!strcmp("--from-callback", *iter)) {
1273             iter++;
1274             if (iter != end) {
1275                 const struct callback_ent *cbe = findCallback(*iter);
1276                 if (cbe) {
1277                     toucallback = cbe->tou;
1278                     touctxt = cbe->touctxt;
1279                 } else {
1280                     UnicodeString str(*iter);
1281                     initMsg(pname);
1282                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1283                     return 4;
1284                 }
1285             } else {
1286                 usage(pname, 1);
1287             }
1288         } else if (!strcmp("-i", *iter)) {
1289             toucallback = UCNV_TO_U_CALLBACK_SKIP;
1290         } else if (!strcmp("--callback", *iter)) {
1291             iter++;
1292             if (iter != end) {
1293                 const struct callback_ent *cbe = findCallback(*iter);
1294                 if (cbe) {
1295                     fromucallback = cbe->fromu;
1296                     fromuctxt = cbe->fromuctxt;
1297                     toucallback = cbe->tou;
1298                     touctxt = cbe->touctxt;
1299                 } else {
1300                     UnicodeString str(*iter);
1301                     initMsg(pname);
1302                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1303                     return 4;
1304                 }
1305             } else {
1306                 usage(pname, 1);
1307             }
1308         } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
1309             verbose = FALSE;
1310         } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
1311             verbose = TRUE;
1312         } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
1313             printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
1314             return 0;
1315         } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
1316             ++iter;
1317             if (iter != end && !outfilestr) {
1318                 outfilestr = *iter;
1319             } else {
1320                 usage(pname, 1);
1321             }
1322         } else if (0 == strcmp("--add-signature", *iter)) {
1323             cf.signature = 1;
1324         } else if (0 == strcmp("--remove-signature", *iter)) {
1325             cf.signature = -1;
1326         } else if (**iter == '-' && (*iter)[1]) {
1327             usage(pname, 1);
1328         } else {
1329             // move a non-option up in argv[]
1330             *remainArgvLimit++ = *iter;
1331         }
1332     }
1333 
1334     if (printConvs || printName) {
1335         return printConverters(pname, printName, printCanon) ? 2 : 0;
1336     } else if (printTranslits) {
1337         return printTransliterators(printCanon) ? 3 : 0;
1338     }
1339 
1340     if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
1341         fromcpage = ucnv_getDefaultName();
1342     }
1343     if (!tocpage || !uprv_strcmp(tocpage, "-")) {
1344         tocpage = ucnv_getDefaultName();
1345     }
1346 
1347     // Open the correct output file or connect to stdout for reading input
1348     if (outfilestr != 0 && strcmp(outfilestr, "-")) {
1349         outfile = fopen(outfilestr, "wb");
1350         if (outfile == 0) {
1351             UnicodeString str1(outfilestr, "");
1352             UnicodeString str2(strerror(errno), "");
1353             initMsg(pname);
1354             u_wmsg(stderr, "cantCreateOutputF",
1355                 str1.getBuffer(), str2.getBuffer());
1356             return 1;
1357         }
1358     } else {
1359         outfilestr = "-";
1360         outfile = stdout;
1361 #ifdef USE_FILENO_BINARY_MODE
1362         if (setmode(fileno(outfile), O_BINARY) == -1) {
1363             u_wmsg(stderr, "cantSetOutBinMode");
1364             exit(-1);
1365         }
1366 #endif
1367     }
1368 
1369     /* Loop again on the arguments to find all the input files, and
1370     convert them. */
1371 
1372     cf.setBufferSize(bufsz);
1373 
1374     if(remainArgv < remainArgvLimit) {
1375         for (iter = remainArgv; iter != remainArgvLimit; iter++) {
1376             if (!cf.convertFile(
1377                     pname, fromcpage, toucallback, touctxt, tocpage,
1378                     fromucallback, fromuctxt, fallback, translit, *iter,
1379                     outfile, verbose)
1380             ) {
1381                 goto error_exit;
1382             }
1383         }
1384     } else {
1385         if (!cf.convertFile(
1386                 pname, fromcpage, toucallback, touctxt, tocpage,
1387                 fromucallback, fromuctxt, fallback, translit, 0,
1388                 outfile, verbose)
1389         ) {
1390             goto error_exit;
1391         }
1392     }
1393 
1394     goto normal_exit;
1395 error_exit:
1396 #if !UCONFIG_NO_LEGACY_CONVERSION
1397     ret = 1;
1398 #else
1399     fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n");
1400 #endif
1401 normal_exit:
1402 
1403     if (outfile != stdout) {
1404         fclose(outfile);
1405     }
1406 
1407     return ret;
1408 }
1409 
1410 
1411 /*
1412  * Hey, Emacs, please set the following:
1413  *
1414  * Local Variables:
1415  * indent-tabs-mode: nil
1416  * End:
1417  *
1418  */
1419