• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*****************************************************************************
2 *
3 *   Copyright (C) 1999-2006, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *
6 ******************************************************************************/
7 
8 /*
9  * uconv(1): an iconv(1)-like converter using ICU.
10  *
11  * Original code by Jonas Utterström <jonas.utterstrom@vittran.norrnod.se>
12  * contributed in 1999.
13  *
14  * Conversion to the C conversion API and many improvements by
15  * Yves Arrouye <yves@realnames.com>, current maintainer.
16  *
17  * Markus Scherer maintainer from 2003.
18  * See source code repository history for changes.
19  */
20 
21 #include <unicode/utypes.h>
22 #include <unicode/putil.h>
23 #include <unicode/ucnv.h>
24 #include <unicode/uenum.h>
25 #include <unicode/unistr.h>
26 #include <unicode/translit.h>
27 #include <unicode/uset.h>
28 #include <unicode/uclean.h>
29 
30 #include <stdio.h>
31 #include <errno.h>
32 #include <string.h>
33 #include <stdlib.h>
34 
35 #include "cmemory.h"
36 #include "cstring.h"
37 #include "ustrfmt.h"
38 
39 #include "unicode/uwmsg.h"
40 
41 U_NAMESPACE_USE
42 
43 #if (defined(U_WINDOWS) || defined(U_CYGWIN)) && !defined(__STRICT_ANSI__)
44 #include <io.h>
45 #include <fcntl.h>
46 #if defined(U_WINDOWS)
47 #define USE_FILENO_BINARY_MODE 1
48 /* Windows likes to rename Unix-like functions */
49 #ifndef fileno
50 #define fileno _fileno
51 #endif
52 #ifndef setmode
53 #define setmode _setmode
54 #endif
55 #ifndef O_BINARY
56 #define O_BINARY _O_BINARY
57 #endif
58 #endif
59 #endif
60 
61 #ifdef UCONVMSG_LINK
62 /* below from the README */
63 #include "unicode/utypes.h"
64 #include "unicode/udata.h"
65 U_CFUNC char uconvmsg_dat[];
66 #endif
67 
68 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
69 
70 #define DEFAULT_BUFSZ   4096
71 #define UCONVMSG "uconvmsg"
72 
73 static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
74 
75 /*
76  * Initialize the message bundle so that message strings can be fetched
77  * by u_wmsg().
78  *
79  */
80 
initMsg(const char * pname)81 static void initMsg(const char *pname) {
82     static int ps = 0;
83 
84     if (!ps) {
85         char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
86         UErrorCode err = U_ZERO_ERROR;
87 
88         ps = 1;
89 
90         /* Set up our static data - if any */
91 #ifdef UCONVMSG_LINK
92         udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
93         if (U_FAILURE(err)) {
94           fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
95                   pname, u_errorName(err));
96           err = U_ZERO_ERROR; /* It may still fail */
97         }
98 #endif
99 
100         /* Get messages. */
101         gBundle = u_wmsg_setPath(UCONVMSG, &err);
102         if (U_FAILURE(err)) {
103             fprintf(stderr,
104                     "%s: warning: couldn't open bundle %s: %s\n",
105                     pname, UCONVMSG, u_errorName(err));
106 #ifdef UCONVMSG_LINK
107             fprintf(stderr,
108                     "%s: setAppData was called, internal data %s failed to load\n",
109                         pname, UCONVMSG);
110 #endif
111 
112             err = U_ZERO_ERROR;
113             /* that was try #1, try again with a path */
114             uprv_strcpy(dataPath, u_getDataDirectory());
115             uprv_strcat(dataPath, U_FILE_SEP_STRING);
116             uprv_strcat(dataPath, UCONVMSG);
117 
118             gBundle = u_wmsg_setPath(dataPath, &err);
119             if (U_FAILURE(err)) {
120                 fprintf(stderr,
121                     "%s: warning: still couldn't open bundle %s: %s\n",
122                     pname, dataPath, u_errorName(err));
123                 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
124             }
125         }
126     }
127 }
128 
129 /* Mapping of callback names to the callbacks passed to the converter
130    API. */
131 
132 static struct callback_ent {
133     const char *name;
134     UConverterFromUCallback fromu;
135     const void *fromuctxt;
136     UConverterToUCallback tou;
137     const void *touctxt;
138 } transcode_callbacks[] = {
139     { "substitute",
140       UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
141       UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
142     { "skip",
143       UCNV_FROM_U_CALLBACK_SKIP, 0,
144       UCNV_TO_U_CALLBACK_SKIP, 0 },
145     { "stop",
146       UCNV_FROM_U_CALLBACK_STOP, 0,
147       UCNV_TO_U_CALLBACK_STOP, 0 },
148     { "escape",
149       UCNV_FROM_U_CALLBACK_ESCAPE, 0,
150       UCNV_TO_U_CALLBACK_ESCAPE, 0},
151     { "escape-icu",
152       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
153       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
154     { "escape-java",
155       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
156       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
157     { "escape-c",
158       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
159       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
160     { "escape-xml",
161       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
162       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
163     { "escape-xml-hex",
164       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
165       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
166     { "escape-xml-dec",
167       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
168       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
169     { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
170       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
171 };
172 
173 /* Return a pointer to a callback record given its name. */
174 
findCallback(const char * name)175 static const struct callback_ent *findCallback(const char *name) {
176     int i, count =
177         sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
178 
179     /* We'll do a linear search, there aren't many of them and bsearch()
180        may not be that portable. */
181 
182     for (i = 0; i < count; ++i) {
183         if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
184             return &transcode_callbacks[i];
185         }
186     }
187 
188     return 0;
189 }
190 
191 /* Print converter information. If lookfor is set, only that converter will
192    be printed, otherwise all converters will be printed. If canon is non
193    zero, tags and aliases for each converter are printed too, in the format
194    expected for convrters.txt(5). */
195 
printConverters(const char * pname,const char * lookfor,UBool canon)196 static int printConverters(const char *pname, const char *lookfor,
197     UBool canon)
198 {
199     UErrorCode err = U_ZERO_ERROR;
200     int32_t num;
201     uint16_t num_stds;
202     const char **stds;
203 
204     /* If there is a specified name, just handle that now. */
205 
206     if (lookfor) {
207         if (!canon) {
208             printf("%s\n", lookfor);
209             return 0;
210         } else {
211         /*  Because we are printing a canonical name, we need the
212             true converter name. We've done that already except for
213             the default name (because we want to print the exact
214             name one would get when calling ucnv_getDefaultName()
215             in non-canon mode). But since we do not know at this
216             point if we have the default name or something else, we
217             need to normalize again to the canonical converter
218             name. */
219 
220             const char *truename = ucnv_getAlias(lookfor, 0, &err);
221             if (U_SUCCESS(err)) {
222                 lookfor = truename;
223             } else {
224                 err = U_ZERO_ERROR;
225             }
226         }
227     }
228 
229     /* Print converter names. We come here for one of two reasons: we
230        are printing all the names (lookfor was null), or we have a
231        single converter to print but in canon mode, hence we need to
232        get to it in order to print everything. */
233 
234     num = ucnv_countAvailable();
235     if (num <= 0) {
236         initMsg(pname);
237         u_wmsg(stderr, "cantGetNames");
238         return -1;
239     }
240     if (lookfor) {
241         num = 1;                /* We know where we want to be. */
242     }
243 
244     num_stds = ucnv_countStandards();
245     stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
246     if (!stds) {
247         u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
248         return -1;
249     } else {
250         uint16_t s;
251 
252         if (canon) {
253             printf("{ ");
254         }
255         for (s = 0; s < num_stds; ++s) {
256             stds[s] = ucnv_getStandard(s, &err);
257             if (canon) {
258                 printf("%s ", stds[s]);
259             }
260             if (U_FAILURE(err)) {
261                 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
262                 return -1;
263             }
264         }
265         if (canon) {
266             puts("}");
267         }
268     }
269 
270     for (int32_t i = 0; i < num; i++) {
271         const char *name;
272         uint16_t num_aliases;
273 
274         /* Set the name either to what we are looking for, or
275         to the current converter name. */
276 
277         if (lookfor) {
278             name = lookfor;
279         } else {
280             name = ucnv_getAvailableName(i);
281         }
282 
283         /* Get all the aliases associated to the name. */
284 
285         err = U_ZERO_ERROR;
286         num_aliases = ucnv_countAliases(name, &err);
287         if (U_FAILURE(err)) {
288             printf("%s", name);
289 
290             UnicodeString str(name, "");
291             putchar('\t');
292             u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
293                 u_wmsg_errorName(err));
294             return -1;
295         } else {
296             uint16_t a, s, t;
297 
298             /* Write all the aliases and their tags. */
299 
300             for (a = 0; a < num_aliases; ++a) {
301                 const char *alias = ucnv_getAlias(name, a, &err);
302 
303                 if (U_FAILURE(err)) {
304                     UnicodeString str(name, "");
305                     putchar('\t');
306                     u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
307                         u_wmsg_errorName(err));
308                     return -1;
309                 }
310 
311                 /* Print the current alias so that it looks right. */
312                 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
313                                  alias,
314                                  (canon ? "" : " "));
315 
316                 /* Look (slowly, linear searching) for a tag. */
317 
318                 if (canon) {
319                     /* -1 to skip the last standard */
320                     for (s = t = 0; s < num_stds-1; ++s) {
321                         UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
322                         if (U_SUCCESS(err)) {
323                             /* List the standard tags */
324                             const char *standardName;
325                             UBool isFirst = TRUE;
326                             UErrorCode enumError = U_ZERO_ERROR;
327                             while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
328                                 /* See if this alias is supported by this standard. */
329                                 if (!strcmp(standardName, alias)) {
330                                     if (!t) {
331                                         printf(" {");
332                                         t = 1;
333                                     }
334                                     /* Print a * after the default standard name */
335                                     printf(" %s%s", stds[s], (isFirst ? "*" : ""));
336                                 }
337                                 isFirst = FALSE;
338                             }
339                         }
340                     }
341                     if (t) {
342                         printf(" }");
343                     }
344                 }
345                 /* Terminate this entry. */
346                 if (canon) {
347                     puts("");
348                 }
349 
350                 /* Move on. */
351             }
352             /* Terminate this entry. */
353             if (!canon) {
354                 puts("");
355             }
356         }
357     }
358 
359     /* Free temporary data. */
360 
361     uprv_free(stds);
362 
363     /* Success. */
364 
365     return 0;
366 }
367 
368 /* Print all available transliterators. If canon is non zero, print
369    one transliterator per line. */
370 
printTransliterators(UBool canon)371 static int printTransliterators(UBool canon)
372 {
373 #if UCONFIG_NO_TRANSLITERATION
374     printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
375     return 1;
376 #else
377     int32_t numtrans = utrans_countAvailableIDs(), i;
378     int buflen = 512;
379     char *buf = (char *) uprv_malloc(buflen);
380     char staticbuf[512];
381 
382     char sepchar = canon ? '\n' : ' ';
383 
384     if (!buf) {
385         buf = staticbuf;
386         buflen = sizeof(staticbuf);
387     }
388 
389     for (i = 0; i < numtrans; ++i) {
390         int32_t len = utrans_getAvailableID(i, buf, buflen);
391         if (len >= buflen - 1) {
392             if (buf != staticbuf) {
393                 buflen <<= 1;
394                 if (buflen < len) {
395                     buflen = len + 64;
396                 }
397                 buf = (char *) uprv_realloc(buf, buflen);
398                 if (!buf) {
399                     buf = staticbuf;
400                     buflen = sizeof(staticbuf);
401                 }
402             }
403             utrans_getAvailableID(i, buf, buflen);
404             if (len >= buflen) {
405                 uprv_strcpy(buf + buflen - 4, "..."); /* Truncate the name. */
406             }
407         }
408 
409         printf("%s", buf);
410         if (i < numtrans - 1) {
411             putchar(sepchar);
412         }
413     }
414 
415     /* Add a terminating newline if needed. */
416 
417     if (sepchar != '\n') {
418         putchar('\n');
419     }
420 
421     /* Free temporary data. */
422 
423     if (buf != staticbuf) {
424         uprv_free(buf);
425     }
426 
427     /* Success. */
428 
429     return 0;
430 #endif
431 }
432 
// Named Unicode code points used by the conversion code below.
enum {
    uLF  = 0x000a,      // line feed
    uCR  = 0x000d,      // carriage return
    uSP  = 0x0020,      // space
    uNL  = 0x0085,      // newline
    uLS  = 0x2028,      // line separator
    uPS  = 0x2029,      // paragraph separator
    uSig = 0xfeff       // signature/BOM character
};
442 
443 static inline int32_t
getChunkLimit(const UnicodeString & prev,const UnicodeString & s)444 getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
445     // find one of
446     // CR, LF, CRLF, NL, LS, PS
447     // for paragraph ends (see UAX #13/Unicode 4)
448     // and include it in the chunk
449     // all of these characters are on the BMP
450     // do not include FF or VT in case they are part of a paragraph
451     // (important for bidi contexts)
452     static const UChar paraEnds[] = {
453         0xd, 0xa, 0x85, 0x2028, 0x2029
454     };
455     enum {
456         iCR, iLF, iNL, iLS, iPS, iCount
457     };
458 
459     // first, see if there is a CRLF split between prev and s
460     if (prev.endsWith(paraEnds + iCR, 1)) {
461         if (s.startsWith(paraEnds + iLF, 1)) {
462             return 1; // split CRLF, include the LF
463         } else if (!s.isEmpty()) {
464             return 0; // complete the last chunk
465         } else {
466             return -1; // wait for actual further contents to arrive
467         }
468     }
469 
470     const UChar *u = s.getBuffer(), *limit = u + s.length();
471     UChar c;
472 
473     while (u < limit) {
474         c = *u++;
475         if (
476             ((c < uSP) && (c == uCR || c == uLF)) ||
477             (c == uNL) ||
478             ((c & uLS) == uLS)
479         ) {
480             if (c == uCR) {
481                 // check for CRLF
482                 if (u == limit) {
483                     return -1; // LF may be in the next chunk
484                 } else if (*u == uLF) {
485                     ++u; // include the LF in this chunk
486                 }
487             }
488             return (int32_t)(u - s.getBuffer());
489         }
490     }
491 
492     return -1; // continue collecting the chunk
493 }
494 
// Result values of cnvSigType(): how a converter relates to the
// U+FEFF Unicode signature character (BOM).
enum {
    CNV_NO_FEFF   = 0,  // cannot convert U+FEFF
    CNV_WITH_FEFF = 1,  // can convert U+FEFF
    CNV_ADDS_FEFF = 2   // automatically adds/detects U+FEFF
};
500 
501 static inline UChar
nibbleToHex(uint8_t n)502 nibbleToHex(uint8_t n) {
503     n &= 0xf;
504     return
505         n <= 9 ?
506             (UChar)(0x30 + n) :
507             (UChar)((0x61 - 10) + n);
508 }
509 
510 // check the converter's Unicode signature properties;
511 // the fromUnicode side of the converter must be in its initial state
512 // and will be reset again if it was used
513 static int32_t
cnvSigType(UConverter * cnv)514 cnvSigType(UConverter *cnv) {
515     UErrorCode err;
516     int32_t result;
517 
518     // test if the output charset can convert U+FEFF
519     USet *set = uset_open(1, 0);
520     err = U_ZERO_ERROR;
521     ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
522     if (U_SUCCESS(err) && uset_contains(set, uSig)) {
523         result = CNV_WITH_FEFF;
524     } else {
525         result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
526     }
527     uset_close(set);
528 
529     if (result == CNV_WITH_FEFF) {
530         // test if the output charset emits a signature anyway
531         const UChar a[1] = { 0x61 }; // "a"
532         const UChar *in;
533 
534         char buffer[20];
535         char *out;
536 
537         in = a;
538         out = buffer;
539         err = U_ZERO_ERROR;
540         ucnv_fromUnicode(cnv,
541             &out, buffer + sizeof(buffer),
542             &in, a + 1,
543             NULL, TRUE, &err);
544         ucnv_resetFromUnicode(cnv);
545 
546         if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
547             U_SUCCESS(err)
548         ) {
549             result = CNV_ADDS_FEFF;
550         }
551     }
552 
553     return result;
554 }
555 
556 class ConvertFile {
557 public:
ConvertFile()558     ConvertFile() :
559         buf(NULL), outbuf(NULL), fromoffsets(NULL),
560         bufsz(0), signature(0) {}
561 
562     void
setBufferSize(size_t bufferSize)563     setBufferSize(size_t bufferSize) {
564         bufsz = bufferSize;
565 
566         buf = new char[2 * bufsz];
567         outbuf = buf + bufsz;
568 
569         // +1 for an added U+FEFF in the intermediate Unicode buffer
570         fromoffsets = new int32_t[bufsz + 1];
571     }
572 
~ConvertFile()573     ~ConvertFile() {
574         delete [] buf;
575         delete [] fromoffsets;
576     }
577 
578     UBool convertFile(const char *pname,
579                       const char *fromcpage,
580                       UConverterToUCallback toucallback,
581                       const void *touctxt,
582                       const char *tocpage,
583                       UConverterFromUCallback fromucallback,
584                       const void *fromuctxt,
585                       UBool fallback,
586                       const char *translit,
587                       const char *infilestr,
588                       FILE * outfile, int verbose);
589 private:
590     friend int main(int argc, char **argv);
591 
592     char *buf, *outbuf;
593     int32_t *fromoffsets;
594 
595     size_t bufsz;
596     int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
597 };
598 
599 // Convert a file from one encoding to another
600 UBool
convertFile(const char * pname,const char * fromcpage,UConverterToUCallback toucallback,const void * touctxt,const char * tocpage,UConverterFromUCallback fromucallback,const void * fromuctxt,UBool fallback,const char * translit,const char * infilestr,FILE * outfile,int verbose)601 ConvertFile::convertFile(const char *pname,
602                          const char *fromcpage,
603                          UConverterToUCallback toucallback,
604                          const void *touctxt,
605                          const char *tocpage,
606                          UConverterFromUCallback fromucallback,
607                          const void *fromuctxt,
608                          UBool fallback,
609                          const char *translit,
610                          const char *infilestr,
611                          FILE * outfile, int verbose)
612 {
613     FILE *infile;
614     UBool ret = TRUE;
615     UConverter *convfrom = 0;
616     UConverter *convto = 0;
617     UErrorCode err = U_ZERO_ERROR;
618     UBool flush;
619     const char *cbufp, *prevbufp;
620     char *bufp;
621 
622     uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
623 
624     const UChar *unibuf, *unibufbp;
625     UChar *unibufp;
626 
627     size_t rd, wr;
628 
629 #if !UCONFIG_NO_TRANSLITERATION
630     Transliterator *t = 0;      // Transliterator acting on Unicode data.
631     UnicodeString chunk;        // One chunk of the text being collected for transformation.
632 #endif
633     UnicodeString u;            // String to do the transliteration.
634     int32_t ulen;
635 
636     // use conversion offsets for error messages
637     // unless a transliterator is used -
638     // a text transformation will reorder characters in unpredictable ways
639     UBool useOffsets = TRUE;
640 
641     // Open the correct input file or connect to stdin for reading input
642 
643     if (infilestr != 0 && strcmp(infilestr, "-")) {
644         infile = fopen(infilestr, "rb");
645         if (infile == 0) {
646             UnicodeString str1(infilestr, "");
647             str1.append((UChar32) 0);
648             UnicodeString str2(strerror(errno), "");
649             str2.append((UChar32) 0);
650             initMsg(pname);
651             u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
652             return FALSE;
653         }
654     } else {
655         infilestr = "-";
656         infile = stdin;
657 #ifdef USE_FILENO_BINARY_MODE
658         if (setmode(fileno(stdin), O_BINARY) == -1) {
659             initMsg(pname);
660             u_wmsg(stderr, "cantSetInBinMode");
661             return FALSE;
662         }
663 #endif
664     }
665 
666     if (verbose) {
667         fprintf(stderr, "%s:\n", infilestr);
668     }
669 
670 #if !UCONFIG_NO_TRANSLITERATION
671     // Create transliterator as needed.
672 
673     if (translit != NULL && *translit) {
674         UParseError parse;
675         UnicodeString str(translit), pestr;
676 
677         /* Create from rules or by ID as needed. */
678 
679         parse.line = -1;
680 
681         if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
682             t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
683         } else {
684             t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
685         }
686 
687         if (U_FAILURE(err)) {
688             str.append((UChar32) 0);
689             initMsg(pname);
690 
691             if (parse.line >= 0) {
692                 UChar linebuf[20], offsetbuf[20];
693                 uprv_itou(linebuf, 20, parse.line, 10, 0);
694                 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
695                 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
696                     u_wmsg_errorName(err), linebuf, offsetbuf);
697             } else {
698                 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
699                     u_wmsg_errorName(err));
700             }
701 
702             if (t) {
703                 delete t;
704                 t = 0;
705             }
706             goto error_exit;
707         }
708 
709         useOffsets = FALSE;
710     }
711 #endif
712 
713     // Create codepage converter. If the codepage or its aliases weren't
714     // available, it returns NULL and a failure code. We also set the
715     // callbacks, and return errors in the same way.
716 
717     convfrom = ucnv_open(fromcpage, &err);
718     if (U_FAILURE(err)) {
719         UnicodeString str(fromcpage, "");
720         initMsg(pname);
721         u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
722             u_wmsg_errorName(err));
723         goto error_exit;
724     }
725     ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
726     if (U_FAILURE(err)) {
727         initMsg(pname);
728         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
729         goto error_exit;
730     }
731 
732     convto = ucnv_open(tocpage, &err);
733     if (U_FAILURE(err)) {
734         UnicodeString str(tocpage, "");
735         initMsg(pname);
736         u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
737             u_wmsg_errorName(err));
738         goto error_exit;
739     }
740     ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
741     if (U_FAILURE(err)) {
742         initMsg(pname);
743         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
744         goto error_exit;
745     }
746     ucnv_setFallback(convto, fallback);
747 
748     UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
749     int8_t sig;
750 
751     // OK, we can convert now.
752     sig = signature;
753     rd = 0;
754 
755     do {
756         willexit = FALSE;
757 
758         // input file offset at the beginning of the next buffer
759         infoffset += rd;
760 
761         rd = fread(buf, 1, bufsz, infile);
762         if (ferror(infile) != 0) {
763             UnicodeString str(strerror(errno));
764             initMsg(pname);
765             u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
766             goto error_exit;
767         }
768 
769         // Convert the read buffer into the new encoding via Unicode.
770         // After the call 'unibufp' will be placed behind the last
771         // character that was converted in the 'unibuf'.
772         // Also the 'cbufp' is positioned behind the last converted
773         // character.
774         // At the last conversion in the file, flush should be set to
775         // true so that we get all characters converted.
776         //
777         // The converter must be flushed at the end of conversion so
778         // that characters on hold also will be written.
779 
780         cbufp = buf;
781         flush = (UBool)(rd != bufsz);
782 
783         // convert until the input is consumed
784         do {
785             // remember the start of the current byte-to-Unicode conversion
786             prevbufp = cbufp;
787 
788             unibuf = unibufp = u.getBuffer((int32_t)bufsz);
789 
790             // Use bufsz instead of u.getCapacity() for the targetLimit
791             // so that we don't overflow fromoffsets[].
792             ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
793                 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
794 
795             ulen = (int32_t)(unibufp - unibuf);
796             u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
797 
798             // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
799             // converting all of the input bytes.
800             // It works like this because ucnv_toUnicode() returns only under the
801             // following conditions:
802             // - an error occurred during conversion (an error code is set)
803             // - the target buffer is filled (the error code indicates an overflow)
804             // - the source is consumed
805             // That is, if the error code does not indicate a failure,
806             // not even an overflow, then the source must be consumed entirely.
807             fromSawEndOfBytes = (UBool)U_SUCCESS(err);
808 
809             if (err == U_BUFFER_OVERFLOW_ERROR) {
810                 err = U_ZERO_ERROR;
811             } else if (U_FAILURE(err)) {
812                 char pos[32], errorBytes[32];
813                 int8_t i, length, errorLength;
814 
815                 UErrorCode localError = U_ZERO_ERROR;
816                 errorLength = (int8_t)sizeof(errorBytes);
817                 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
818                 if (U_FAILURE(localError) || errorLength == 0) {
819                     errorLength = 1;
820                 }
821 
822                 // print the input file offset of the start of the error bytes:
823                 // input file offset of the current byte buffer +
824                 // length of the just consumed bytes -
825                 // length of the error bytes
826                 length =
827                     (int8_t)sprintf(pos, "%d",
828                         (int)(infoffset + (cbufp - buf) - errorLength));
829 
830                 // output the bytes that caused the error
831                 UnicodeString str;
832                 for (i = 0; i < errorLength; ++i) {
833                     if (i > 0) {
834                         str.append((UChar)uSP);
835                     }
836                     str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
837                     str.append(nibbleToHex((uint8_t)errorBytes[i]));
838                 }
839 
840                 initMsg(pname);
841                 u_wmsg(stderr, "problemCvtToU",
842                         UnicodeString(pos, length, "").getTerminatedBuffer(),
843                         str.getTerminatedBuffer(),
844                         u_wmsg_errorName(err));
845 
846                 willexit = TRUE;
847                 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
848             }
849 
850             // Replaced a check for whether the input was consumed by
851             // looping until it is; message key "premEndInput" now obsolete.
852 
853             if (ulen == 0) {
854                 continue;
855             }
856 
857             // remove a U+FEFF Unicode signature character if requested
858             if (sig < 0) {
859                 if (u.charAt(0) == uSig) {
860                     u.remove(0, 1);
861 
862                     // account for the removed UChar and offset
863                     --ulen;
864 
865                     if (useOffsets) {
866                         // remove an offset from fromoffsets[] as well
867                         // to keep the array parallel with the UChars
868                         memmove(fromoffsets, fromoffsets + 1, ulen * 4);
869                     }
870 
871                 }
872                 sig = 0;
873             }
874 
875 #if !UCONFIG_NO_TRANSLITERATION
876             // Transliterate/transform if needed.
877 
878             // For transformation, we use chunking code -
879             // collect Unicode input until, for example, an end-of-line,
880             // then transform and output-convert that and continue collecting.
881             // This makes the transformation result independent of the buffer size
882             // while avoiding the slower keyboard mode.
883             // The end-of-chunk characters are completely included in the
884             // transformed string in case they are to be transformed themselves.
885             if (t != NULL) {
886                 UnicodeString out;
887                 int32_t chunkLimit;
888 
889                 do {
890                     chunkLimit = getChunkLimit(chunk, u);
891                     if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
892                         // use all of the rest at the end of the text
893                         chunkLimit = u.length();
894                     }
895                     if (chunkLimit >= 0) {
896                         // complete the chunk and transform it
897                         chunk.append(u, 0, chunkLimit);
898                         u.remove(0, chunkLimit);
899                         t->transliterate(chunk);
900 
901                         // append the transformation result to the result and empty the chunk
902                         out.append(chunk);
903                         chunk.remove();
904                     } else {
905                         // continue collecting the chunk
906                         chunk.append(u);
907                         break;
908                     }
909                 } while (!u.isEmpty());
910 
911                 u = out;
912                 ulen = u.length();
913             }
914 #endif
915 
916             // add a U+FEFF Unicode signature character if requested
917             // and possible/necessary
918             if (sig > 0) {
919                 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
920                     u.insert(0, (UChar)uSig);
921 
922                     if (useOffsets) {
923                         // insert a pseudo-offset into fromoffsets[] as well
924                         // to keep the array parallel with the UChars
925                         memmove(fromoffsets + 1, fromoffsets, ulen * 4);
926                         fromoffsets[0] = -1;
927                     }
928 
929                     // account for the additional UChar and offset
930                     ++ulen;
931                 }
932                 sig = 0;
933             }
934 
935             // Convert the Unicode buffer into the destination codepage
936             // Again 'bufp' will be placed behind the last converted character
937             // And 'unibufp' will be placed behind the last converted unicode character
938             // At the last conversion flush should be set to true to ensure that
939             // all characters left get converted
940 
941             unibuf = unibufbp = u.getBuffer();
942 
943             do {
944                 bufp = outbuf;
945 
946                 // Use fromSawEndOfBytes in addition to the flush flag -
947                 // it indicates whether the intermediate Unicode string
948                 // contains the very last UChars for the very last input bytes.
949                 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
950                                  &unibufbp,
951                                  unibuf + ulen,
952                                  NULL, (UBool)(flush && fromSawEndOfBytes), &err);
953 
954                 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
955                 // converting all of the intermediate UChars.
956                 // See comment for fromSawEndOfBytes.
957                 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
958 
959                 if (err == U_BUFFER_OVERFLOW_ERROR) {
960                     err = U_ZERO_ERROR;
961                 } else if (U_FAILURE(err)) {
962                     UChar errorUChars[4];
963                     const char *errtag;
964                     char pos[32];
965                     UChar32 c;
966                     int8_t i, length, errorLength;
967 
968                     UErrorCode localError = U_ZERO_ERROR;
969                     errorLength = (int8_t)LENGTHOF(errorUChars);
970                     ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
971                     if (U_FAILURE(localError) || errorLength == 0) {
972                         // need at least 1 so that we don't access beyond the length of fromoffsets[]
973                         errorLength = 1;
974                     }
975 
976                     int32_t ferroffset;
977 
978                     if (useOffsets) {
979                         // Unicode buffer offset of the start of the error UChars
980                         ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
981                         if (ferroffset < 0) {
982                             // approximation - the character started in the previous Unicode buffer
983                             ferroffset = 0;
984                         }
985 
986                         // get the corresponding byte offset out of fromoffsets[]
987                         // go back if the offset is not known for some of the UChars
988                         int32_t fromoffset;
989                         do {
990                             fromoffset = fromoffsets[ferroffset];
991                         } while (fromoffset < 0 && --ferroffset >= 0);
992 
993                         // total input file offset =
994                         // input file offset of the current byte buffer +
995                         // byte buffer offset of where the current Unicode buffer is converted from +
996                         // fromoffsets[Unicode offset]
997                         ferroffset = infoffset + (prevbufp - buf) + fromoffset;
998                         errtag = "problemCvtFromU";
999                     } else {
1000                         // Do not use fromoffsets if (t != NULL) because the Unicode text may
1001                         // be different from what the offsets refer to.
1002 
1003                         // output file offset
1004                         ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
1005                         errtag = "problemCvtFromUOut";
1006                     }
1007 
1008                     length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
1009 
1010                     // output the code points that caused the error
1011                     UnicodeString str;
1012                     for (i = 0; i < errorLength;) {
1013                         if (i > 0) {
1014                             str.append((UChar)uSP);
1015                         }
1016                         U16_NEXT(errorUChars, i, errorLength, c);
1017                         if (c >= 0x100000) {
1018                             str.append(nibbleToHex((uint8_t)(c >> 20)));
1019                         }
1020                         if (c >= 0x10000) {
1021                             str.append(nibbleToHex((uint8_t)(c >> 16)));
1022                         }
1023                         str.append(nibbleToHex((uint8_t)(c >> 12)));
1024                         str.append(nibbleToHex((uint8_t)(c >> 8)));
1025                         str.append(nibbleToHex((uint8_t)(c >> 4)));
1026                         str.append(nibbleToHex((uint8_t)c));
1027                     }
1028 
1029                     initMsg(pname);
1030                     u_wmsg(stderr, errtag,
1031                             UnicodeString(pos, length, "").getTerminatedBuffer(),
1032                             str.getTerminatedBuffer(),
1033                            u_wmsg_errorName(err));
1034                     u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
1035 
1036                     willexit = TRUE;
1037                     err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
1038                 }
1039 
1040                 // Replaced a check for whether the intermediate Unicode characters were all consumed by
1041                 // looping until they are; message key "premEnd" now obsolete.
1042 
1043                 // Finally, write the converted buffer to the output file
1044                 size_t outlen = (size_t) (bufp - outbuf);
1045                 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
1046                 if (wr != outlen) {
1047                     UnicodeString str(strerror(errno));
1048                     initMsg(pname);
1049                     u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
1050                     willexit = TRUE;
1051                 }
1052 
1053                 if (willexit) {
1054                     goto error_exit;
1055                 }
1056             } while (!toSawEndOfUnicode);
1057         } while (!fromSawEndOfBytes);
1058     } while (!flush);           // Stop when we have flushed the
1059                                 // converters (this means that it's
1060                                 // the end of output)
1061 
1062     goto normal_exit;
1063 
1064 error_exit:
1065     ret = FALSE;
1066 
1067 normal_exit:
1068     // Cleanup.
1069 
1070     ucnv_close(convfrom);
1071     ucnv_close(convto);
1072 
1073 #if !UCONFIG_NO_TRANSLITERATION
1074     delete t;
1075 #endif
1076 
1077     if (infile != stdin) {
1078         fclose(infile);
1079     }
1080 
1081     return ret;
1082 }
1083 
usage(const char * pname,int ecode)1084 static void usage(const char *pname, int ecode) {
1085     const UChar *msg;
1086     int32_t msgLen;
1087     UErrorCode err = U_ZERO_ERROR;
1088     FILE *fp = ecode ? stderr : stdout;
1089     int res;
1090 
1091     initMsg(pname);
1092     msg =
1093         ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
1094                             &msgLen, &err);
1095     UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
1096     UnicodeString mname(msg, msgLen + 1);
1097 
1098     res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
1099     if (!ecode) {
1100         if (!res) {
1101             fputc('\n', fp);
1102         }
1103         if (!u_wmsg(fp, "help")) {
1104             /* Now dump callbacks and finish. */
1105 
1106             int i, count =
1107                 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
1108             for (i = 0; i < count; ++i) {
1109                 fprintf(fp, " %s", transcode_callbacks[i].name);
1110             }
1111             fputc('\n', fp);
1112         }
1113     }
1114 
1115     exit(ecode);
1116 }
1117 
1118 extern int
main(int argc,char ** argv)1119 main(int argc, char **argv)
1120 {
1121     FILE *outfile;
1122     int ret = 0;
1123 
1124     size_t bufsz = DEFAULT_BUFSZ;
1125 
1126     const char *fromcpage = 0;
1127     const char *tocpage = 0;
1128     const char *translit = 0;
1129     const char *outfilestr = 0;
1130     UBool fallback = FALSE;
1131 
1132     UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
1133     const void *fromuctxt = 0;
1134     UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
1135     const void *touctxt = 0;
1136 
1137     char **iter, **remainArgv, **remainArgvLimit;
1138     char **end = argv + argc;
1139 
1140     const char *pname;
1141 
1142     UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
1143     const char *printName = 0;
1144 
1145     UBool verbose = FALSE;
1146     UErrorCode status = U_ZERO_ERROR;
1147 
1148     ConvertFile cf;
1149 
1150     /* Initialize ICU */
1151     u_init(&status);
1152     if (U_FAILURE(status)) {
1153         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
1154             argv[0], u_errorName(status));
1155         exit(1);
1156     }
1157 
1158     // Get and prettify pname.
1159     pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
1160 #ifdef U_WINDOWS
1161     if (!pname) {
1162         pname = uprv_strrchr(*argv, '/');
1163     }
1164 #endif
1165     if (!pname) {
1166         pname = *argv;
1167     } else {
1168         ++pname;
1169     }
1170 
1171     // First, get the arguments from command-line
1172     // to know the codepages to convert between
1173 
1174     remainArgv = remainArgvLimit = argv + 1;
1175     for (iter = argv + 1; iter != end; iter++) {
1176         // Check for from charset
1177         if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
1178             iter++;
1179             if (iter != end)
1180                 fromcpage = *iter;
1181             else
1182                 usage(pname, 1);
1183         } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
1184             iter++;
1185             if (iter != end)
1186                 tocpage = *iter;
1187             else
1188                 usage(pname, 1);
1189         } else if (strcmp("-x", *iter) == 0) {
1190             iter++;
1191             if (iter != end)
1192                 translit = *iter;
1193             else
1194                 usage(pname, 1);
1195         } else if (!strcmp("--fallback", *iter)) {
1196             fallback = TRUE;
1197         } else if (!strcmp("--no-fallback", *iter)) {
1198             fallback = FALSE;
1199         } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
1200             iter++;
1201             if (iter != end) {
1202                 bufsz = atoi(*iter);
1203                 if ((int) bufsz <= 0) {
1204                     initMsg(pname);
1205                     UnicodeString str(*iter);
1206                     initMsg(pname);
1207                     u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
1208                     return 3;
1209                 }
1210             } else {
1211                 usage(pname, 1);
1212             }
1213         } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
1214             if (printTranslits) {
1215                 usage(pname, 1);
1216             }
1217             printConvs = TRUE;
1218         } else if (strcmp("--default-code", *iter) == 0) {
1219             if (printTranslits) {
1220                 usage(pname, 1);
1221             }
1222             printName = ucnv_getDefaultName();
1223         } else if (strcmp("--list-code", *iter) == 0) {
1224             if (printTranslits) {
1225                 usage(pname, 1);
1226             }
1227 
1228             iter++;
1229             if (iter != end) {
1230                 UErrorCode e = U_ZERO_ERROR;
1231                 printName = ucnv_getAlias(*iter, 0, &e);
1232                 if (U_FAILURE(e) || !printName) {
1233                     UnicodeString str(*iter);
1234                     initMsg(pname);
1235                     u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
1236                     return 2;
1237                 }
1238             } else
1239                 usage(pname, 1);
1240         } else if (strcmp("--canon", *iter) == 0) {
1241             printCanon = TRUE;
1242         } else if (strcmp("-L", *iter) == 0
1243             || !strcmp("--list-transliterators", *iter)) {
1244             if (printConvs) {
1245                 usage(pname, 1);
1246             }
1247             printTranslits = TRUE;
1248         } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
1249             || !strcmp("--help", *iter)) {
1250             usage(pname, 0);
1251         } else if (!strcmp("-c", *iter)) {
1252             fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
1253         } else if (!strcmp("--to-callback", *iter)) {
1254             iter++;
1255             if (iter != end) {
1256                 const struct callback_ent *cbe = findCallback(*iter);
1257                 if (cbe) {
1258                     fromucallback = cbe->fromu;
1259                     fromuctxt = cbe->fromuctxt;
1260                 } else {
1261                     UnicodeString str(*iter);
1262                     initMsg(pname);
1263                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1264                     return 4;
1265                 }
1266             } else {
1267                 usage(pname, 1);
1268             }
1269         } else if (!strcmp("--from-callback", *iter)) {
1270             iter++;
1271             if (iter != end) {
1272                 const struct callback_ent *cbe = findCallback(*iter);
1273                 if (cbe) {
1274                     toucallback = cbe->tou;
1275                     touctxt = cbe->touctxt;
1276                 } else {
1277                     UnicodeString str(*iter);
1278                     initMsg(pname);
1279                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1280                     return 4;
1281                 }
1282             } else {
1283                 usage(pname, 1);
1284             }
1285         } else if (!strcmp("-i", *iter)) {
1286             toucallback = UCNV_TO_U_CALLBACK_SKIP;
1287         } else if (!strcmp("--callback", *iter)) {
1288             iter++;
1289             if (iter != end) {
1290                 const struct callback_ent *cbe = findCallback(*iter);
1291                 if (cbe) {
1292                     fromucallback = cbe->fromu;
1293                     fromuctxt = cbe->fromuctxt;
1294                     toucallback = cbe->tou;
1295                     touctxt = cbe->touctxt;
1296                 } else {
1297                     UnicodeString str(*iter);
1298                     initMsg(pname);
1299                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1300                     return 4;
1301                 }
1302             } else {
1303                 usage(pname, 1);
1304             }
1305         } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
1306             verbose = FALSE;
1307         } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
1308             verbose = TRUE;
1309         } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
1310             printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
1311             return 0;
1312         } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
1313             ++iter;
1314             if (iter != end && !outfilestr) {
1315                 outfilestr = *iter;
1316             } else {
1317                 usage(pname, 1);
1318             }
1319         } else if (0 == strcmp("--add-signature", *iter)) {
1320             cf.signature = 1;
1321         } else if (0 == strcmp("--remove-signature", *iter)) {
1322             cf.signature = -1;
1323         } else if (**iter == '-' && (*iter)[1]) {
1324             usage(pname, 1);
1325         } else {
1326             // move a non-option up in argv[]
1327             *remainArgvLimit++ = *iter;
1328         }
1329     }
1330 
1331     if (printConvs || printName) {
1332         return printConverters(pname, printName, printCanon) ? 2 : 0;
1333     } else if (printTranslits) {
1334         return printTransliterators(printCanon) ? 3 : 0;
1335     }
1336 
1337     if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
1338         fromcpage = ucnv_getDefaultName();
1339     }
1340     if (!tocpage || !uprv_strcmp(tocpage, "-")) {
1341         tocpage = ucnv_getDefaultName();
1342     }
1343 
1344     // Open the correct output file or connect to stdout for reading input
1345     if (outfilestr != 0 && strcmp(outfilestr, "-")) {
1346         outfile = fopen(outfilestr, "wb");
1347         if (outfile == 0) {
1348             UnicodeString str1(outfilestr, "");
1349             UnicodeString str2(strerror(errno), "");
1350             initMsg(pname);
1351             u_wmsg(stderr, "cantCreateOutputF",
1352                 str1.getBuffer(), str2.getBuffer());
1353             return 1;
1354         }
1355     } else {
1356         outfilestr = "-";
1357         outfile = stdout;
1358 #ifdef USE_FILENO_BINARY_MODE
1359         if (setmode(fileno(outfile), O_BINARY) == -1) {
1360             u_wmsg(stderr, "cantSetOutBinMode");
1361             exit(-1);
1362         }
1363 #endif
1364     }
1365 
1366     /* Loop again on the arguments to find all the input files, and
1367     convert them. */
1368 
1369     cf.setBufferSize(bufsz);
1370 
1371     if(remainArgv < remainArgvLimit) {
1372         for (iter = remainArgv; iter != remainArgvLimit; iter++) {
1373             if (!cf.convertFile(
1374                     pname, fromcpage, toucallback, touctxt, tocpage,
1375                     fromucallback, fromuctxt, fallback, translit, *iter,
1376                     outfile, verbose)
1377             ) {
1378                 goto error_exit;
1379             }
1380         }
1381     } else {
1382         if (!cf.convertFile(
1383                 pname, fromcpage, toucallback, touctxt, tocpage,
1384                 fromucallback, fromuctxt, fallback, translit, 0,
1385                 outfile, verbose)
1386         ) {
1387             goto error_exit;
1388         }
1389     }
1390 
1391     goto normal_exit;
1392 error_exit:
1393     ret = 1;
1394 normal_exit:
1395 
1396     if (outfile != stdout) {
1397         fclose(outfile);
1398     }
1399 
1400     return ret;
1401 }
1402 
1403 
1404 /*
1405  * Hey, Emacs, please set the following:
1406  *
1407  * Local Variables:
1408  * indent-tabs-mode: nil
1409  * End:
1410  *
1411  */
1412