• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ********************************************************************************
3  *
4  *   Copyright (C) 1998-2015, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  ********************************************************************************
8  *
9  *
10  *  makeconv.cpp:
11  *  tool creating a binary (compressed) representation of the conversion mapping
12  *  table (IBM NLTC ucmap format).
13  *
14  *  05/04/2000    helena     Added fallback mapping into the picture...
15  *  06/29/2000  helena      Major rewrite of the callback APIs.
16  */
17 
18 #include <stdio.h>
19 #include "unicode/putil.h"
20 #include "unicode/ucnv_err.h"
21 #include "charstr.h"
22 #include "ucnv_bld.h"
23 #include "ucnv_imp.h"
24 #include "ucnv_cnv.h"
25 #include "cstring.h"
26 #include "cmemory.h"
27 #include "uinvchar.h"
28 #include "filestrm.h"
29 #include "toolutil.h"
30 #include "uoptions.h"
31 #include "unicode/udata.h"
32 #include "unewdata.h"
33 #include "uparse.h"
34 #include "ucm.h"
35 #include "makeconv.h"
36 #include "genmbcs.h"
37 
38 #define DEBUG 0
39 
40 typedef struct ConvData {
41     UCMFile *ucm;
42     NewConverter *cnvData, *extData;
43     UConverterSharedData sharedData;
44     UConverterStaticData staticData;
45 } ConvData;
46 
47 static void
initConvData(ConvData * data)48 initConvData(ConvData *data) {
49     uprv_memset(data, 0, sizeof(ConvData));
50     data->sharedData.structSize=sizeof(UConverterSharedData);
51     data->staticData.structSize=sizeof(UConverterStaticData);
52     data->sharedData.staticData=&data->staticData;
53 }
54 
55 static void
cleanupConvData(ConvData * data)56 cleanupConvData(ConvData *data) {
57     if(data!=NULL) {
58         if(data->cnvData!=NULL) {
59             data->cnvData->close(data->cnvData);
60             data->cnvData=NULL;
61         }
62         if(data->extData!=NULL) {
63             data->extData->close(data->extData);
64             data->extData=NULL;
65         }
66         ucm_close(data->ucm);
67         data->ucm=NULL;
68     }
69 }
70 
71 /*
72  * from ucnvstat.c - static prototypes of data-based converters
73  */
74 U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
75 
/*
 * Global flags, assigned in main() from the command-line options.
 * They are not static, so they have external linkage.
 */
UBool VERBOSE = FALSE;           /* --verbose: progress messages, see writeConverterData() */
UBool QUIET = FALSE;             /* --quiet: main() suppresses the converter-name mismatch warning */
UBool SMALL = FALSE;             /* --small: not read in this file; presumably used by the table builders - confirm */
UBool IGNORE_SISO_CHECK = FALSE; /* --ignore-siso-check: passed to ucm_processStates() in readFile() */
83 
84 static void
85 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
86 
87 /*
88  * Set up the UNewData and write the converter..
89  */
90 static void
91 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
92 
93 UBool haveCopyright=TRUE;
94 
/*
 * Data header description passed to udata_create() for every .cnv file
 * written by writeConverterData().
 * main() overwrites dataVersion with the ICU version before any file is
 * processed, and prints formatVersion[0].[1] for --version.
 */
static UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
    {6, 2, 0, 0},                 /* formatVersion */
    {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
};
108 
109 static void
writeConverterData(ConvData * data,const char * cnvName,const char * cnvDir,UErrorCode * status)110 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
111 {
112     UNewDataMemory *mem = NULL;
113     uint32_t sz2;
114     uint32_t size = 0;
115     int32_t tableType;
116 
117     if(U_FAILURE(*status))
118       {
119         return;
120       }
121 
122     tableType=TABLE_NONE;
123     if(data->cnvData!=NULL) {
124         tableType|=TABLE_BASE;
125     }
126     if(data->extData!=NULL) {
127         tableType|=TABLE_EXT;
128     }
129 
130     mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
131 
132     if(U_FAILURE(*status))
133       {
134         fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
135                 cnvName,
136                 "cnv",
137                 u_errorName(*status));
138         return;
139       }
140 
141     if(VERBOSE)
142       {
143         printf("- Opened udata %s.%s\n", cnvName, "cnv");
144       }
145 
146 
147     /* all read only, clean, platform independent data.  Mmmm. :)  */
148     udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
149     size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
150     /* Now, write the table */
151     if(tableType&TABLE_BASE) {
152         size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
153     }
154     if(tableType&TABLE_EXT) {
155         size += data->extData->write(data->extData, &data->staticData, mem, tableType);
156     }
157 
158     sz2 = udata_finish(mem, status);
159     if(size != sz2)
160     {
161         fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
162         *status=U_INTERNAL_PROGRAM_ERROR;
163     }
164     if(VERBOSE)
165     {
166       printf("- Wrote %u bytes to the udata.\n", (int)sz2);
167     }
168 }
169 
/*
 * Indexes into the options[] array used by main().
 * The order of these constants must match the order of the entries in
 * the options[] initializer.
 */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_IGNORE_SISO_CHECK,
    OPT_QUIET,

    OPT_COUNT
};
183 
/*
 * Command-line option descriptors handed to u_parseArgs() in main().
 * main() looks entries up with the OPT_* constants, so this list must stay
 * in the same order as that enum.
 */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_COPYRIGHT,
    UOPTION_VERSION,
    UOPTION_DESTDIR,
    UOPTION_VERBOSE,
    /* the usage text lists only a long form for the next two options */
    { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    UOPTION_QUIET,
};
195 
main(int argc,char * argv[])196 int main(int argc, char* argv[])
197 {
198     ConvData data;
199     char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
200 
201     U_MAIN_INIT_ARGS(argc, argv);
202 
203     /* Set up the ICU version number */
204     UVersionInfo icuVersion;
205     u_getVersion(icuVersion);
206     uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
207 
208     /* preset then read command line options */
209     options[OPT_DESTDIR].value=u_getDataDirectory();
210     argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
211 
212     /* error handling, printing usage message */
213     if(argc<0) {
214         fprintf(stderr,
215             "error in command line argument \"%s\"\n",
216             argv[-argc]);
217     } else if(argc<2) {
218         argc=-1;
219     }
220     if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
221         FILE *stdfile=argc<0 ? stderr : stdout;
222         fprintf(stdfile,
223             "usage: %s [-options] files...\n"
224             "\tread .ucm codepage mapping files and write .cnv files\n"
225             "options:\n"
226             "\t-h or -? or --help  this usage text\n"
227             "\t-V or --version     show a version message\n"
228             "\t-c or --copyright   include a copyright notice\n"
229             "\t-d or --destdir     destination directory, followed by the path\n"
230             "\t-v or --verbose     Turn on verbose output\n"
231             "\t-q or --quiet       do not display warnings and progress\n",
232             argv[0]);
233         fprintf(stdfile,
234             "\t      --small       Generate smaller .cnv files. They will be\n"
235             "\t                    significantly smaller but may not be compatible with\n"
236             "\t                    older versions of ICU and will require heap memory\n"
237             "\t                    allocation when loaded.\n"
238             "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
239         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
240     }
241 
242     if(options[OPT_VERSION].doesOccur) {
243         printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
244                dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
245         printf("%s\n", U_COPYRIGHT_STRING);
246         exit(0);
247     }
248 
249     /* get the options values */
250     haveCopyright = options[OPT_COPYRIGHT].doesOccur;
251     const char *destdir = options[OPT_DESTDIR].value;
252     VERBOSE = options[OPT_VERBOSE].doesOccur;
253     QUIET = options[OPT_QUIET].doesOccur;
254     SMALL = options[OPT_SMALL].doesOccur;
255 
256     if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
257         IGNORE_SISO_CHECK = TRUE;
258     }
259 
260     icu::CharString outFileName;
261     UErrorCode err = U_ZERO_ERROR;
262     if (destdir != NULL && *destdir != 0) {
263         outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);
264         if (U_FAILURE(err)) {
265             return err;
266         }
267     }
268     int32_t outBasenameStart = outFileName.length();
269 
270 #if DEBUG
271     {
272       int i;
273       printf("makeconv: processing %d files...\n", argc - 1);
274       for(i=1; i<argc; ++i) {
275         printf("%s ", argv[i]);
276       }
277       printf("\n");
278       fflush(stdout);
279     }
280 #endif
281 
282     UBool printFilename = (UBool) (argc > 2 || VERBOSE);
283     for (++argv; --argc; ++argv)
284     {
285         UErrorCode localError = U_ZERO_ERROR;
286         const char *arg = getLongPathname(*argv);
287 
288         /*produces the right destination path for display*/
289         outFileName.truncate(outBasenameStart);
290         if (outBasenameStart != 0)
291         {
292             /* find the last file sepator */
293             const char *basename = findBasename(arg);
294             outFileName.append(basename, localError);
295         }
296         else
297         {
298             outFileName.append(arg, localError);
299         }
300         if (U_FAILURE(localError)) {
301             return localError;
302         }
303 
304         /*removes the extension if any is found*/
305         int32_t lastDotIndex = outFileName.lastIndexOf('.');
306         if (lastDotIndex >= outBasenameStart) {
307             outFileName.truncate(lastDotIndex);
308         }
309 
310         /* the basename without extension is the converter name */
311         if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
312             fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);
313             return U_BUFFER_OVERFLOW_ERROR;
314         }
315         uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);
316 
317         /*Adds the target extension*/
318         outFileName.append(CONVERTER_FILE_EXTENSION, localError);
319         if (U_FAILURE(localError)) {
320             return localError;
321         }
322 
323 #if DEBUG
324         printf("makeconv: processing %s  ...\n", arg);
325         fflush(stdout);
326 #endif
327         initConvData(&data);
328         createConverter(&data, arg, &localError);
329 
330         if (U_FAILURE(localError))
331         {
332             /* if an error is found, print out an error msg and keep going */
333             fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
334                     outFileName.data(), arg, u_errorName(localError));
335             if(U_SUCCESS(err)) {
336                 err = localError;
337             }
338         }
339         else
340         {
341             /* Insure the static data name matches the  file name */
342             /* Changed to ignore directory and only compare base name
343              LDH 1/2/08*/
344             char *p;
345             p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
346 
347             if(p == NULL)            /* OK, try alternate */
348             {
349                 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
350                 if(p == NULL)
351                 {
352                     p=cnvName; /* If no separators, no problem */
353                 }
354             }
355             else
356             {
357                 p++;   /* If found separator, don't include it in compare */
358             }
359             if(uprv_stricmp(p,data.staticData.name) && !QUIET)
360             {
361                 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
362                     cnvName,  CONVERTER_FILE_EXTENSION,
363                     data.staticData.name);
364             }
365 
366             uprv_strcpy((char*)data.staticData.name, cnvName);
367 
368             if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
369                 fprintf(stderr,
370                     "Error: A converter name must contain only invariant characters.\n"
371                     "%s is not a valid converter name.\n",
372                     data.staticData.name);
373                 if(U_SUCCESS(err)) {
374                     err = U_INVALID_TABLE_FORMAT;
375                 }
376             }
377 
378             localError = U_ZERO_ERROR;
379             writeConverterData(&data, cnvName, destdir, &localError);
380 
381             if(U_FAILURE(localError))
382             {
383                 /* if an error is found, print out an error msg and keep going*/
384                 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,
385                     u_errorName(localError));
386                 if(U_SUCCESS(err)) {
387                     err = localError;
388                 }
389             }
390             else if (printFilename)
391             {
392                 puts(outFileName.data() + outBasenameStart);
393             }
394         }
395         fflush(stdout);
396         fflush(stderr);
397 
398         cleanupConvData(&data);
399     }
400 
401     return err;
402 }
403 
404 static void
getPlatformAndCCSIDFromName(const char * name,int8_t * pPlatform,int32_t * pCCSID)405 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
406     if( (name[0]=='i' || name[0]=='I') &&
407         (name[1]=='b' || name[1]=='B') &&
408         (name[2]=='m' || name[2]=='M')
409     ) {
410         name+=3;
411         if(*name=='-') {
412             ++name;
413         }
414         *pPlatform=UCNV_IBM;
415         *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
416     } else {
417         *pPlatform=UCNV_UNKNOWN;
418         *pCCSID=0;
419     }
420 }
421 
422 static void
readHeader(ConvData * data,FileStream * convFile,UErrorCode * pErrorCode)423 readHeader(ConvData *data,
424            FileStream* convFile,
425            UErrorCode *pErrorCode) {
426     char line[1024];
427     char *s, *key, *value;
428     const UConverterStaticData *prototype;
429     UConverterStaticData *staticData;
430 
431     if(U_FAILURE(*pErrorCode)) {
432         return;
433     }
434 
435     staticData=&data->staticData;
436     staticData->platform=UCNV_IBM;
437     staticData->subCharLen=0;
438 
439     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
440         /* basic parsing and handling of state-related items */
441         if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
442             continue;
443         }
444 
445         /* stop at the beginning of the mapping section */
446         if(uprv_strcmp(line, "CHARMAP")==0) {
447             break;
448         }
449 
450         /* collect the information from the header field, ignore unknown keys */
451         if(uprv_strcmp(key, "code_set_name")==0) {
452             if(*value!=0) {
453                 uprv_strcpy((char *)staticData->name, value);
454                 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
455             }
456         } else if(uprv_strcmp(key, "subchar")==0) {
457             uint8_t bytes[UCNV_EXT_MAX_BYTES];
458             int8_t length;
459 
460             s=value;
461             length=ucm_parseBytes(bytes, line, (const char **)&s);
462             if(1<=length && length<=4 && *s==0) {
463                 staticData->subCharLen=length;
464                 uprv_memcpy(staticData->subChar, bytes, length);
465             } else {
466                 fprintf(stderr, "error: illegal <subchar> %s\n", value);
467                 *pErrorCode=U_INVALID_TABLE_FORMAT;
468                 return;
469             }
470         } else if(uprv_strcmp(key, "subchar1")==0) {
471             uint8_t bytes[UCNV_EXT_MAX_BYTES];
472 
473             s=value;
474             if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
475                 staticData->subChar1=bytes[0];
476             } else {
477                 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
478                 *pErrorCode=U_INVALID_TABLE_FORMAT;
479                 return;
480             }
481         }
482     }
483 
484     /* copy values from the UCMFile to the static data */
485     staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
486     staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
487     staticData->conversionType=data->ucm->states.conversionType;
488 
489     if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
490         fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
491         *pErrorCode=U_INVALID_TABLE_FORMAT;
492         return;
493     }
494 
495     /*
496      * Now that we know the type, copy any 'default' values from the table.
497      * We need not check the type any further because the parser only
498      * recognizes what we have prototypes for.
499      *
500      * For delta (extension-only) tables, copy values from the base file
501      * instead, see createConverter().
502      */
503     if(data->ucm->baseName[0]==0) {
504         prototype=ucnv_converterStaticData[staticData->conversionType];
505         if(prototype!=NULL) {
506             if(staticData->name[0]==0) {
507                 uprv_strcpy((char *)staticData->name, prototype->name);
508             }
509 
510             if(staticData->codepage==0) {
511                 staticData->codepage=prototype->codepage;
512             }
513 
514             if(staticData->platform==0) {
515                 staticData->platform=prototype->platform;
516             }
517 
518             if(staticData->minBytesPerChar==0) {
519                 staticData->minBytesPerChar=prototype->minBytesPerChar;
520             }
521 
522             if(staticData->maxBytesPerChar==0) {
523                 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
524             }
525 
526             if(staticData->subCharLen==0) {
527                 staticData->subCharLen=prototype->subCharLen;
528                 if(prototype->subCharLen>0) {
529                     uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
530                 }
531             }
532         }
533     }
534 
535     if(data->ucm->states.outputType<0) {
536         data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
537     }
538 
539     if( staticData->subChar1!=0 &&
540             (staticData->minBytesPerChar>1 ||
541                 (staticData->conversionType!=UCNV_MBCS &&
542                  staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
543     ) {
544         fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
545         *pErrorCode=U_INVALID_TABLE_FORMAT;
546     }
547 }
548 
549 /* return TRUE if a base table was read, FALSE for an extension table */
550 static UBool
readFile(ConvData * data,const char * converterName,UErrorCode * pErrorCode)551 readFile(ConvData *data, const char* converterName,
552          UErrorCode *pErrorCode) {
553     char line[1024];
554     char *end;
555     FileStream *convFile;
556 
557     UCMStates *baseStates;
558     UBool dataIsBase;
559 
560     if(U_FAILURE(*pErrorCode)) {
561         return FALSE;
562     }
563 
564     data->ucm=ucm_open();
565 
566     convFile=T_FileStream_open(converterName, "r");
567     if(convFile==NULL) {
568         *pErrorCode=U_FILE_ACCESS_ERROR;
569         return FALSE;
570     }
571 
572     readHeader(data, convFile, pErrorCode);
573     if(U_FAILURE(*pErrorCode)) {
574         return FALSE;
575     }
576 
577     if(data->ucm->baseName[0]==0) {
578         dataIsBase=TRUE;
579         baseStates=&data->ucm->states;
580         ucm_processStates(baseStates, IGNORE_SISO_CHECK);
581     } else {
582         dataIsBase=FALSE;
583         baseStates=NULL;
584     }
585 
586     /* read the base table */
587     ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
588     if(U_FAILURE(*pErrorCode)) {
589         return FALSE;
590     }
591 
592     /* read an extension table if there is one */
593     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
594         end=uprv_strchr(line, 0);
595         while(line<end &&
596               (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
597             --end;
598         }
599         *end=0;
600 
601         if(line[0]=='#' || u_skipWhitespace(line)==end) {
602             continue; /* ignore empty and comment lines */
603         }
604 
605         if(0==uprv_strcmp(line, "CHARMAP")) {
606             /* read the extension table */
607             ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
608         } else {
609             fprintf(stderr, "unexpected text after the base mapping table\n");
610         }
611         break;
612     }
613 
614     T_FileStream_close(convFile);
615 
616     if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
617         fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
618         *pErrorCode=U_INVALID_TABLE_FORMAT;
619     }
620 
621     return dataIsBase;
622 }
623 
624 static void
createConverter(ConvData * data,const char * converterName,UErrorCode * pErrorCode)625 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
626     ConvData baseData;
627     UBool dataIsBase;
628 
629     UConverterStaticData *staticData;
630     UCMStates *states, *baseStates;
631 
632     if(U_FAILURE(*pErrorCode)) {
633         return;
634     }
635 
636     initConvData(data);
637 
638     dataIsBase=readFile(data, converterName, pErrorCode);
639     if(U_FAILURE(*pErrorCode)) {
640         return;
641     }
642 
643     staticData=&data->staticData;
644     states=&data->ucm->states;
645 
646     if(dataIsBase) {
647         /*
648          * Build a normal .cnv file with a base table
649          * and an optional extension table.
650          */
651         data->cnvData=MBCSOpen(data->ucm);
652         if(data->cnvData==NULL) {
653             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
654 
655         } else if(!data->cnvData->isValid(data->cnvData,
656                             staticData->subChar, staticData->subCharLen)
657         ) {
658             fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
659             *pErrorCode=U_INVALID_TABLE_FORMAT;
660 
661         } else if(staticData->subChar1!=0 &&
662                     !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
663         ) {
664             fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
665             *pErrorCode=U_INVALID_TABLE_FORMAT;
666 
667         } else if(
668             data->ucm->ext->mappingsLength>0 &&
669             !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
670         ) {
671             *pErrorCode=U_INVALID_TABLE_FORMAT;
672         } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
673             /* sort the table so that it can be turned into UTF-8-friendly data */
674             ucm_sortTable(data->ucm->base);
675         }
676 
677         if(U_SUCCESS(*pErrorCode)) {
678             if(
679                 /* add the base table after ucm_checkBaseExt()! */
680                 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
681             ) {
682                 *pErrorCode=U_INVALID_TABLE_FORMAT;
683             } else {
684                 /*
685                  * addTable() may have requested moving more mappings to the extension table
686                  * if they fit into the base toUnicode table but not into the
687                  * base fromUnicode table.
688                  * (Especially for UTF-8-friendly fromUnicode tables.)
689                  * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
690                  * to be excluded from the extension toUnicode data.
691                  * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
692                  * the base fromUnicode table.
693                  */
694                 ucm_moveMappings(data->ucm->base, data->ucm->ext);
695                 ucm_sortTable(data->ucm->ext);
696                 if(data->ucm->ext->mappingsLength>0) {
697                     /* prepare the extension table, if there is one */
698                     data->extData=CnvExtOpen(data->ucm);
699                     if(data->extData==NULL) {
700                         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
701                     } else if(
702                         !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
703                     ) {
704                         *pErrorCode=U_INVALID_TABLE_FORMAT;
705                     }
706                 }
707             }
708         }
709     } else {
710         /* Build an extension-only .cnv file. */
711         char baseFilename[500];
712         char *basename;
713 
714         initConvData(&baseData);
715 
716         /* assemble a path/filename for data->ucm->baseName */
717         uprv_strcpy(baseFilename, converterName);
718         basename=(char *)findBasename(baseFilename);
719         uprv_strcpy(basename, data->ucm->baseName);
720         uprv_strcat(basename, ".ucm");
721 
722         /* read the base table */
723         dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
724         if(U_FAILURE(*pErrorCode)) {
725             return;
726         } else if(!dataIsBase) {
727             fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
728             *pErrorCode=U_INVALID_TABLE_FORMAT;
729         } else {
730             /* prepare the extension table */
731             data->extData=CnvExtOpen(data->ucm);
732             if(data->extData==NULL) {
733                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
734             } else {
735                 /* fill in gaps in extension file header fields */
736                 UCMapping *m, *mLimit;
737                 uint8_t fallbackFlags;
738 
739                 baseStates=&baseData.ucm->states;
740                 if(states->conversionType==UCNV_DBCS) {
741                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
742                 } else if(states->minCharLength==0) {
743                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
744                 }
745                 if(states->maxCharLength<states->minCharLength) {
746                     staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
747                 }
748 
749                 if(staticData->subCharLen==0) {
750                     uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
751                     staticData->subCharLen=baseData.staticData.subCharLen;
752                 }
753                 /*
754                  * do not copy subChar1 -
755                  * only use what is explicitly specified
756                  * because it cannot be unset in the extension file header
757                  */
758 
759                 /* get the fallback flags */
760                 fallbackFlags=0;
761                 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
762                     m<mLimit && fallbackFlags!=3;
763                     ++m
764                 ) {
765                     if(m->f==1) {
766                         fallbackFlags|=1;
767                     } else if(m->f==3) {
768                         fallbackFlags|=2;
769                     }
770                 }
771 
772                 if(fallbackFlags&1) {
773                     staticData->hasFromUnicodeFallback=TRUE;
774                 }
775                 if(fallbackFlags&2) {
776                     staticData->hasToUnicodeFallback=TRUE;
777                 }
778 
779                 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
780                     fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
781                     *pErrorCode=U_INVALID_TABLE_FORMAT;
782 
783                 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
784                     fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
785                     *pErrorCode=U_INVALID_TABLE_FORMAT;
786 
787                 } else if(
788                     !ucm_checkValidity(data->ucm->ext, baseStates) ||
789                     !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
790                 ) {
791                     *pErrorCode=U_INVALID_TABLE_FORMAT;
792                 } else {
793                     if(states->maxCharLength>1) {
794                         /*
795                          * When building a normal .cnv file with a base table
796                          * for an MBCS (not SBCS) table with explicit precision flags,
797                          * the MBCSAddTable() function marks some mappings for moving
798                          * to the extension table.
799                          * They fit into the base toUnicode table but not into the
800                          * base fromUnicode table.
801                          * (Note: We do have explicit precision flags because they are
802                          * required for extension table generation, and
803                          * ucm_checkBaseExt() verified it.)
804                          *
805                          * We do not call MBCSAddTable() here (we probably could)
806                          * so we need to do the analysis before building the extension table.
807                          * We assume that MBCSAddTable() will build a UTF-8-friendly table.
808                          * Redundant mappings in the extension table are ok except they cost some size.
809                          *
810                          * Do this after ucm_checkBaseExt().
811                          */
812                         const MBCSData *mbcsData=MBCSGetDummy();
813                         int32_t needsMove=0;
814                         for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
815                             m<mLimit;
816                             ++m
817                         ) {
818                             if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
819                                 m->f|=MBCS_FROM_U_EXT_FLAG;
820                                 m->moveFlag=UCM_MOVE_TO_EXT;
821                                 ++needsMove;
822                             }
823                         }
824 
825                         if(needsMove!=0) {
826                             ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
827                             ucm_sortTable(data->ucm->ext);
828                         }
829                     }
830                     if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
831                         *pErrorCode=U_INVALID_TABLE_FORMAT;
832                     }
833                 }
834             }
835         }
836 
837         cleanupConvData(&baseData);
838     }
839 }
840 
841 /*
842  * Hey, Emacs, please set the following:
843  *
844  * Local Variables:
845  * indent-tabs-mode: nil
846  * End:
847  *
848  */
849