• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ********************************************************************************
3  *
4  *   Copyright (C) 1998-2014, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  ********************************************************************************
8  *
9  *
10  *  makeconv.c:
11  *  tool creating a binary (compressed) representation of the conversion mapping
12  *  table (IBM NLTC ucmap format).
13  *
14  *  05/04/2000    helena     Added fallback mapping into the picture...
15  *  06/29/2000  helena      Major rewrite of the callback APIs.
16  */
17 
18 #include <stdio.h>
19 #include "unicode/putil.h"
20 #include "unicode/ucnv_err.h"
21 #include "ucnv_bld.h"
22 #include "ucnv_imp.h"
23 #include "ucnv_cnv.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "uinvchar.h"
27 #include "filestrm.h"
28 #include "toolutil.h"
29 #include "uoptions.h"
30 #include "unicode/udata.h"
31 #include "unewdata.h"
32 #include "uparse.h"
33 #include "ucm.h"
34 #include "makeconv.h"
35 #include "genmbcs.h"
36 
37 #define DEBUG 0
38 
39 typedef struct ConvData {
40     UCMFile *ucm;
41     NewConverter *cnvData, *extData;
42     UConverterSharedData sharedData;
43     UConverterStaticData staticData;
44 } ConvData;
45 
46 static void
initConvData(ConvData * data)47 initConvData(ConvData *data) {
48     uprv_memset(data, 0, sizeof(ConvData));
49     data->sharedData.structSize=sizeof(UConverterSharedData);
50     data->staticData.structSize=sizeof(UConverterStaticData);
51     data->sharedData.staticData=&data->staticData;
52 }
53 
54 static void
cleanupConvData(ConvData * data)55 cleanupConvData(ConvData *data) {
56     if(data!=NULL) {
57         if(data->cnvData!=NULL) {
58             data->cnvData->close(data->cnvData);
59             data->cnvData=NULL;
60         }
61         if(data->extData!=NULL) {
62             data->extData->close(data->extData);
63             data->extData=NULL;
64         }
65         ucm_close(data->ucm);
66         data->ucm=NULL;
67     }
68 }
69 
70 /*
71  * from ucnvstat.c - static prototypes of data-based converters
72  */
73 extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
74 
75 /*
76  * Global - verbosity
77  */
78 UBool VERBOSE = FALSE;
79 UBool SMALL = FALSE;
80 UBool IGNORE_SISO_CHECK = FALSE;
81 
82 static void
83 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
84 
85 /*
86  * Set up the UNewData and write the converter..
87  */
88 static void
89 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
90 
91 UBool haveCopyright=TRUE;
92 
93 static UDataInfo dataInfo={
94     sizeof(UDataInfo),
95     0,
96 
97     U_IS_BIG_ENDIAN,
98     U_CHARSET_FAMILY,
99     sizeof(UChar),
100     0,
101 
102     {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
103     {6, 2, 0, 0},                 /* formatVersion */
104     {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
105 };
106 
107 static void
writeConverterData(ConvData * data,const char * cnvName,const char * cnvDir,UErrorCode * status)108 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
109 {
110     UNewDataMemory *mem = NULL;
111     uint32_t sz2;
112     uint32_t size = 0;
113     int32_t tableType;
114 
115     if(U_FAILURE(*status))
116       {
117         return;
118       }
119 
120     tableType=TABLE_NONE;
121     if(data->cnvData!=NULL) {
122         tableType|=TABLE_BASE;
123     }
124     if(data->extData!=NULL) {
125         tableType|=TABLE_EXT;
126     }
127 
128     mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
129 
130     if(U_FAILURE(*status))
131       {
132         fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
133                 cnvName,
134                 "cnv",
135                 u_errorName(*status));
136         return;
137       }
138 
139     if(VERBOSE)
140       {
141         printf("- Opened udata %s.%s\n", cnvName, "cnv");
142       }
143 
144 
145     /* all read only, clean, platform independent data.  Mmmm. :)  */
146     udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
147     size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
148     /* Now, write the table */
149     if(tableType&TABLE_BASE) {
150         size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
151     }
152     if(tableType&TABLE_EXT) {
153         size += data->extData->write(data->extData, &data->staticData, mem, tableType);
154     }
155 
156     sz2 = udata_finish(mem, status);
157     if(size != sz2)
158     {
159         fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
160         *status=U_INTERNAL_PROGRAM_ERROR;
161     }
162     if(VERBOSE)
163     {
164       printf("- Wrote %u bytes to the udata.\n", (int)sz2);
165     }
166 }
167 
168 enum {
169     OPT_HELP_H,
170     OPT_HELP_QUESTION_MARK,
171     OPT_COPYRIGHT,
172     OPT_VERSION,
173     OPT_DESTDIR,
174     OPT_VERBOSE,
175     OPT_SMALL,
176     OPT_IGNORE_SISO_CHECK,
177     OPT_COUNT
178 };
179 
180 static UOption options[]={
181     UOPTION_HELP_H,
182     UOPTION_HELP_QUESTION_MARK,
183     UOPTION_COPYRIGHT,
184     UOPTION_VERSION,
185     UOPTION_DESTDIR,
186     UOPTION_VERBOSE,
187     { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
188     { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
189 };
190 
main(int argc,char * argv[])191 int main(int argc, char* argv[])
192 {
193     ConvData data;
194     UErrorCode err = U_ZERO_ERROR, localError;
195     char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
196     const char* destdir, *arg;
197     size_t destdirlen;
198     char* dot = NULL, *outBasename;
199     char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
200     char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
201     UVersionInfo icuVersion;
202     UBool printFilename;
203 
204     err = U_ZERO_ERROR;
205 
206     U_MAIN_INIT_ARGS(argc, argv);
207 
208     /* Set up the ICU version number */
209     u_getVersion(icuVersion);
210     uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
211 
212     /* preset then read command line options */
213     options[OPT_DESTDIR].value=u_getDataDirectory();
214     argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
215 
216     /* error handling, printing usage message */
217     if(argc<0) {
218         fprintf(stderr,
219             "error in command line argument \"%s\"\n",
220             argv[-argc]);
221     } else if(argc<2) {
222         argc=-1;
223     }
224     if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
225         FILE *stdfile=argc<0 ? stderr : stdout;
226         fprintf(stdfile,
227             "usage: %s [-options] files...\n"
228             "\tread .ucm codepage mapping files and write .cnv files\n"
229             "options:\n"
230             "\t-h or -? or --help  this usage text\n"
231             "\t-V or --version     show a version message\n"
232             "\t-c or --copyright   include a copyright notice\n"
233             "\t-d or --destdir     destination directory, followed by the path\n"
234             "\t-v or --verbose     Turn on verbose output\n",
235             argv[0]);
236         fprintf(stdfile,
237             "\t      --small       Generate smaller .cnv files. They will be\n"
238             "\t                    significantly smaller but may not be compatible with\n"
239             "\t                    older versions of ICU and will require heap memory\n"
240             "\t                    allocation when loaded.\n"
241             "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
242         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
243     }
244 
245     if(options[OPT_VERSION].doesOccur) {
246         printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
247                dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
248         printf("%s\n", U_COPYRIGHT_STRING);
249         exit(0);
250     }
251 
252     /* get the options values */
253     haveCopyright = options[OPT_COPYRIGHT].doesOccur;
254     destdir = options[OPT_DESTDIR].value;
255     VERBOSE = options[OPT_VERBOSE].doesOccur;
256     SMALL = options[OPT_SMALL].doesOccur;
257 
258     if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
259         IGNORE_SISO_CHECK = TRUE;
260     }
261 
262     if (destdir != NULL && *destdir != 0) {
263         uprv_strcpy(outFileName, destdir);
264         destdirlen = uprv_strlen(destdir);
265         outBasename = outFileName + destdirlen;
266         if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
267             *outBasename++ = U_FILE_SEP_CHAR;
268             ++destdirlen;
269         }
270     } else {
271         destdirlen = 0;
272         outBasename = outFileName;
273     }
274 
275 #if DEBUG
276     {
277       int i;
278       printf("makeconv: processing %d files...\n", argc - 1);
279       for(i=1; i<argc; ++i) {
280         printf("%s ", argv[i]);
281       }
282       printf("\n");
283       fflush(stdout);
284     }
285 #endif
286 
287     err = U_ZERO_ERROR;
288     printFilename = (UBool) (argc > 2 || VERBOSE);
289     for (++argv; --argc; ++argv)
290     {
291         arg = getLongPathname(*argv);
292 
293         /* Check for potential buffer overflow */
294         if(strlen(arg) >= UCNV_MAX_FULL_FILE_NAME_LENGTH)
295         {
296             fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
297             return U_BUFFER_OVERFLOW_ERROR;
298         }
299 
300         /*produces the right destination path for display*/
301         if (destdirlen != 0)
302         {
303             const char *basename;
304 
305             /* find the last file sepator */
306             basename = findBasename(arg);
307             uprv_strcpy(outBasename, basename);
308         }
309         else
310         {
311             uprv_strcpy(outFileName, arg);
312         }
313 
314         /*removes the extension if any is found*/
315         dot = uprv_strrchr(outBasename, '.');
316         if (dot)
317         {
318             *dot = '\0';
319         }
320 
321         /* the basename without extension is the converter name */
322         uprv_strcpy(cnvName, outBasename);
323 
324         /*Adds the target extension*/
325         uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
326 
327 #if DEBUG
328         printf("makeconv: processing %s  ...\n", arg);
329         fflush(stdout);
330 #endif
331         localError = U_ZERO_ERROR;
332         initConvData(&data);
333         createConverter(&data, arg, &localError);
334 
335         if (U_FAILURE(localError))
336         {
337             /* if an error is found, print out an error msg and keep going */
338             fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
339                 u_errorName(localError));
340             if(U_SUCCESS(err)) {
341                 err = localError;
342             }
343         }
344         else
345         {
346             /* Insure the static data name matches the  file name */
347             /* Changed to ignore directory and only compare base name
348              LDH 1/2/08*/
349             char *p;
350             p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
351 
352             if(p == NULL)            /* OK, try alternate */
353             {
354                 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
355                 if(p == NULL)
356                 {
357                     p=cnvName; /* If no separators, no problem */
358                 }
359             }
360             else
361             {
362                 p++;   /* If found separtor, don't include it in compare */
363             }
364             if(uprv_stricmp(p,data.staticData.name))
365             {
366                 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
367                     cnvName,  CONVERTER_FILE_EXTENSION,
368                     data.staticData.name);
369             }
370 
371             uprv_strcpy((char*)data.staticData.name, cnvName);
372 
373             if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
374                 fprintf(stderr,
375                     "Error: A converter name must contain only invariant characters.\n"
376                     "%s is not a valid converter name.\n",
377                     data.staticData.name);
378                 if(U_SUCCESS(err)) {
379                     err = U_INVALID_TABLE_FORMAT;
380                 }
381             }
382 
383             uprv_strcpy(cnvNameWithPkg, cnvName);
384 
385             localError = U_ZERO_ERROR;
386             writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
387 
388             if(U_FAILURE(localError))
389             {
390                 /* if an error is found, print out an error msg and keep going*/
391                 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
392                     u_errorName(localError));
393                 if(U_SUCCESS(err)) {
394                     err = localError;
395                 }
396             }
397             else if (printFilename)
398             {
399                 puts(outBasename);
400             }
401         }
402         fflush(stdout);
403         fflush(stderr);
404 
405         cleanupConvData(&data);
406     }
407 
408     return err;
409 }
410 
411 static void
getPlatformAndCCSIDFromName(const char * name,int8_t * pPlatform,int32_t * pCCSID)412 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
413     if( (name[0]=='i' || name[0]=='I') &&
414         (name[1]=='b' || name[1]=='B') &&
415         (name[2]=='m' || name[2]=='M')
416     ) {
417         name+=3;
418         if(*name=='-') {
419             ++name;
420         }
421         *pPlatform=UCNV_IBM;
422         *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
423     } else {
424         *pPlatform=UCNV_UNKNOWN;
425         *pCCSID=0;
426     }
427 }
428 
429 static void
readHeader(ConvData * data,FileStream * convFile,const char * converterName,UErrorCode * pErrorCode)430 readHeader(ConvData *data,
431            FileStream* convFile,
432            const char* converterName,
433            UErrorCode *pErrorCode) {
434     char line[1024];
435     char *s, *key, *value;
436     const UConverterStaticData *prototype;
437     UConverterStaticData *staticData;
438 
439     if(U_FAILURE(*pErrorCode)) {
440         return;
441     }
442 
443     staticData=&data->staticData;
444     staticData->platform=UCNV_IBM;
445     staticData->subCharLen=0;
446 
447     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
448         /* basic parsing and handling of state-related items */
449         if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
450             continue;
451         }
452 
453         /* stop at the beginning of the mapping section */
454         if(uprv_strcmp(line, "CHARMAP")==0) {
455             break;
456         }
457 
458         /* collect the information from the header field, ignore unknown keys */
459         if(uprv_strcmp(key, "code_set_name")==0) {
460             if(*value!=0) {
461                 uprv_strcpy((char *)staticData->name, value);
462                 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
463             }
464         } else if(uprv_strcmp(key, "subchar")==0) {
465             uint8_t bytes[UCNV_EXT_MAX_BYTES];
466             int8_t length;
467 
468             s=value;
469             length=ucm_parseBytes(bytes, line, (const char **)&s);
470             if(1<=length && length<=4 && *s==0) {
471                 staticData->subCharLen=length;
472                 uprv_memcpy(staticData->subChar, bytes, length);
473             } else {
474                 fprintf(stderr, "error: illegal <subchar> %s\n", value);
475                 *pErrorCode=U_INVALID_TABLE_FORMAT;
476                 return;
477             }
478         } else if(uprv_strcmp(key, "subchar1")==0) {
479             uint8_t bytes[UCNV_EXT_MAX_BYTES];
480 
481             s=value;
482             if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
483                 staticData->subChar1=bytes[0];
484             } else {
485                 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
486                 *pErrorCode=U_INVALID_TABLE_FORMAT;
487                 return;
488             }
489         }
490     }
491 
492     /* copy values from the UCMFile to the static data */
493     staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
494     staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
495     staticData->conversionType=data->ucm->states.conversionType;
496 
497     if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
498         fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
499         *pErrorCode=U_INVALID_TABLE_FORMAT;
500         return;
501     }
502 
503     /*
504      * Now that we know the type, copy any 'default' values from the table.
505      * We need not check the type any further because the parser only
506      * recognizes what we have prototypes for.
507      *
508      * For delta (extension-only) tables, copy values from the base file
509      * instead, see createConverter().
510      */
511     if(data->ucm->baseName[0]==0) {
512         prototype=ucnv_converterStaticData[staticData->conversionType];
513         if(prototype!=NULL) {
514             if(staticData->name[0]==0) {
515                 uprv_strcpy((char *)staticData->name, prototype->name);
516             }
517 
518             if(staticData->codepage==0) {
519                 staticData->codepage=prototype->codepage;
520             }
521 
522             if(staticData->platform==0) {
523                 staticData->platform=prototype->platform;
524             }
525 
526             if(staticData->minBytesPerChar==0) {
527                 staticData->minBytesPerChar=prototype->minBytesPerChar;
528             }
529 
530             if(staticData->maxBytesPerChar==0) {
531                 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
532             }
533 
534             if(staticData->subCharLen==0) {
535                 staticData->subCharLen=prototype->subCharLen;
536                 if(prototype->subCharLen>0) {
537                     uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
538                 }
539             }
540         }
541     }
542 
543     if(data->ucm->states.outputType<0) {
544         data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
545     }
546 
547     if( staticData->subChar1!=0 &&
548             (staticData->minBytesPerChar>1 ||
549                 (staticData->conversionType!=UCNV_MBCS &&
550                  staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
551     ) {
552         fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
553         *pErrorCode=U_INVALID_TABLE_FORMAT;
554     }
555 }
556 
557 /* return TRUE if a base table was read, FALSE for an extension table */
558 static UBool
readFile(ConvData * data,const char * converterName,UErrorCode * pErrorCode)559 readFile(ConvData *data, const char* converterName,
560          UErrorCode *pErrorCode) {
561     char line[1024];
562     char *end;
563     FileStream *convFile;
564 
565     UCMStates *baseStates;
566     UBool dataIsBase;
567 
568     if(U_FAILURE(*pErrorCode)) {
569         return FALSE;
570     }
571 
572     data->ucm=ucm_open();
573 
574     convFile=T_FileStream_open(converterName, "r");
575     if(convFile==NULL) {
576         *pErrorCode=U_FILE_ACCESS_ERROR;
577         return FALSE;
578     }
579 
580     readHeader(data, convFile, converterName, pErrorCode);
581     if(U_FAILURE(*pErrorCode)) {
582         return FALSE;
583     }
584 
585     if(data->ucm->baseName[0]==0) {
586         dataIsBase=TRUE;
587         baseStates=&data->ucm->states;
588         ucm_processStates(baseStates, IGNORE_SISO_CHECK);
589     } else {
590         dataIsBase=FALSE;
591         baseStates=NULL;
592     }
593 
594     /* read the base table */
595     ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
596     if(U_FAILURE(*pErrorCode)) {
597         return FALSE;
598     }
599 
600     /* read an extension table if there is one */
601     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
602         end=uprv_strchr(line, 0);
603         while(line<end &&
604               (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
605             --end;
606         }
607         *end=0;
608 
609         if(line[0]=='#' || u_skipWhitespace(line)==end) {
610             continue; /* ignore empty and comment lines */
611         }
612 
613         if(0==uprv_strcmp(line, "CHARMAP")) {
614             /* read the extension table */
615             ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
616         } else {
617             fprintf(stderr, "unexpected text after the base mapping table\n");
618         }
619         break;
620     }
621 
622     T_FileStream_close(convFile);
623 
624     if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
625         fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
626         *pErrorCode=U_INVALID_TABLE_FORMAT;
627     }
628 
629     return dataIsBase;
630 }
631 
632 static void
createConverter(ConvData * data,const char * converterName,UErrorCode * pErrorCode)633 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
634     ConvData baseData;
635     UBool dataIsBase;
636 
637     UConverterStaticData *staticData;
638     UCMStates *states, *baseStates;
639 
640     if(U_FAILURE(*pErrorCode)) {
641         return;
642     }
643 
644     initConvData(data);
645 
646     dataIsBase=readFile(data, converterName, pErrorCode);
647     if(U_FAILURE(*pErrorCode)) {
648         return;
649     }
650 
651     staticData=&data->staticData;
652     states=&data->ucm->states;
653 
654     if(dataIsBase) {
655         /*
656          * Build a normal .cnv file with a base table
657          * and an optional extension table.
658          */
659         data->cnvData=MBCSOpen(data->ucm);
660         if(data->cnvData==NULL) {
661             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
662 
663         } else if(!data->cnvData->isValid(data->cnvData,
664                             staticData->subChar, staticData->subCharLen)
665         ) {
666             fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
667             *pErrorCode=U_INVALID_TABLE_FORMAT;
668 
669         } else if(staticData->subChar1!=0 &&
670                     !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
671         ) {
672             fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
673             *pErrorCode=U_INVALID_TABLE_FORMAT;
674 
675         } else if(
676             data->ucm->ext->mappingsLength>0 &&
677             !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
678         ) {
679             *pErrorCode=U_INVALID_TABLE_FORMAT;
680         } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
681             /* sort the table so that it can be turned into UTF-8-friendly data */
682             ucm_sortTable(data->ucm->base);
683         }
684 
685         if(U_SUCCESS(*pErrorCode)) {
686             if(
687                 /* add the base table after ucm_checkBaseExt()! */
688                 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
689             ) {
690                 *pErrorCode=U_INVALID_TABLE_FORMAT;
691             } else {
692                 /*
693                  * addTable() may have requested moving more mappings to the extension table
694                  * if they fit into the base toUnicode table but not into the
695                  * base fromUnicode table.
696                  * (Especially for UTF-8-friendly fromUnicode tables.)
697                  * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
698                  * to be excluded from the extension toUnicode data.
699                  * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
700                  * the base fromUnicode table.
701                  */
702                 ucm_moveMappings(data->ucm->base, data->ucm->ext);
703                 ucm_sortTable(data->ucm->ext);
704                 if(data->ucm->ext->mappingsLength>0) {
705                     /* prepare the extension table, if there is one */
706                     data->extData=CnvExtOpen(data->ucm);
707                     if(data->extData==NULL) {
708                         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
709                     } else if(
710                         !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
711                     ) {
712                         *pErrorCode=U_INVALID_TABLE_FORMAT;
713                     }
714                 }
715             }
716         }
717     } else {
718         /* Build an extension-only .cnv file. */
719         char baseFilename[500];
720         char *basename;
721 
722         initConvData(&baseData);
723 
724         /* assemble a path/filename for data->ucm->baseName */
725         uprv_strcpy(baseFilename, converterName);
726         basename=(char *)findBasename(baseFilename);
727         uprv_strcpy(basename, data->ucm->baseName);
728         uprv_strcat(basename, ".ucm");
729 
730         /* read the base table */
731         dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
732         if(U_FAILURE(*pErrorCode)) {
733             return;
734         } else if(!dataIsBase) {
735             fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
736             *pErrorCode=U_INVALID_TABLE_FORMAT;
737         } else {
738             /* prepare the extension table */
739             data->extData=CnvExtOpen(data->ucm);
740             if(data->extData==NULL) {
741                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
742             } else {
743                 /* fill in gaps in extension file header fields */
744                 UCMapping *m, *mLimit;
745                 uint8_t fallbackFlags;
746 
747                 baseStates=&baseData.ucm->states;
748                 if(states->conversionType==UCNV_DBCS) {
749                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
750                 } else if(states->minCharLength==0) {
751                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
752                 }
753                 if(states->maxCharLength<states->minCharLength) {
754                     staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
755                 }
756 
757                 if(staticData->subCharLen==0) {
758                     uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
759                     staticData->subCharLen=baseData.staticData.subCharLen;
760                 }
761                 /*
762                  * do not copy subChar1 -
763                  * only use what is explicitly specified
764                  * because it cannot be unset in the extension file header
765                  */
766 
767                 /* get the fallback flags */
768                 fallbackFlags=0;
769                 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
770                     m<mLimit && fallbackFlags!=3;
771                     ++m
772                 ) {
773                     if(m->f==1) {
774                         fallbackFlags|=1;
775                     } else if(m->f==3) {
776                         fallbackFlags|=2;
777                     }
778                 }
779 
780                 if(fallbackFlags&1) {
781                     staticData->hasFromUnicodeFallback=TRUE;
782                 }
783                 if(fallbackFlags&2) {
784                     staticData->hasToUnicodeFallback=TRUE;
785                 }
786 
787                 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
788                     fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
789                     *pErrorCode=U_INVALID_TABLE_FORMAT;
790 
791                 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
792                     fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
793                     *pErrorCode=U_INVALID_TABLE_FORMAT;
794 
795                 } else if(
796                     !ucm_checkValidity(data->ucm->ext, baseStates) ||
797                     !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
798                 ) {
799                     *pErrorCode=U_INVALID_TABLE_FORMAT;
800                 } else {
801                     if(states->maxCharLength>1) {
802                         /*
803                          * When building a normal .cnv file with a base table
804                          * for an MBCS (not SBCS) table with explicit precision flags,
805                          * the MBCSAddTable() function marks some mappings for moving
806                          * to the extension table.
807                          * They fit into the base toUnicode table but not into the
808                          * base fromUnicode table.
809                          * (Note: We do have explicit precision flags because they are
810                          * required for extension table generation, and
811                          * ucm_checkBaseExt() verified it.)
812                          *
813                          * We do not call MBCSAddTable() here (we probably could)
814                          * so we need to do the analysis before building the extension table.
815                          * We assume that MBCSAddTable() will build a UTF-8-friendly table.
816                          * Redundant mappings in the extension table are ok except they cost some size.
817                          *
818                          * Do this after ucm_checkBaseExt().
819                          */
820                         const MBCSData *mbcsData=MBCSGetDummy();
821                         int32_t needsMove=0;
822                         for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
823                             m<mLimit;
824                             ++m
825                         ) {
826                             if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
827                                 m->f|=MBCS_FROM_U_EXT_FLAG;
828                                 m->moveFlag=UCM_MOVE_TO_EXT;
829                                 ++needsMove;
830                             }
831                         }
832 
833                         if(needsMove!=0) {
834                             ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
835                             ucm_sortTable(data->ucm->ext);
836                         }
837                     }
838                     if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
839                         *pErrorCode=U_INVALID_TABLE_FORMAT;
840                     }
841                 }
842             }
843         }
844 
845         cleanupConvData(&baseData);
846     }
847 }
848 
849 /*
850  * Hey, Emacs, please set the following:
851  *
852  * Local Variables:
853  * indent-tabs-mode: nil
854  * End:
855  *
856  */
857