• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ********************************************************************************
3  *
4  *   Copyright (C) 1998-2008, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *
7  ********************************************************************************
8  *
9  *
10  *  makeconv.c:
11  *  tool creating a binary (compressed) representation of the conversion mapping
12  *  table (IBM NLTC ucmap format).
13  *
14  *  05/04/2000    helena     Added fallback mapping into the picture...
15  *  06/29/2000  helena      Major rewrite of the callback APIs.
16  */
17 
18 #include <stdio.h>
19 #include "unicode/putil.h"
20 #include "unicode/ucnv_err.h"
21 #include "ucnv_bld.h"
22 #include "ucnv_imp.h"
23 #include "ucnv_cnv.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "uinvchar.h"
27 #include "filestrm.h"
28 #include "toolutil.h"
29 #include "uoptions.h"
30 #include "unicode/udata.h"
31 #include "unewdata.h"
32 #include "uparse.h"
33 #include "ucm.h"
34 #include "makeconv.h"
35 #include "genmbcs.h"
36 
37 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
38 
39 #define DEBUG 0
40 
41 typedef struct ConvData {
42     UCMFile *ucm;
43     NewConverter *cnvData, *extData;
44     UConverterSharedData sharedData;
45     UConverterStaticData staticData;
46 } ConvData;
47 
48 static void
initConvData(ConvData * data)49 initConvData(ConvData *data) {
50     uprv_memset(data, 0, sizeof(ConvData));
51     data->sharedData.structSize=sizeof(UConverterSharedData);
52     data->staticData.structSize=sizeof(UConverterStaticData);
53     data->sharedData.staticData=&data->staticData;
54 }
55 
56 static void
cleanupConvData(ConvData * data)57 cleanupConvData(ConvData *data) {
58     if(data!=NULL) {
59         if(data->cnvData!=NULL) {
60             data->cnvData->close(data->cnvData);
61             data->cnvData=NULL;
62         }
63         if(data->extData!=NULL) {
64             data->extData->close(data->extData);
65             data->extData=NULL;
66         }
67         ucm_close(data->ucm);
68         data->ucm=NULL;
69     }
70 }
71 
/*
 * from ucnvstat.c - static prototypes of data-based converters,
 * indexed by conversion type (see readHeader())
 */
extern const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];

/*
 * Global command-line settings, assigned in main():
 *   VERBOSE - set from the verbose option
 *   SMALL   - set from the --small option
 */
UBool VERBOSE = FALSE;
UBool SMALL = FALSE;

/*
 * Read the .ucm file converterName and build the table data in *data.
 */
static void
createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);

/*
 * Set up the UNewData and write the converter.
 */
static void
writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);

/*
 * Whether writeConverterData() passes a copyright string to udata_create();
 * main() overwrites this from the copyright option.
 */
UBool haveCopyright=TRUE;
93 
/*
 * Data header description passed to udata_create() in writeConverterData().
 * main() copies the ICU version into dataVersion before any file is written.
 */
static UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
    {6, 2, 0, 0},                 /* formatVersion */
    {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
};
107 
108 static void
writeConverterData(ConvData * data,const char * cnvName,const char * cnvDir,UErrorCode * status)109 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
110 {
111     UNewDataMemory *mem = NULL;
112     uint32_t sz2;
113     uint32_t size = 0;
114     int32_t tableType;
115 
116     if(U_FAILURE(*status))
117       {
118         return;
119       }
120 
121     tableType=TABLE_NONE;
122     if(data->cnvData!=NULL) {
123         tableType|=TABLE_BASE;
124     }
125     if(data->extData!=NULL) {
126         tableType|=TABLE_EXT;
127     }
128 
129     mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
130 
131     if(U_FAILURE(*status))
132       {
133         fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
134                 cnvName,
135                 "cnv",
136                 u_errorName(*status));
137         return;
138       }
139 
140     if(VERBOSE)
141       {
142         printf("- Opened udata %s.%s\n", cnvName, "cnv");
143       }
144 
145 
146     /* all read only, clean, platform independent data.  Mmmm. :)  */
147     udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
148     size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
149     /* Now, write the table */
150     if(tableType&TABLE_BASE) {
151         size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
152     }
153     if(tableType&TABLE_EXT) {
154         size += data->extData->write(data->extData, &data->staticData, mem, tableType);
155     }
156 
157     sz2 = udata_finish(mem, status);
158     if(size != sz2)
159     {
160         fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
161         *status=U_INTERNAL_PROGRAM_ERROR;
162     }
163     if(VERBOSE)
164     {
165       printf("- Wrote %u bytes to the udata.\n", (int)sz2);
166     }
167 }
168 
/*
 * Indexes into options[] below; main() uses them to look up each option,
 * so the order here must match the order of the array entries.
 */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_COUNT
};

/* command-line options, parsed by u_parseArgs() in main() */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_COPYRIGHT,
    UOPTION_VERSION,
    UOPTION_DESTDIR,
    UOPTION_VERBOSE,
    /* shown in the usage text as --small only, takes no argument */
    { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 }
};
189 
main(int argc,char * argv[])190 int main(int argc, char* argv[])
191 {
192     ConvData data;
193     UErrorCode err = U_ZERO_ERROR, localError;
194     char outFileName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
195     const char* destdir, *arg;
196     size_t destdirlen;
197     char* dot = NULL, *outBasename;
198     char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
199     char cnvNameWithPkg[UCNV_MAX_FULL_FILE_NAME_LENGTH];
200     UVersionInfo icuVersion;
201     UBool printFilename;
202 
203     err = U_ZERO_ERROR;
204 
205     U_MAIN_INIT_ARGS(argc, argv);
206 
207     /* Set up the ICU version number */
208     u_getVersion(icuVersion);
209     uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
210 
211     /* preset then read command line options */
212     options[OPT_DESTDIR].value=u_getDataDirectory();
213     argc=u_parseArgs(argc, argv, LENGTHOF(options), options);
214 
215     /* error handling, printing usage message */
216     if(argc<0) {
217         fprintf(stderr,
218             "error in command line argument \"%s\"\n",
219             argv[-argc]);
220     } else if(argc<2) {
221         argc=-1;
222     }
223     if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
224         FILE *stdfile=argc<0 ? stderr : stdout;
225         fprintf(stdfile,
226             "usage: %s [-options] files...\n"
227             "\tread .ucm codepage mapping files and write .cnv files\n"
228             "options:\n"
229             "\t-h or -? or --help  this usage text\n"
230             "\t-V or --version     show a version message\n"
231             "\t-c or --copyright   include a copyright notice\n"
232             "\t-d or --destdir     destination directory, followed by the path\n"
233             "\t-v or --verbose     Turn on verbose output\n",
234             argv[0]);
235         fprintf(stdfile,
236             "\t      --small       Generate smaller .cnv files. They will be\n"
237             "\t                    significantly smaller but may not be compatible with\n"
238             "\t                    older versions of ICU and will require heap memory\n"
239             "\t                    allocation when loaded.\n");
240         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
241     }
242 
243     if(options[OPT_VERSION].doesOccur) {
244         printf("makeconv version %hu.%hu, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
245                dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
246         printf("%s\n", U_COPYRIGHT_STRING);
247         exit(0);
248     }
249 
250     /* get the options values */
251     haveCopyright = options[OPT_COPYRIGHT].doesOccur;
252     destdir = options[OPT_DESTDIR].value;
253     VERBOSE = options[OPT_VERBOSE].doesOccur;
254     SMALL = options[OPT_SMALL].doesOccur;
255 
256     if (destdir != NULL && *destdir != 0) {
257         uprv_strcpy(outFileName, destdir);
258         destdirlen = uprv_strlen(destdir);
259         outBasename = outFileName + destdirlen;
260         if (*(outBasename - 1) != U_FILE_SEP_CHAR) {
261             *outBasename++ = U_FILE_SEP_CHAR;
262             ++destdirlen;
263         }
264     } else {
265         destdirlen = 0;
266         outBasename = outFileName;
267     }
268 
269 #if DEBUG
270     {
271       int i;
272       printf("makeconv: processing %d files...\n", argc - 1);
273       for(i=1; i<argc; ++i) {
274         printf("%s ", argv[i]);
275       }
276       printf("\n");
277       fflush(stdout);
278     }
279 #endif
280 
281     err = U_ZERO_ERROR;
282     printFilename = (UBool) (argc > 2 || VERBOSE);
283     for (++argv; --argc; ++argv)
284     {
285         arg = getLongPathname(*argv);
286 
287         /* Check for potential buffer overflow */
288         if(strlen(arg) > UCNV_MAX_FULL_FILE_NAME_LENGTH)
289         {
290             fprintf(stderr, "%s\n", u_errorName(U_BUFFER_OVERFLOW_ERROR));
291             return U_BUFFER_OVERFLOW_ERROR;
292         }
293 
294         /*produces the right destination path for display*/
295         if (destdirlen != 0)
296         {
297             const char *basename;
298 
299             /* find the last file sepator */
300             basename = findBasename(arg);
301             uprv_strcpy(outBasename, basename);
302         }
303         else
304         {
305             uprv_strcpy(outFileName, arg);
306         }
307 
308         /*removes the extension if any is found*/
309         dot = uprv_strrchr(outBasename, '.');
310         if (dot)
311         {
312             *dot = '\0';
313         }
314 
315         /* the basename without extension is the converter name */
316         uprv_strcpy(cnvName, outBasename);
317 
318         /*Adds the target extension*/
319         uprv_strcat(outBasename, CONVERTER_FILE_EXTENSION);
320 
321 #if DEBUG
322         printf("makeconv: processing %s  ...\n", arg);
323         fflush(stdout);
324 #endif
325         localError = U_ZERO_ERROR;
326         initConvData(&data);
327         createConverter(&data, arg, &localError);
328 
329         if (U_FAILURE(localError))
330         {
331             /* if an error is found, print out an error msg and keep going */
332             fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
333                 u_errorName(localError));
334             if(U_SUCCESS(err)) {
335                 err = localError;
336             }
337         }
338         else
339         {
340             /* Insure the static data name matches the  file name */
341             /* Changed to ignore directory and only compare base name
342              LDH 1/2/08*/
343             char *p;
344             p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
345 
346             if(p == NULL)            /* OK, try alternate */
347             {
348                 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
349                 if(p == NULL)
350                 {
351                     p=cnvName; /* If no separators, no problem */
352                 }
353             }
354             else
355             {
356                 p++;   /* If found separtor, don't include it in compare */
357             }
358             if(uprv_stricmp(p,data.staticData.name))
359             {
360                 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
361                     cnvName,  CONVERTER_FILE_EXTENSION,
362                     data.staticData.name);
363             }
364 
365             uprv_strcpy((char*)data.staticData.name, cnvName);
366 
367             if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
368                 fprintf(stderr,
369                     "Error: A converter name must contain only invariant characters.\n"
370                     "%s is not a valid converter name.\n",
371                     data.staticData.name);
372                 if(U_SUCCESS(err)) {
373                     err = U_INVALID_TABLE_FORMAT;
374                 }
375             }
376 
377             uprv_strcpy(cnvNameWithPkg, cnvName);
378 
379             localError = U_ZERO_ERROR;
380             writeConverterData(&data, cnvNameWithPkg, destdir, &localError);
381 
382             if(U_FAILURE(localError))
383             {
384                 /* if an error is found, print out an error msg and keep going*/
385                 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName, arg,
386                     u_errorName(localError));
387                 if(U_SUCCESS(err)) {
388                     err = localError;
389                 }
390             }
391             else if (printFilename)
392             {
393                 puts(outBasename);
394             }
395         }
396         fflush(stdout);
397         fflush(stderr);
398 
399         cleanupConvData(&data);
400     }
401 
402     return err;
403 }
404 
405 static void
getPlatformAndCCSIDFromName(const char * name,int8_t * pPlatform,int32_t * pCCSID)406 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
407     if( (name[0]=='i' || name[0]=='I') &&
408         (name[1]=='b' || name[1]=='B') &&
409         (name[2]=='m' || name[2]=='M')
410     ) {
411         name+=3;
412         if(*name=='-') {
413             ++name;
414         }
415         *pPlatform=UCNV_IBM;
416         *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
417     } else {
418         *pPlatform=UCNV_UNKNOWN;
419         *pCCSID=0;
420     }
421 }
422 
423 static void
readHeader(ConvData * data,FileStream * convFile,const char * converterName,UErrorCode * pErrorCode)424 readHeader(ConvData *data,
425            FileStream* convFile,
426            const char* converterName,
427            UErrorCode *pErrorCode) {
428     char line[200];
429     char *s, *key, *value;
430     const UConverterStaticData *prototype;
431     UConverterStaticData *staticData;
432 
433     if(U_FAILURE(*pErrorCode)) {
434         return;
435     }
436 
437     staticData=&data->staticData;
438     staticData->platform=UCNV_IBM;
439     staticData->subCharLen=0;
440 
441     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
442         /* basic parsing and handling of state-related items */
443         if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
444             continue;
445         }
446 
447         /* stop at the beginning of the mapping section */
448         if(uprv_strcmp(line, "CHARMAP")==0) {
449             break;
450         }
451 
452         /* collect the information from the header field, ignore unknown keys */
453         if(uprv_strcmp(key, "code_set_name")==0) {
454             if(*value!=0) {
455                 uprv_strcpy((char *)staticData->name, value);
456                 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
457             }
458         } else if(uprv_strcmp(key, "subchar")==0) {
459             uint8_t bytes[UCNV_EXT_MAX_BYTES];
460             int8_t length;
461 
462             s=value;
463             length=ucm_parseBytes(bytes, line, (const char **)&s);
464             if(1<=length && length<=4 && *s==0) {
465                 staticData->subCharLen=length;
466                 uprv_memcpy(staticData->subChar, bytes, length);
467             } else {
468                 fprintf(stderr, "error: illegal <subchar> %s\n", value);
469                 *pErrorCode=U_INVALID_TABLE_FORMAT;
470                 return;
471             }
472         } else if(uprv_strcmp(key, "subchar1")==0) {
473             uint8_t bytes[UCNV_EXT_MAX_BYTES];
474 
475             s=value;
476             if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
477                 staticData->subChar1=bytes[0];
478             } else {
479                 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
480                 *pErrorCode=U_INVALID_TABLE_FORMAT;
481                 return;
482             }
483         }
484     }
485 
486     /* copy values from the UCMFile to the static data */
487     staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
488     staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
489     staticData->conversionType=data->ucm->states.conversionType;
490 
491     if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
492         fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
493         *pErrorCode=U_INVALID_TABLE_FORMAT;
494         return;
495     }
496 
497     /*
498      * Now that we know the type, copy any 'default' values from the table.
499      * We need not check the type any further because the parser only
500      * recognizes what we have prototypes for.
501      *
502      * For delta (extension-only) tables, copy values from the base file
503      * instead, see createConverter().
504      */
505     if(data->ucm->baseName[0]==0) {
506         prototype=ucnv_converterStaticData[staticData->conversionType];
507         if(prototype!=NULL) {
508             if(staticData->name[0]==0) {
509                 uprv_strcpy((char *)staticData->name, prototype->name);
510             }
511 
512             if(staticData->codepage==0) {
513                 staticData->codepage=prototype->codepage;
514             }
515 
516             if(staticData->platform==0) {
517                 staticData->platform=prototype->platform;
518             }
519 
520             if(staticData->minBytesPerChar==0) {
521                 staticData->minBytesPerChar=prototype->minBytesPerChar;
522             }
523 
524             if(staticData->maxBytesPerChar==0) {
525                 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
526             }
527 
528             if(staticData->subCharLen==0) {
529                 staticData->subCharLen=prototype->subCharLen;
530                 if(prototype->subCharLen>0) {
531                     uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
532                 }
533             }
534         }
535     }
536 
537     if(data->ucm->states.outputType<0) {
538         data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
539     }
540 
541     if( staticData->subChar1!=0 &&
542             (staticData->minBytesPerChar>1 ||
543                 (staticData->conversionType!=UCNV_MBCS &&
544                  staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
545     ) {
546         fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
547         *pErrorCode=U_INVALID_TABLE_FORMAT;
548     }
549 }
550 
551 /* return TRUE if a base table was read, FALSE for an extension table */
552 static UBool
readFile(ConvData * data,const char * converterName,UErrorCode * pErrorCode)553 readFile(ConvData *data, const char* converterName,
554          UErrorCode *pErrorCode) {
555     char line[200];
556     char *end;
557     FileStream *convFile;
558 
559     UCMStates *baseStates;
560     UBool dataIsBase;
561 
562     if(U_FAILURE(*pErrorCode)) {
563         return FALSE;
564     }
565 
566     data->ucm=ucm_open();
567 
568     convFile=T_FileStream_open(converterName, "r");
569     if(convFile==NULL) {
570         *pErrorCode=U_FILE_ACCESS_ERROR;
571         return FALSE;
572     }
573 
574     readHeader(data, convFile, converterName, pErrorCode);
575     if(U_FAILURE(*pErrorCode)) {
576         return FALSE;
577     }
578 
579     if(data->ucm->baseName[0]==0) {
580         dataIsBase=TRUE;
581         baseStates=&data->ucm->states;
582         ucm_processStates(baseStates);
583     } else {
584         dataIsBase=FALSE;
585         baseStates=NULL;
586     }
587 
588     /* read the base table */
589     ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
590     if(U_FAILURE(*pErrorCode)) {
591         return FALSE;
592     }
593 
594     /* read an extension table if there is one */
595     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
596         end=uprv_strchr(line, 0);
597         while(line<end &&
598               (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
599             --end;
600         }
601         *end=0;
602 
603         if(line[0]=='#' || u_skipWhitespace(line)==end) {
604             continue; /* ignore empty and comment lines */
605         }
606 
607         if(0==uprv_strcmp(line, "CHARMAP")) {
608             /* read the extension table */
609             ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
610         } else {
611             fprintf(stderr, "unexpected text after the base mapping table\n");
612         }
613         break;
614     }
615 
616     T_FileStream_close(convFile);
617 
618     if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
619         fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
620         *pErrorCode=U_INVALID_TABLE_FORMAT;
621     }
622 
623     return dataIsBase;
624 }
625 
626 static void
createConverter(ConvData * data,const char * converterName,UErrorCode * pErrorCode)627 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
628     ConvData baseData;
629     UBool dataIsBase;
630 
631     UConverterStaticData *staticData;
632     UCMStates *states, *baseStates;
633 
634     if(U_FAILURE(*pErrorCode)) {
635         return;
636     }
637 
638     initConvData(data);
639 
640     dataIsBase=readFile(data, converterName, pErrorCode);
641     if(U_FAILURE(*pErrorCode)) {
642         return;
643     }
644 
645     staticData=&data->staticData;
646     states=&data->ucm->states;
647 
648     if(dataIsBase) {
649         /*
650          * Build a normal .cnv file with a base table
651          * and an optional extension table.
652          */
653         data->cnvData=MBCSOpen(data->ucm);
654         if(data->cnvData==NULL) {
655             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
656 
657         } else if(!data->cnvData->isValid(data->cnvData,
658                             staticData->subChar, staticData->subCharLen)
659         ) {
660             fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
661             *pErrorCode=U_INVALID_TABLE_FORMAT;
662 
663         } else if(staticData->subChar1!=0 &&
664                     !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
665         ) {
666             fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
667             *pErrorCode=U_INVALID_TABLE_FORMAT;
668 
669         } else if(
670             data->ucm->ext->mappingsLength>0 &&
671             !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
672         ) {
673             *pErrorCode=U_INVALID_TABLE_FORMAT;
674         } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
675             /* sort the table so that it can be turned into UTF-8-friendly data */
676             ucm_sortTable(data->ucm->base);
677         }
678 
679         if(U_SUCCESS(*pErrorCode)) {
680             if(
681                 /* add the base table after ucm_checkBaseExt()! */
682                 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
683             ) {
684                 *pErrorCode=U_INVALID_TABLE_FORMAT;
685             } else {
686                 /*
687                  * addTable() may have requested moving more mappings to the extension table
688                  * if they fit into the base toUnicode table but not into the
689                  * base fromUnicode table.
690                  * (Especially for UTF-8-friendly fromUnicode tables.)
691                  * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
692                  * to be excluded from the extension toUnicode data.
693                  * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
694                  * the base fromUnicode table.
695                  */
696                 ucm_moveMappings(data->ucm->base, data->ucm->ext);
697                 ucm_sortTable(data->ucm->ext);
698                 if(data->ucm->ext->mappingsLength>0) {
699                     /* prepare the extension table, if there is one */
700                     data->extData=CnvExtOpen(data->ucm);
701                     if(data->extData==NULL) {
702                         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
703                     } else if(
704                         !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
705                     ) {
706                         *pErrorCode=U_INVALID_TABLE_FORMAT;
707                     }
708                 }
709             }
710         }
711     } else {
712         /* Build an extension-only .cnv file. */
713         char baseFilename[500];
714         char *basename;
715 
716         initConvData(&baseData);
717 
718         /* assemble a path/filename for data->ucm->baseName */
719         uprv_strcpy(baseFilename, converterName);
720         basename=(char *)findBasename(baseFilename);
721         uprv_strcpy(basename, data->ucm->baseName);
722         uprv_strcat(basename, ".ucm");
723 
724         /* read the base table */
725         dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
726         if(U_FAILURE(*pErrorCode)) {
727             return;
728         } else if(!dataIsBase) {
729             fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
730             *pErrorCode=U_INVALID_TABLE_FORMAT;
731         } else {
732             /* prepare the extension table */
733             data->extData=CnvExtOpen(data->ucm);
734             if(data->extData==NULL) {
735                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
736             } else {
737                 /* fill in gaps in extension file header fields */
738                 UCMapping *m, *mLimit;
739                 uint8_t fallbackFlags;
740 
741                 baseStates=&baseData.ucm->states;
742                 if(states->conversionType==UCNV_DBCS) {
743                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
744                 } else if(states->minCharLength==0) {
745                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
746                 }
747                 if(states->maxCharLength<states->minCharLength) {
748                     staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
749                 }
750 
751                 if(staticData->subCharLen==0) {
752                     uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
753                     staticData->subCharLen=baseData.staticData.subCharLen;
754                 }
755                 /*
756                  * do not copy subChar1 -
757                  * only use what is explicitly specified
758                  * because it cannot be unset in the extension file header
759                  */
760 
761                 /* get the fallback flags */
762                 fallbackFlags=0;
763                 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
764                     m<mLimit && fallbackFlags!=3;
765                     ++m
766                 ) {
767                     if(m->f==1) {
768                         fallbackFlags|=1;
769                     } else if(m->f==3) {
770                         fallbackFlags|=2;
771                     }
772                 }
773 
774                 if(fallbackFlags&1) {
775                     staticData->hasFromUnicodeFallback=TRUE;
776                 }
777                 if(fallbackFlags&2) {
778                     staticData->hasToUnicodeFallback=TRUE;
779                 }
780 
781                 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
782                     fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
783                     *pErrorCode=U_INVALID_TABLE_FORMAT;
784 
785                 } else if(1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
786                     fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
787                     *pErrorCode=U_INVALID_TABLE_FORMAT;
788 
789                 } else if(
790                     !ucm_checkValidity(data->ucm->ext, baseStates) ||
791                     !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
792                 ) {
793                     *pErrorCode=U_INVALID_TABLE_FORMAT;
794                 } else {
795                     if(states->maxCharLength>1) {
796                         /*
797                          * When building a normal .cnv file with a base table
798                          * for an MBCS (not SBCS) table with explicit precision flags,
799                          * the MBCSAddTable() function marks some mappings for moving
800                          * to the extension table.
801                          * They fit into the base toUnicode table but not into the
802                          * base fromUnicode table.
803                          * (Note: We do have explicit precision flags because they are
804                          * required for extension table generation, and
805                          * ucm_checkBaseExt() verified it.)
806                          *
807                          * We do not call MBCSAddTable() here (we probably could)
808                          * so we need to do the analysis before building the extension table.
809                          * We assume that MBCSAddTable() will build a UTF-8-friendly table.
810                          * Redundant mappings in the extension table are ok except they cost some size.
811                          *
812                          * Do this after ucm_checkBaseExt().
813                          */
814                         const MBCSData *mbcsData=MBCSGetDummy();
815                         int32_t needsMove=0;
816                         for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
817                             m<mLimit;
818                             ++m
819                         ) {
820                             if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
821                                 m->f|=MBCS_FROM_U_EXT_FLAG;
822                                 m->moveFlag=UCM_MOVE_TO_EXT;
823                                 ++needsMove;
824                             }
825                         }
826 
827                         if(needsMove!=0) {
828                             ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
829                             ucm_sortTable(data->ucm->ext);
830                         }
831                     }
832                     if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
833                         *pErrorCode=U_INVALID_TABLE_FORMAT;
834                     }
835                 }
836             }
837         }
838 
839         cleanupConvData(&baseData);
840     }
841 }
842 
843 /*
844  * Hey, Emacs, please set the following:
845  *
846  * Local Variables:
847  * indent-tabs-mode: nil
848  * End:
849  *
850  */
851