• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  ********************************************************************************
5  *
6  *   Copyright (C) 1998-2015, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  ********************************************************************************
10  *
11  *
12  *  makeconv.cpp:
13  *  tool creating a binary (compressed) representation of the conversion mapping
14  *  table (IBM NLTC ucmap format).
15  *
16  *  05/04/2000    helena     Added fallback mapping into the picture...
17  *  06/29/2000  helena      Major rewrite of the callback APIs.
18  */
19 
20 #include <stdio.h>
21 #include "unicode/putil.h"
22 #include "unicode/ucnv_err.h"
23 #include "charstr.h"
24 #include "ucnv_bld.h"
25 #include "ucnv_imp.h"
26 #include "ucnv_cnv.h"
27 #include "cstring.h"
28 #include "cmemory.h"
29 #include "uinvchar.h"
30 #include "filestrm.h"
31 #include "toolutil.h"
32 #include "uoptions.h"
33 #include "unicode/udata.h"
34 #include "unewdata.h"
35 #include "uparse.h"
36 #include "ucm.h"
37 #include "makeconv.h"
38 #include "genmbcs.h"
39 
40 #define DEBUG 0
41 
/*
 * Working state for building one converter file.
 * Set up by initConvData(), filled in by createConverter(), consumed by
 * writeConverterData() and released by cleanupConvData().
 */
typedef struct ConvData {
    UCMFile *ucm;                       /* parsed .ucm file; opened in readFile(), closed in cleanupConvData() */
    NewConverter *cnvData, *extData;    /* base-table and extension-table builders; either may remain NULL */
    UConverterSharedData sharedData;    /* initConvData() points its staticData member at the field below */
    UConverterStaticData staticData;    /* header fields; written as the first block of the .cnv file */
} ConvData;
48 
49 static void
initConvData(ConvData * data)50 initConvData(ConvData *data) {
51     uprv_memset(data, 0, sizeof(ConvData));
52     data->sharedData.structSize=sizeof(UConverterSharedData);
53     data->staticData.structSize=sizeof(UConverterStaticData);
54     data->sharedData.staticData=&data->staticData;
55 }
56 
57 static void
cleanupConvData(ConvData * data)58 cleanupConvData(ConvData *data) {
59     if(data!=NULL) {
60         if(data->cnvData!=NULL) {
61             data->cnvData->close(data->cnvData);
62             data->cnvData=NULL;
63         }
64         if(data->extData!=NULL) {
65             data->extData->close(data->extData);
66             data->extData=NULL;
67         }
68         ucm_close(data->ucm);
69         data->ucm=NULL;
70     }
71 }
72 
/*
 * from ucnvstat.c - static prototypes of data-based converters
 *
 * readHeader() indexes this array by the conversion type of the .ucm file
 * to obtain default values for header fields that the file left unset;
 * NULL entries are skipped there.
 */
U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
77 
/*
 * Global - verbosity
 *
 * Global command-line flags; main() sets them from the parsed options.
 */
UBool VERBOSE = FALSE;              /* --verbose: print progress messages */
UBool QUIET = FALSE;                /* --quiet: suppresses the name-mismatch warning in main() */
UBool SMALL = FALSE;                /* --small: not read in this file; presumably used by the table writers - TODO confirm */
UBool IGNORE_SISO_CHECK = FALSE;    /* --ignore-siso-check: passed to ucm_processStates() in readFile() */
85 
/*
 * Read the .ucm file named by converterName and build the converter
 * table data in *data. Defined after main().
 */
static void
createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);

/*
 * Set up the UNewData and write the converter..
 */
static void
writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
94 
/*
 * When TRUE, writeConverterData() passes U_COPYRIGHT_STRING to udata_create();
 * main() overwrites this with the state of the --copyright option.
 */
UBool haveCopyright=TRUE;
96 
/*
 * Data header descriptor handed to udata_create() in writeConverterData().
 * main() copies the running ICU version into dataVersion and prints
 * formatVersion[0].formatVersion[1] as the tool version.
 */
static UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
    {6, 2, 0, 0},                 /* formatVersion */
    {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
};
110 
111 static void
writeConverterData(ConvData * data,const char * cnvName,const char * cnvDir,UErrorCode * status)112 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
113 {
114     UNewDataMemory *mem = NULL;
115     uint32_t sz2;
116     uint32_t size = 0;
117     int32_t tableType;
118 
119     if(U_FAILURE(*status))
120       {
121         return;
122       }
123 
124     tableType=TABLE_NONE;
125     if(data->cnvData!=NULL) {
126         tableType|=TABLE_BASE;
127     }
128     if(data->extData!=NULL) {
129         tableType|=TABLE_EXT;
130     }
131 
132     mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
133 
134     if(U_FAILURE(*status))
135       {
136         fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
137                 cnvName,
138                 "cnv",
139                 u_errorName(*status));
140         return;
141       }
142 
143     if(VERBOSE)
144       {
145         printf("- Opened udata %s.%s\n", cnvName, "cnv");
146       }
147 
148 
149     /* all read only, clean, platform independent data.  Mmmm. :)  */
150     udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
151     size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
152     /* Now, write the table */
153     if(tableType&TABLE_BASE) {
154         size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
155     }
156     if(tableType&TABLE_EXT) {
157         size += data->extData->write(data->extData, &data->staticData, mem, tableType);
158     }
159 
160     sz2 = udata_finish(mem, status);
161     if(size != sz2)
162     {
163         fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
164         *status=U_INTERNAL_PROGRAM_ERROR;
165     }
166     if(VERBOSE)
167     {
168       printf("- Wrote %u bytes to the udata.\n", (int)sz2);
169     }
170 }
171 
/*
 * Indexes into options[]; main() uses them to look up parsed options,
 * so the order here must match the order of the entries in that array.
 */
enum {
    OPT_HELP_H,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_IGNORE_SISO_CHECK,
    OPT_QUIET,

    OPT_COUNT
};
185 
/*
 * Command-line option table handed to u_parseArgs() in main().
 * Entries are addressed with the OPT_* enum constants, so the order of the
 * entries must match that enum.
 */
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_COPYRIGHT,
    UOPTION_VERSION,
    UOPTION_DESTDIR,
    UOPTION_VERBOSE,
    /* NOTE(review): '\1' presumably means "long name only, no short option letter" - confirm against uoptions.h */
    { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
    UOPTION_QUIET,
};
197 
main(int argc,char * argv[])198 int main(int argc, char* argv[])
199 {
200     ConvData data;
201     char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
202 
203     U_MAIN_INIT_ARGS(argc, argv);
204 
205     /* Set up the ICU version number */
206     UVersionInfo icuVersion;
207     u_getVersion(icuVersion);
208     uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
209 
210     /* preset then read command line options */
211     options[OPT_DESTDIR].value=u_getDataDirectory();
212     argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
213 
214     /* error handling, printing usage message */
215     if(argc<0) {
216         fprintf(stderr,
217             "error in command line argument \"%s\"\n",
218             argv[-argc]);
219     } else if(argc<2) {
220         argc=-1;
221     }
222     if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
223         FILE *stdfile=argc<0 ? stderr : stdout;
224         fprintf(stdfile,
225             "usage: %s [-options] files...\n"
226             "\tread .ucm codepage mapping files and write .cnv files\n"
227             "options:\n"
228             "\t-h or -? or --help  this usage text\n"
229             "\t-V or --version     show a version message\n"
230             "\t-c or --copyright   include a copyright notice\n"
231             "\t-d or --destdir     destination directory, followed by the path\n"
232             "\t-v or --verbose     Turn on verbose output\n"
233             "\t-q or --quiet       do not display warnings and progress\n",
234             argv[0]);
235         fprintf(stdfile,
236             "\t      --small       Generate smaller .cnv files. They will be\n"
237             "\t                    significantly smaller but may not be compatible with\n"
238             "\t                    older versions of ICU and will require heap memory\n"
239             "\t                    allocation when loaded.\n"
240             "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
241         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
242     }
243 
244     if(options[OPT_VERSION].doesOccur) {
245         printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
246                dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
247         printf("%s\n", U_COPYRIGHT_STRING);
248         exit(0);
249     }
250 
251     /* get the options values */
252     haveCopyright = options[OPT_COPYRIGHT].doesOccur;
253     const char *destdir = options[OPT_DESTDIR].value;
254     VERBOSE = options[OPT_VERBOSE].doesOccur;
255     QUIET = options[OPT_QUIET].doesOccur;
256     SMALL = options[OPT_SMALL].doesOccur;
257 
258     if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
259         IGNORE_SISO_CHECK = TRUE;
260     }
261 
262     icu::CharString outFileName;
263     UErrorCode err = U_ZERO_ERROR;
264     if (destdir != NULL && *destdir != 0) {
265         outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);
266         if (U_FAILURE(err)) {
267             return err;
268         }
269     }
270     int32_t outBasenameStart = outFileName.length();
271 
272 #if DEBUG
273     {
274       int i;
275       printf("makeconv: processing %d files...\n", argc - 1);
276       for(i=1; i<argc; ++i) {
277         printf("%s ", argv[i]);
278       }
279       printf("\n");
280       fflush(stdout);
281     }
282 #endif
283 
284     UBool printFilename = (UBool) (argc > 2 || VERBOSE);
285     for (++argv; --argc; ++argv)
286     {
287         UErrorCode localError = U_ZERO_ERROR;
288         const char *arg = getLongPathname(*argv);
289 
290         /*produces the right destination path for display*/
291         outFileName.truncate(outBasenameStart);
292         if (outBasenameStart != 0)
293         {
294             /* find the last file sepator */
295             const char *basename = findBasename(arg);
296             outFileName.append(basename, localError);
297         }
298         else
299         {
300             outFileName.append(arg, localError);
301         }
302         if (U_FAILURE(localError)) {
303             return localError;
304         }
305 
306         /*removes the extension if any is found*/
307         int32_t lastDotIndex = outFileName.lastIndexOf('.');
308         if (lastDotIndex >= outBasenameStart) {
309             outFileName.truncate(lastDotIndex);
310         }
311 
312         /* the basename without extension is the converter name */
313         if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
314             fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);
315             return U_BUFFER_OVERFLOW_ERROR;
316         }
317         uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);
318 
319         /*Adds the target extension*/
320         outFileName.append(CONVERTER_FILE_EXTENSION, localError);
321         if (U_FAILURE(localError)) {
322             return localError;
323         }
324 
325 #if DEBUG
326         printf("makeconv: processing %s  ...\n", arg);
327         fflush(stdout);
328 #endif
329         initConvData(&data);
330         createConverter(&data, arg, &localError);
331 
332         if (U_FAILURE(localError))
333         {
334             /* if an error is found, print out an error msg and keep going */
335             fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
336                     outFileName.data(), arg, u_errorName(localError));
337             if(U_SUCCESS(err)) {
338                 err = localError;
339             }
340         }
341         else
342         {
343             /* Insure the static data name matches the  file name */
344             /* Changed to ignore directory and only compare base name
345              LDH 1/2/08*/
346             char *p;
347             p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
348 
349             if(p == NULL)            /* OK, try alternate */
350             {
351                 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
352                 if(p == NULL)
353                 {
354                     p=cnvName; /* If no separators, no problem */
355                 }
356             }
357             else
358             {
359                 p++;   /* If found separator, don't include it in compare */
360             }
361             if(uprv_stricmp(p,data.staticData.name) && !QUIET)
362             {
363                 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
364                     cnvName,  CONVERTER_FILE_EXTENSION,
365                     data.staticData.name);
366             }
367 
368             uprv_strcpy((char*)data.staticData.name, cnvName);
369 
370             if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
371                 fprintf(stderr,
372                     "Error: A converter name must contain only invariant characters.\n"
373                     "%s is not a valid converter name.\n",
374                     data.staticData.name);
375                 if(U_SUCCESS(err)) {
376                     err = U_INVALID_TABLE_FORMAT;
377                 }
378             }
379 
380             localError = U_ZERO_ERROR;
381             writeConverterData(&data, cnvName, destdir, &localError);
382 
383             if(U_FAILURE(localError))
384             {
385                 /* if an error is found, print out an error msg and keep going*/
386                 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,
387                     u_errorName(localError));
388                 if(U_SUCCESS(err)) {
389                     err = localError;
390                 }
391             }
392             else if (printFilename)
393             {
394                 puts(outFileName.data() + outBasenameStart);
395             }
396         }
397         fflush(stdout);
398         fflush(stderr);
399 
400         cleanupConvData(&data);
401     }
402 
403     return err;
404 }
405 
406 static void
getPlatformAndCCSIDFromName(const char * name,int8_t * pPlatform,int32_t * pCCSID)407 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
408     if( (name[0]=='i' || name[0]=='I') &&
409         (name[1]=='b' || name[1]=='B') &&
410         (name[2]=='m' || name[2]=='M')
411     ) {
412         name+=3;
413         if(*name=='-') {
414             ++name;
415         }
416         *pPlatform=UCNV_IBM;
417         *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
418     } else {
419         *pPlatform=UCNV_UNKNOWN;
420         *pCCSID=0;
421     }
422 }
423 
424 static void
readHeader(ConvData * data,FileStream * convFile,UErrorCode * pErrorCode)425 readHeader(ConvData *data,
426            FileStream* convFile,
427            UErrorCode *pErrorCode) {
428     char line[1024];
429     char *s, *key, *value;
430     const UConverterStaticData *prototype;
431     UConverterStaticData *staticData;
432 
433     if(U_FAILURE(*pErrorCode)) {
434         return;
435     }
436 
437     staticData=&data->staticData;
438     staticData->platform=UCNV_IBM;
439     staticData->subCharLen=0;
440 
441     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
442         /* basic parsing and handling of state-related items */
443         if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
444             continue;
445         }
446 
447         /* stop at the beginning of the mapping section */
448         if(uprv_strcmp(line, "CHARMAP")==0) {
449             break;
450         }
451 
452         /* collect the information from the header field, ignore unknown keys */
453         if(uprv_strcmp(key, "code_set_name")==0) {
454             if(*value!=0) {
455                 uprv_strcpy((char *)staticData->name, value);
456                 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
457             }
458         } else if(uprv_strcmp(key, "subchar")==0) {
459             uint8_t bytes[UCNV_EXT_MAX_BYTES];
460             int8_t length;
461 
462             s=value;
463             length=ucm_parseBytes(bytes, line, (const char **)&s);
464             if(1<=length && length<=4 && *s==0) {
465                 staticData->subCharLen=length;
466                 uprv_memcpy(staticData->subChar, bytes, length);
467             } else {
468                 fprintf(stderr, "error: illegal <subchar> %s\n", value);
469                 *pErrorCode=U_INVALID_TABLE_FORMAT;
470                 return;
471             }
472         } else if(uprv_strcmp(key, "subchar1")==0) {
473             uint8_t bytes[UCNV_EXT_MAX_BYTES];
474 
475             s=value;
476             if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
477                 staticData->subChar1=bytes[0];
478             } else {
479                 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
480                 *pErrorCode=U_INVALID_TABLE_FORMAT;
481                 return;
482             }
483         }
484     }
485 
486     /* copy values from the UCMFile to the static data */
487     staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
488     staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
489     staticData->conversionType=data->ucm->states.conversionType;
490 
491     if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
492         fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
493         *pErrorCode=U_INVALID_TABLE_FORMAT;
494         return;
495     }
496 
497     /*
498      * Now that we know the type, copy any 'default' values from the table.
499      * We need not check the type any further because the parser only
500      * recognizes what we have prototypes for.
501      *
502      * For delta (extension-only) tables, copy values from the base file
503      * instead, see createConverter().
504      */
505     if(data->ucm->baseName[0]==0) {
506         prototype=ucnv_converterStaticData[staticData->conversionType];
507         if(prototype!=NULL) {
508             if(staticData->name[0]==0) {
509                 uprv_strcpy((char *)staticData->name, prototype->name);
510             }
511 
512             if(staticData->codepage==0) {
513                 staticData->codepage=prototype->codepage;
514             }
515 
516             if(staticData->platform==0) {
517                 staticData->platform=prototype->platform;
518             }
519 
520             if(staticData->minBytesPerChar==0) {
521                 staticData->minBytesPerChar=prototype->minBytesPerChar;
522             }
523 
524             if(staticData->maxBytesPerChar==0) {
525                 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
526             }
527 
528             if(staticData->subCharLen==0) {
529                 staticData->subCharLen=prototype->subCharLen;
530                 if(prototype->subCharLen>0) {
531                     uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
532                 }
533             }
534         }
535     }
536 
537     if(data->ucm->states.outputType<0) {
538         data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
539     }
540 
541     if( staticData->subChar1!=0 &&
542             (staticData->minBytesPerChar>1 ||
543                 (staticData->conversionType!=UCNV_MBCS &&
544                  staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
545     ) {
546         fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
547         *pErrorCode=U_INVALID_TABLE_FORMAT;
548     }
549 }
550 
551 /* return TRUE if a base table was read, FALSE for an extension table */
552 static UBool
readFile(ConvData * data,const char * converterName,UErrorCode * pErrorCode)553 readFile(ConvData *data, const char* converterName,
554          UErrorCode *pErrorCode) {
555     char line[1024];
556     char *end;
557     FileStream *convFile;
558 
559     UCMStates *baseStates;
560     UBool dataIsBase;
561 
562     if(U_FAILURE(*pErrorCode)) {
563         return FALSE;
564     }
565 
566     data->ucm=ucm_open();
567 
568     convFile=T_FileStream_open(converterName, "r");
569     if(convFile==NULL) {
570         *pErrorCode=U_FILE_ACCESS_ERROR;
571         return FALSE;
572     }
573 
574     readHeader(data, convFile, pErrorCode);
575     if(U_FAILURE(*pErrorCode)) {
576         return FALSE;
577     }
578 
579     if(data->ucm->baseName[0]==0) {
580         dataIsBase=TRUE;
581         baseStates=&data->ucm->states;
582         ucm_processStates(baseStates, IGNORE_SISO_CHECK);
583     } else {
584         dataIsBase=FALSE;
585         baseStates=NULL;
586     }
587 
588     /* read the base table */
589     ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
590     if(U_FAILURE(*pErrorCode)) {
591         return FALSE;
592     }
593 
594     /* read an extension table if there is one */
595     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
596         end=uprv_strchr(line, 0);
597         while(line<end &&
598               (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
599             --end;
600         }
601         *end=0;
602 
603         if(line[0]=='#' || u_skipWhitespace(line)==end) {
604             continue; /* ignore empty and comment lines */
605         }
606 
607         if(0==uprv_strcmp(line, "CHARMAP")) {
608             /* read the extension table */
609             ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
610         } else {
611             fprintf(stderr, "unexpected text after the base mapping table\n");
612         }
613         break;
614     }
615 
616     T_FileStream_close(convFile);
617 
618     if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
619         fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
620         *pErrorCode=U_INVALID_TABLE_FORMAT;
621     }
622 
623     return dataIsBase;
624 }
625 
626 static void
createConverter(ConvData * data,const char * converterName,UErrorCode * pErrorCode)627 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
628     ConvData baseData;
629     UBool dataIsBase;
630 
631     UConverterStaticData *staticData;
632     UCMStates *states, *baseStates;
633 
634     if(U_FAILURE(*pErrorCode)) {
635         return;
636     }
637 
638     initConvData(data);
639 
640     dataIsBase=readFile(data, converterName, pErrorCode);
641     if(U_FAILURE(*pErrorCode)) {
642         return;
643     }
644 
645     staticData=&data->staticData;
646     states=&data->ucm->states;
647 
648     if(dataIsBase) {
649         /*
650          * Build a normal .cnv file with a base table
651          * and an optional extension table.
652          */
653         data->cnvData=MBCSOpen(data->ucm);
654         if(data->cnvData==NULL) {
655             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
656 
657         } else if(!data->cnvData->isValid(data->cnvData,
658                             staticData->subChar, staticData->subCharLen)
659         ) {
660             fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
661             *pErrorCode=U_INVALID_TABLE_FORMAT;
662 
663         } else if(staticData->subChar1!=0 &&
664                     !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
665         ) {
666             fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
667             *pErrorCode=U_INVALID_TABLE_FORMAT;
668 
669         } else if(
670             data->ucm->ext->mappingsLength>0 &&
671             !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
672         ) {
673             *pErrorCode=U_INVALID_TABLE_FORMAT;
674         } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
675             /* sort the table so that it can be turned into UTF-8-friendly data */
676             ucm_sortTable(data->ucm->base);
677         }
678 
679         if(U_SUCCESS(*pErrorCode)) {
680             if(
681                 /* add the base table after ucm_checkBaseExt()! */
682                 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
683             ) {
684                 *pErrorCode=U_INVALID_TABLE_FORMAT;
685             } else {
686                 /*
687                  * addTable() may have requested moving more mappings to the extension table
688                  * if they fit into the base toUnicode table but not into the
689                  * base fromUnicode table.
690                  * (Especially for UTF-8-friendly fromUnicode tables.)
691                  * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
692                  * to be excluded from the extension toUnicode data.
693                  * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
694                  * the base fromUnicode table.
695                  */
696                 ucm_moveMappings(data->ucm->base, data->ucm->ext);
697                 ucm_sortTable(data->ucm->ext);
698                 if(data->ucm->ext->mappingsLength>0) {
699                     /* prepare the extension table, if there is one */
700                     data->extData=CnvExtOpen(data->ucm);
701                     if(data->extData==NULL) {
702                         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
703                     } else if(
704                         !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
705                     ) {
706                         *pErrorCode=U_INVALID_TABLE_FORMAT;
707                     }
708                 }
709             }
710         }
711     } else {
712         /* Build an extension-only .cnv file. */
713         char baseFilename[500];
714         char *basename;
715 
716         initConvData(&baseData);
717 
718         /* assemble a path/filename for data->ucm->baseName */
719         uprv_strcpy(baseFilename, converterName);
720         basename=(char *)findBasename(baseFilename);
721         uprv_strcpy(basename, data->ucm->baseName);
722         uprv_strcat(basename, ".ucm");
723 
724         /* read the base table */
725         dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
726         if(U_FAILURE(*pErrorCode)) {
727             return;
728         } else if(!dataIsBase) {
729             fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
730             *pErrorCode=U_INVALID_TABLE_FORMAT;
731         } else {
732             /* prepare the extension table */
733             data->extData=CnvExtOpen(data->ucm);
734             if(data->extData==NULL) {
735                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
736             } else {
737                 /* fill in gaps in extension file header fields */
738                 UCMapping *m, *mLimit;
739                 uint8_t fallbackFlags;
740 
741                 baseStates=&baseData.ucm->states;
742                 if(states->conversionType==UCNV_DBCS) {
743                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
744                 } else if(states->minCharLength==0) {
745                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
746                 }
747                 if(states->maxCharLength<states->minCharLength) {
748                     staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
749                 }
750 
751                 if(staticData->subCharLen==0) {
752                     uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
753                     staticData->subCharLen=baseData.staticData.subCharLen;
754                 }
755                 /*
756                  * do not copy subChar1 -
757                  * only use what is explicitly specified
758                  * because it cannot be unset in the extension file header
759                  */
760 
761                 /* get the fallback flags */
762                 fallbackFlags=0;
763                 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
764                     m<mLimit && fallbackFlags!=3;
765                     ++m
766                 ) {
767                     if(m->f==1) {
768                         fallbackFlags|=1;
769                     } else if(m->f==3) {
770                         fallbackFlags|=2;
771                     }
772                 }
773 
774                 if(fallbackFlags&1) {
775                     staticData->hasFromUnicodeFallback=TRUE;
776                 }
777                 if(fallbackFlags&2) {
778                     staticData->hasToUnicodeFallback=TRUE;
779                 }
780 
781                 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
782                     fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
783                     *pErrorCode=U_INVALID_TABLE_FORMAT;
784 
785                 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
786                     fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
787                     *pErrorCode=U_INVALID_TABLE_FORMAT;
788 
789                 } else if(
790                     !ucm_checkValidity(data->ucm->ext, baseStates) ||
791                     !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
792                 ) {
793                     *pErrorCode=U_INVALID_TABLE_FORMAT;
794                 } else {
795                     if(states->maxCharLength>1) {
796                         /*
797                          * When building a normal .cnv file with a base table
798                          * for an MBCS (not SBCS) table with explicit precision flags,
799                          * the MBCSAddTable() function marks some mappings for moving
800                          * to the extension table.
801                          * They fit into the base toUnicode table but not into the
802                          * base fromUnicode table.
803                          * (Note: We do have explicit precision flags because they are
804                          * required for extension table generation, and
805                          * ucm_checkBaseExt() verified it.)
806                          *
807                          * We do not call MBCSAddTable() here (we probably could)
808                          * so we need to do the analysis before building the extension table.
809                          * We assume that MBCSAddTable() will build a UTF-8-friendly table.
810                          * Redundant mappings in the extension table are ok except they cost some size.
811                          *
812                          * Do this after ucm_checkBaseExt().
813                          */
814                         const MBCSData *mbcsData=MBCSGetDummy();
815                         int32_t needsMove=0;
816                         for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
817                             m<mLimit;
818                             ++m
819                         ) {
820                             if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
821                                 m->f|=MBCS_FROM_U_EXT_FLAG;
822                                 m->moveFlag=UCM_MOVE_TO_EXT;
823                                 ++needsMove;
824                             }
825                         }
826 
827                         if(needsMove!=0) {
828                             ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
829                             ucm_sortTable(data->ucm->ext);
830                         }
831                     }
832                     if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
833                         *pErrorCode=U_INVALID_TABLE_FORMAT;
834                     }
835                 }
836             }
837         }
838 
839         cleanupConvData(&baseData);
840     }
841 }
842 
843 /*
844  * Hey, Emacs, please set the following:
845  *
846  * Local Variables:
847  * indent-tabs-mode: nil
848  * End:
849  *
850  */
851