• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  ********************************************************************************
5  *
6  *   Copyright (C) 1998-2015, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  ********************************************************************************
10  *
11  *
12  *  makeconv.cpp:
13  *  tool creating a binary (compressed) representation of the conversion mapping
14  *  table (IBM NLTC ucmap format).
15  *
16  *  05/04/2000    helena     Added fallback mapping into the picture...
17  *  06/29/2000  helena      Major rewrite of the callback APIs.
18  */
19 
20 #include <stdio.h>
21 #include "unicode/putil.h"
22 #include "unicode/ucnv_err.h"
23 #include "charstr.h"
24 #include "ucnv_bld.h"
25 #include "ucnv_imp.h"
26 #include "ucnv_cnv.h"
27 #include "cstring.h"
28 #include "cmemory.h"
29 #include "uinvchar.h"
30 #include "filestrm.h"
31 #include "toolutil.h"
32 #include "uoptions.h"
33 #include "unicode/udata.h"
34 #include "unewdata.h"
35 #include "uparse.h"
36 #include "ucm.h"
37 #include "makeconv.h"
38 #include "genmbcs.h"
39 
40 #define DEBUG 0
41 
42 typedef struct ConvData {
43     UCMFile *ucm;
44     NewConverter *cnvData, *extData;
45     UConverterSharedData sharedData;
46     UConverterStaticData staticData;
47 } ConvData;
48 
49 static void
initConvData(ConvData * data)50 initConvData(ConvData *data) {
51     uprv_memset(data, 0, sizeof(ConvData));
52     data->sharedData.structSize=sizeof(UConverterSharedData);
53     data->staticData.structSize=sizeof(UConverterStaticData);
54     data->sharedData.staticData=&data->staticData;
55 }
56 
57 static void
cleanupConvData(ConvData * data)58 cleanupConvData(ConvData *data) {
59     if(data!=nullptr) {
60         if(data->cnvData!=nullptr) {
61             data->cnvData->close(data->cnvData);
62             data->cnvData=nullptr;
63         }
64         if(data->extData!=nullptr) {
65             data->extData->close(data->extData);
66             data->extData=nullptr;
67         }
68         ucm_close(data->ucm);
69         data->ucm=nullptr;
70     }
71 }
72 
73 /*
74  * from ucnvstat.c - static prototypes of data-based converters
75  */
76 U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
77 
78 /*
79  * Global - verbosity
80  */
81 UBool VERBOSE = false;
82 UBool QUIET = false;
83 UBool SMALL = false;
84 UBool IGNORE_SISO_CHECK = false;
85 
86 static void
87 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
88 
89 /*
90  * Set up the UNewData and write the converter..
91  */
92 static void
93 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
94 
95 UBool haveCopyright=true;
96 
97 static UDataInfo dataInfo={
98     sizeof(UDataInfo),
99     0,
100 
101     U_IS_BIG_ENDIAN,
102     U_CHARSET_FAMILY,
103     sizeof(char16_t),
104     0,
105 
106     {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */
107     {6, 2, 0, 0},                 /* formatVersion */
108     {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */
109 };
110 
111 static void
writeConverterData(ConvData * data,const char * cnvName,const char * cnvDir,UErrorCode * status)112 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
113 {
114     UNewDataMemory *mem = nullptr;
115     uint32_t sz2;
116     uint32_t size = 0;
117     int32_t tableType;
118 
119     if(U_FAILURE(*status))
120       {
121         return;
122       }
123 
124     tableType=TABLE_NONE;
125     if(data->cnvData!=nullptr) {
126         tableType|=TABLE_BASE;
127     }
128     if(data->extData!=nullptr) {
129         tableType|=TABLE_EXT;
130     }
131 
132     mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : nullptr, status);
133 
134     if(U_FAILURE(*status))
135       {
136         fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
137                 cnvName,
138                 "cnv",
139                 u_errorName(*status));
140         return;
141       }
142 
143     if(VERBOSE)
144       {
145         printf("- Opened udata %s.%s\n", cnvName, "cnv");
146       }
147 
148 
149     /* all read only, clean, platform independent data.  Mmmm. :)  */
150     udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
151     size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */
152     /* Now, write the table */
153     if(tableType&TABLE_BASE) {
154         size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
155     }
156     if(tableType&TABLE_EXT) {
157         size += data->extData->write(data->extData, &data->staticData, mem, tableType);
158     }
159 
160     sz2 = udata_finish(mem, status);
161     if(size != sz2)
162     {
163         fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", static_cast<int>(sz2), static_cast<int>(size));
164         *status=U_INTERNAL_PROGRAM_ERROR;
165     }
166     if(VERBOSE)
167     {
168       printf("- Wrote %u bytes to the udata.\n", static_cast<int>(sz2));
169     }
170 }
171 
172 enum {
173     OPT_HELP_H,
174     OPT_HELP_QUESTION_MARK,
175     OPT_COPYRIGHT,
176     OPT_VERSION,
177     OPT_DESTDIR,
178     OPT_VERBOSE,
179     OPT_SMALL,
180     OPT_IGNORE_SISO_CHECK,
181     OPT_QUIET,
182     OPT_SOURCEDIR,
183 
184     OPT_COUNT
185 };
186 
187 static UOption options[]={
188     UOPTION_HELP_H,
189     UOPTION_HELP_QUESTION_MARK,
190     UOPTION_COPYRIGHT,
191     UOPTION_VERSION,
192     UOPTION_DESTDIR,
193     UOPTION_VERBOSE,
194     { "small", nullptr, nullptr, nullptr, '\1', UOPT_NO_ARG, 0 },
195     { "ignore-siso-check", nullptr, nullptr, nullptr, '\1', UOPT_NO_ARG, 0 },
196     UOPTION_QUIET,
197     UOPTION_SOURCEDIR,
198 };
199 
main(int argc,char * argv[])200 int main(int argc, char* argv[])
201 {
202     ConvData data;
203     char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
204 
205     U_MAIN_INIT_ARGS(argc, argv);
206 
207     /* Set up the ICU version number */
208     UVersionInfo icuVersion;
209     u_getVersion(icuVersion);
210     uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
211 
212     /* preset then read command line options */
213     options[OPT_DESTDIR].value=u_getDataDirectory();
214     argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
215 
216     if(options[OPT_VERSION].doesOccur) {
217         printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
218                dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
219         printf("%s\n", U_COPYRIGHT_STRING);
220         exit(0);
221     }
222 
223     /* error handling, printing usage message */
224     if(argc<0) {
225         fprintf(stderr,
226             "error in command line argument \"%s\"\n",
227             argv[-argc]);
228     } else if(argc<2) {
229         argc=-1;
230     }
231     if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
232         FILE *stdfile=argc<0 ? stderr : stdout;
233         fprintf(stdfile,
234             "usage: %s [-options] files...\n"
235             "\tread .ucm codepage mapping files and write .cnv files\n"
236             "options:\n"
237             "\t-h or -? or --help  this usage text\n"
238             "\t-V or --version     show a version message\n"
239             "\t-c or --copyright   include a copyright notice\n"
240             "\t-d or --destdir     destination directory, followed by the path\n"
241             "\t-v or --verbose     Turn on verbose output\n"
242             "\t-q or --quiet       do not display warnings and progress\n"
243             "\t-s or --sourcedir   source directory, followed by the path\n",
244             argv[0]);
245         fprintf(stdfile,
246             "\t      --small       Generate smaller .cnv files. They will be\n"
247             "\t                    significantly smaller but may not be compatible with\n"
248             "\t                    older versions of ICU and will require heap memory\n"
249             "\t                    allocation when loaded.\n"
250             "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");
251         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
252     }
253 
254     /* get the options values */
255     haveCopyright = options[OPT_COPYRIGHT].doesOccur;
256     const char *destdir = options[OPT_DESTDIR].value;
257     VERBOSE = options[OPT_VERBOSE].doesOccur;
258     QUIET = options[OPT_QUIET].doesOccur;
259     SMALL = options[OPT_SMALL].doesOccur;
260 
261     if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
262         IGNORE_SISO_CHECK = true;
263     }
264 
265     icu::CharString outFileName;
266     UErrorCode err = U_ZERO_ERROR;
267     if (destdir != nullptr && *destdir != 0) {
268         outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);
269         if (U_FAILURE(err)) {
270             return err;
271         }
272     }
273     int32_t outBasenameStart = outFileName.length();
274 
275 #if DEBUG
276     {
277       int i;
278       printf("makeconv: processing %d files...\n", argc - 1);
279       for(i=1; i<argc; ++i) {
280         printf("%s ", argv[i]);
281       }
282       printf("\n");
283       fflush(stdout);
284     }
285 #endif
286 
287     UBool printFilename = static_cast<UBool>(argc > 2 || VERBOSE);
288     icu::CharString pathBuf;
289     for (++argv; --argc; ++argv)
290     {
291         UErrorCode localError = U_ZERO_ERROR;
292         const char *arg = getLongPathname(*argv);
293 
294         const char* sourcedir = options[OPT_SOURCEDIR].value;
295         if (sourcedir != nullptr && *sourcedir != 0 && uprv_strcmp(sourcedir, ".") != 0) {
296             pathBuf.clear();
297             pathBuf.appendPathPart(sourcedir, localError);
298             pathBuf.appendPathPart(arg, localError);
299             arg = pathBuf.data();
300         }
301 
302         /*produces the right destination path for display*/
303         outFileName.truncate(outBasenameStart);
304         if (outBasenameStart != 0)
305         {
306             /* find the last file sepator */
307             const char *basename = findBasename(arg);
308             outFileName.append(basename, localError);
309         }
310         else
311         {
312             outFileName.append(arg, localError);
313         }
314         if (U_FAILURE(localError)) {
315             return localError;
316         }
317 
318         /*removes the extension if any is found*/
319         int32_t lastDotIndex = outFileName.lastIndexOf('.');
320         if (lastDotIndex >= outBasenameStart) {
321             outFileName.truncate(lastDotIndex);
322         }
323 
324         /* the basename without extension is the converter name */
325         if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
326             fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);
327             return U_BUFFER_OVERFLOW_ERROR;
328         }
329         uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);
330 
331         /*Adds the target extension*/
332         outFileName.append(CONVERTER_FILE_EXTENSION, localError);
333         if (U_FAILURE(localError)) {
334             return localError;
335         }
336 
337 #if DEBUG
338         printf("makeconv: processing %s  ...\n", arg);
339         fflush(stdout);
340 #endif
341         initConvData(&data);
342         createConverter(&data, arg, &localError);
343 
344         if (U_FAILURE(localError))
345         {
346             /* if an error is found, print out an error msg and keep going */
347             fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
348                     outFileName.data(), arg, u_errorName(localError));
349             if(U_SUCCESS(err)) {
350                 err = localError;
351             }
352         }
353         else
354         {
355             /* Insure the static data name matches the  file name */
356             /* Changed to ignore directory and only compare base name
357              LDH 1/2/08*/
358             char *p;
359             p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
360 
361             if(p == nullptr)            /* OK, try alternate */
362             {
363                 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
364                 if(p == nullptr)
365                 {
366                     p=cnvName; /* If no separators, no problem */
367                 }
368             }
369             else
370             {
371                 p++;   /* If found separator, don't include it in compare */
372             }
373             if(uprv_stricmp(p,data.staticData.name) && !QUIET)
374             {
375                 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
376                     cnvName,  CONVERTER_FILE_EXTENSION,
377                     data.staticData.name);
378             }
379 
380             if (strlen(cnvName) + 1 > UPRV_LENGTHOF(data.staticData.name)) {
381                 fprintf(stderr, "converter name %s too long\n", cnvName);
382                 return U_BUFFER_OVERFLOW_ERROR;
383             }
384             uprv_strcpy((char*)data.staticData.name, cnvName);
385 
386             if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
387                 fprintf(stderr,
388                     "Error: A converter name must contain only invariant characters.\n"
389                     "%s is not a valid converter name.\n",
390                     data.staticData.name);
391                 if(U_SUCCESS(err)) {
392                     err = U_INVALID_TABLE_FORMAT;
393                 }
394             }
395 
396             localError = U_ZERO_ERROR;
397             writeConverterData(&data, cnvName, destdir, &localError);
398 
399             if(U_FAILURE(localError))
400             {
401                 /* if an error is found, print out an error msg and keep going*/
402                 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,
403                     u_errorName(localError));
404                 if(U_SUCCESS(err)) {
405                     err = localError;
406                 }
407             }
408             else if (printFilename)
409             {
410                 puts(outFileName.data() + outBasenameStart);
411             }
412         }
413         fflush(stdout);
414         fflush(stderr);
415 
416         cleanupConvData(&data);
417     }
418 
419     return err;
420 }
421 
422 static void
getPlatformAndCCSIDFromName(const char * name,int8_t * pPlatform,int32_t * pCCSID)423 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
424     if( (name[0]=='i' || name[0]=='I') &&
425         (name[1]=='b' || name[1]=='B') &&
426         (name[2]=='m' || name[2]=='M')
427     ) {
428         name+=3;
429         if(*name=='-') {
430             ++name;
431         }
432         *pPlatform=UCNV_IBM;
433         *pCCSID = static_cast<int32_t>(uprv_strtoul(name, nullptr, 10));
434     } else {
435         *pPlatform=UCNV_UNKNOWN;
436         *pCCSID=0;
437     }
438 }
439 
440 static void
readHeader(ConvData * data,FileStream * convFile,UErrorCode * pErrorCode)441 readHeader(ConvData *data,
442            FileStream* convFile,
443            UErrorCode *pErrorCode) {
444     char line[1024];
445     char *s, *key, *value;
446     const UConverterStaticData *prototype;
447     UConverterStaticData *staticData;
448 
449     if(U_FAILURE(*pErrorCode)) {
450         return;
451     }
452 
453     staticData=&data->staticData;
454     staticData->platform=UCNV_IBM;
455     staticData->subCharLen=0;
456 
457     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
458         /* basic parsing and handling of state-related items */
459         if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
460             continue;
461         }
462 
463         /* stop at the beginning of the mapping section */
464         if(uprv_strcmp(line, "CHARMAP")==0) {
465             break;
466         }
467 
468         /* collect the information from the header field, ignore unknown keys */
469         if(uprv_strcmp(key, "code_set_name")==0) {
470             if(*value!=0) {
471                 uprv_strcpy((char *)staticData->name, value);
472                 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
473             }
474         } else if(uprv_strcmp(key, "subchar")==0) {
475             uint8_t bytes[UCNV_EXT_MAX_BYTES];
476             int8_t length;
477 
478             s=value;
479             length=ucm_parseBytes(bytes, line, (const char **)&s);
480             if(1<=length && length<=4 && *s==0) {
481                 staticData->subCharLen=length;
482                 uprv_memcpy(staticData->subChar, bytes, length);
483             } else {
484                 fprintf(stderr, "error: illegal <subchar> %s\n", value);
485                 *pErrorCode=U_INVALID_TABLE_FORMAT;
486                 return;
487             }
488         } else if(uprv_strcmp(key, "subchar1")==0) {
489             uint8_t bytes[UCNV_EXT_MAX_BYTES];
490 
491             s=value;
492             if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
493                 staticData->subChar1=bytes[0];
494             } else {
495                 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
496                 *pErrorCode=U_INVALID_TABLE_FORMAT;
497                 return;
498             }
499         }
500     }
501 
502     /* copy values from the UCMFile to the static data */
503     staticData->maxBytesPerChar = static_cast<int8_t>(data->ucm->states.maxCharLength);
504     staticData->minBytesPerChar = static_cast<int8_t>(data->ucm->states.minCharLength);
505     staticData->conversionType=data->ucm->states.conversionType;
506 
507     if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
508         fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
509         *pErrorCode=U_INVALID_TABLE_FORMAT;
510         return;
511     }
512 
513     /*
514      * Now that we know the type, copy any 'default' values from the table.
515      * We need not check the type any further because the parser only
516      * recognizes what we have prototypes for.
517      *
518      * For delta (extension-only) tables, copy values from the base file
519      * instead, see createConverter().
520      */
521     if(data->ucm->baseName[0]==0) {
522         prototype=ucnv_converterStaticData[staticData->conversionType];
523         if(prototype!=nullptr) {
524             if(staticData->name[0]==0) {
525                 uprv_strcpy((char *)staticData->name, prototype->name);
526             }
527 
528             if(staticData->codepage==0) {
529                 staticData->codepage=prototype->codepage;
530             }
531 
532             if(staticData->platform==0) {
533                 staticData->platform=prototype->platform;
534             }
535 
536             if(staticData->minBytesPerChar==0) {
537                 staticData->minBytesPerChar=prototype->minBytesPerChar;
538             }
539 
540             if(staticData->maxBytesPerChar==0) {
541                 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
542             }
543 
544             if(staticData->subCharLen==0) {
545                 staticData->subCharLen=prototype->subCharLen;
546                 if(prototype->subCharLen>0) {
547                     uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
548                 }
549             }
550         }
551     }
552 
553     if(data->ucm->states.outputType<0) {
554         data->ucm->states.outputType = static_cast<int8_t>(data->ucm->states.maxCharLength) - 1;
555     }
556 
557     if( staticData->subChar1!=0 &&
558             (staticData->minBytesPerChar>1 ||
559                 (staticData->conversionType!=UCNV_MBCS &&
560                  staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
561     ) {
562         fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
563         *pErrorCode=U_INVALID_TABLE_FORMAT;
564     }
565 }
566 
567 /* return true if a base table was read, false for an extension table */
568 static UBool
readFile(ConvData * data,const char * converterName,UErrorCode * pErrorCode)569 readFile(ConvData *data, const char* converterName,
570          UErrorCode *pErrorCode) {
571     char line[1024];
572     char *end;
573     FileStream *convFile;
574 
575     UCMStates *baseStates;
576     UBool dataIsBase;
577 
578     if(U_FAILURE(*pErrorCode)) {
579         return false;
580     }
581 
582     data->ucm=ucm_open();
583 
584     convFile=T_FileStream_open(converterName, "r");
585     if(convFile==nullptr) {
586         *pErrorCode=U_FILE_ACCESS_ERROR;
587         return false;
588     }
589 
590     readHeader(data, convFile, pErrorCode);
591     if(U_FAILURE(*pErrorCode)) {
592         return false;
593     }
594 
595     if(data->ucm->baseName[0]==0) {
596         dataIsBase=true;
597         baseStates=&data->ucm->states;
598         ucm_processStates(baseStates, IGNORE_SISO_CHECK);
599     } else {
600         dataIsBase=false;
601         baseStates=nullptr;
602     }
603 
604     /* read the base table */
605     ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
606     if(U_FAILURE(*pErrorCode)) {
607         return false;
608     }
609 
610     /* read an extension table if there is one */
611     while(T_FileStream_readLine(convFile, line, sizeof(line))) {
612         end=uprv_strchr(line, 0);
613         while(line<end &&
614               (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
615             --end;
616         }
617         *end=0;
618 
619         if(line[0]=='#' || u_skipWhitespace(line)==end) {
620             continue; /* ignore empty and comment lines */
621         }
622 
623         if(0==uprv_strcmp(line, "CHARMAP")) {
624             /* read the extension table */
625             ucm_readTable(data->ucm, convFile, false, baseStates, pErrorCode);
626         } else {
627             fprintf(stderr, "unexpected text after the base mapping table\n");
628         }
629         break;
630     }
631 
632     T_FileStream_close(convFile);
633 
634     if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
635         fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
636         *pErrorCode=U_INVALID_TABLE_FORMAT;
637     }
638 
639     return dataIsBase;
640 }
641 
642 static void
createConverter(ConvData * data,const char * converterName,UErrorCode * pErrorCode)643 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
644     ConvData baseData;
645     UBool dataIsBase;
646 
647     UConverterStaticData *staticData;
648     UCMStates *states, *baseStates;
649 
650     if(U_FAILURE(*pErrorCode)) {
651         return;
652     }
653 
654     initConvData(data);
655 
656     dataIsBase=readFile(data, converterName, pErrorCode);
657     if(U_FAILURE(*pErrorCode)) {
658         return;
659     }
660 
661     staticData=&data->staticData;
662     states=&data->ucm->states;
663 
664     if(dataIsBase) {
665         /*
666          * Build a normal .cnv file with a base table
667          * and an optional extension table.
668          */
669         data->cnvData=MBCSOpen(data->ucm);
670         if(data->cnvData==nullptr) {
671             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
672 
673         } else if(!data->cnvData->isValid(data->cnvData,
674                             staticData->subChar, staticData->subCharLen)
675         ) {
676             fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
677             *pErrorCode=U_INVALID_TABLE_FORMAT;
678 
679         } else if(staticData->subChar1!=0 &&
680                     !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
681         ) {
682             fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
683             *pErrorCode=U_INVALID_TABLE_FORMAT;
684 
685         } else if(
686             data->ucm->ext->mappingsLength>0 &&
687             !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, false)
688         ) {
689             *pErrorCode=U_INVALID_TABLE_FORMAT;
690         } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
691             /* sort the table so that it can be turned into UTF-8-friendly data */
692             ucm_sortTable(data->ucm->base);
693         }
694 
695         if(U_SUCCESS(*pErrorCode)) {
696             if(
697                 /* add the base table after ucm_checkBaseExt()! */
698                 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
699             ) {
700                 *pErrorCode=U_INVALID_TABLE_FORMAT;
701             } else {
702                 /*
703                  * addTable() may have requested moving more mappings to the extension table
704                  * if they fit into the base toUnicode table but not into the
705                  * base fromUnicode table.
706                  * (Especially for UTF-8-friendly fromUnicode tables.)
707                  * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
708                  * to be excluded from the extension toUnicode data.
709                  * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
710                  * the base fromUnicode table.
711                  */
712                 ucm_moveMappings(data->ucm->base, data->ucm->ext);
713                 ucm_sortTable(data->ucm->ext);
714                 if(data->ucm->ext->mappingsLength>0) {
715                     /* prepare the extension table, if there is one */
716                     data->extData=CnvExtOpen(data->ucm);
717                     if(data->extData==nullptr) {
718                         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
719                     } else if(
720                         !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
721                     ) {
722                         *pErrorCode=U_INVALID_TABLE_FORMAT;
723                     }
724                 }
725             }
726         }
727     } else {
728         /* Build an extension-only .cnv file. */
729         char baseFilename[500];
730         char *basename;
731 
732         initConvData(&baseData);
733 
734         /* assemble a path/filename for data->ucm->baseName */
735         uprv_strcpy(baseFilename, converterName);
736         basename = const_cast<char*>(findBasename(baseFilename));
737         uprv_strcpy(basename, data->ucm->baseName);
738         uprv_strcat(basename, ".ucm");
739 
740         /* read the base table */
741         dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
742         if(U_FAILURE(*pErrorCode)) {
743             return;
744         } else if(!dataIsBase) {
745             fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
746             *pErrorCode=U_INVALID_TABLE_FORMAT;
747         } else {
748             /* prepare the extension table */
749             data->extData=CnvExtOpen(data->ucm);
750             if(data->extData==nullptr) {
751                 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
752             } else {
753                 /* fill in gaps in extension file header fields */
754                 UCMapping *m, *mLimit;
755                 uint8_t fallbackFlags;
756 
757                 baseStates=&baseData.ucm->states;
758                 if(states->conversionType==UCNV_DBCS) {
759                     staticData->minBytesPerChar = static_cast<int8_t>(states->minCharLength = 2);
760                 } else if(states->minCharLength==0) {
761                     staticData->minBytesPerChar = static_cast<int8_t>(states->minCharLength = baseStates->minCharLength);
762                 }
763                 if(states->maxCharLength<states->minCharLength) {
764                     staticData->maxBytesPerChar = static_cast<int8_t>(states->maxCharLength = baseStates->maxCharLength);
765                 }
766 
767                 if(staticData->subCharLen==0) {
768                     uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
769                     staticData->subCharLen=baseData.staticData.subCharLen;
770                 }
771                 /*
772                  * do not copy subChar1 -
773                  * only use what is explicitly specified
774                  * because it cannot be unset in the extension file header
775                  */
776 
777                 /* get the fallback flags */
778                 fallbackFlags=0;
779                 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
780                     m<mLimit && fallbackFlags!=3;
781                     ++m
782                 ) {
783                     if(m->f==1) {
784                         fallbackFlags|=1;
785                     } else if(m->f==3) {
786                         fallbackFlags|=2;
787                     }
788                 }
789 
790                 if(fallbackFlags&1) {
791                     staticData->hasFromUnicodeFallback=true;
792                 }
793                 if(fallbackFlags&2) {
794                     staticData->hasToUnicodeFallback=true;
795                 }
796 
797                 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
798                     fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");
799                     *pErrorCode=U_INVALID_TABLE_FORMAT;
800 
801                 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
802                     fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");
803                     *pErrorCode=U_INVALID_TABLE_FORMAT;
804 
805                 } else if(
806                     !ucm_checkValidity(data->ucm->ext, baseStates) ||
807                     !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, false)
808                 ) {
809                     *pErrorCode=U_INVALID_TABLE_FORMAT;
810                 } else {
811                     if(states->maxCharLength>1) {
812                         /*
813                          * When building a normal .cnv file with a base table
814                          * for an MBCS (not SBCS) table with explicit precision flags,
815                          * the MBCSAddTable() function marks some mappings for moving
816                          * to the extension table.
817                          * They fit into the base toUnicode table but not into the
818                          * base fromUnicode table.
819                          * (Note: We do have explicit precision flags because they are
820                          * required for extension table generation, and
821                          * ucm_checkBaseExt() verified it.)
822                          *
823                          * We do not call MBCSAddTable() here (we probably could)
824                          * so we need to do the analysis before building the extension table.
825                          * We assume that MBCSAddTable() will build a UTF-8-friendly table.
826                          * Redundant mappings in the extension table are ok except they cost some size.
827                          *
828                          * Do this after ucm_checkBaseExt().
829                          */
830                         const MBCSData *mbcsData=MBCSGetDummy();
831                         int32_t needsMove=0;
832                         for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
833                             m<mLimit;
834                             ++m
835                         ) {
836                             if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
837                                 m->f|=MBCS_FROM_U_EXT_FLAG;
838                                 m->moveFlag=UCM_MOVE_TO_EXT;
839                                 ++needsMove;
840                             }
841                         }
842 
843                         if(needsMove!=0) {
844                             ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
845                             ucm_sortTable(data->ucm->ext);
846                         }
847                     }
848                     if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
849                         *pErrorCode=U_INVALID_TABLE_FORMAT;
850                     }
851                 }
852             }
853         }
854 
855         cleanupConvData(&baseData);
856     }
857 }
858 
859 /*
860  * Hey, Emacs, please set the following:
861  *
862  * Local Variables:
863  * indent-tabs-mode: nil
864  * End:
865  *
866  */
867