1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ********************************************************************************
5 *
6 * Copyright (C) 1998-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ********************************************************************************
10 *
11 *
12 * makeconv.cpp:
13 * tool creating a binary (compressed) representation of the conversion mapping
14 * table (IBM NLTC ucmap format).
15 *
16 * 05/04/2000 helena Added fallback mapping into the picture...
17 * 06/29/2000 helena Major rewrite of the callback APIs.
18 */
19
20 #include <stdio.h>
21 #include "unicode/putil.h"
22 #include "unicode/ucnv_err.h"
23 #include "charstr.h"
24 #include "ucnv_bld.h"
25 #include "ucnv_imp.h"
26 #include "ucnv_cnv.h"
27 #include "cstring.h"
28 #include "cmemory.h"
29 #include "uinvchar.h"
30 #include "filestrm.h"
31 #include "toolutil.h"
32 #include "uoptions.h"
33 #include "unicode/udata.h"
34 #include "unewdata.h"
35 #include "uparse.h"
36 #include "ucm.h"
37 #include "makeconv.h"
38 #include "genmbcs.h"
39
40 #define DEBUG 0
41
42 typedef struct ConvData {
43 UCMFile *ucm;
44 NewConverter *cnvData, *extData;
45 UConverterSharedData sharedData;
46 UConverterStaticData staticData;
47 } ConvData;
48
49 static void
initConvData(ConvData * data)50 initConvData(ConvData *data) {
51 uprv_memset(data, 0, sizeof(ConvData));
52 data->sharedData.structSize=sizeof(UConverterSharedData);
53 data->staticData.structSize=sizeof(UConverterStaticData);
54 data->sharedData.staticData=&data->staticData;
55 }
56
57 static void
cleanupConvData(ConvData * data)58 cleanupConvData(ConvData *data) {
59 if(data!=nullptr) {
60 if(data->cnvData!=nullptr) {
61 data->cnvData->close(data->cnvData);
62 data->cnvData=nullptr;
63 }
64 if(data->extData!=nullptr) {
65 data->extData->close(data->extData);
66 data->extData=nullptr;
67 }
68 ucm_close(data->ucm);
69 data->ucm=nullptr;
70 }
71 }
72
73 /*
74 * from ucnvstat.c - static prototypes of data-based converters
75 */
76 U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
77
78 /*
79 * Global - verbosity
80 */
81 UBool VERBOSE = false;
82 UBool QUIET = false;
83 UBool SMALL = false;
84 UBool IGNORE_SISO_CHECK = false;
85
86 static void
87 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
88
89 /*
90 * Set up the UNewData and write the converter..
91 */
92 static void
93 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
94
95 UBool haveCopyright=true;
96
97 static UDataInfo dataInfo={
98 sizeof(UDataInfo),
99 0,
100
101 U_IS_BIG_ENDIAN,
102 U_CHARSET_FAMILY,
103 sizeof(char16_t),
104 0,
105
106 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
107 {6, 2, 0, 0}, /* formatVersion */
108 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
109 };
110
111 static void
writeConverterData(ConvData * data,const char * cnvName,const char * cnvDir,UErrorCode * status)112 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
113 {
114 UNewDataMemory *mem = nullptr;
115 uint32_t sz2;
116 uint32_t size = 0;
117 int32_t tableType;
118
119 if(U_FAILURE(*status))
120 {
121 return;
122 }
123
124 tableType=TABLE_NONE;
125 if(data->cnvData!=nullptr) {
126 tableType|=TABLE_BASE;
127 }
128 if(data->extData!=nullptr) {
129 tableType|=TABLE_EXT;
130 }
131
132 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : nullptr, status);
133
134 if(U_FAILURE(*status))
135 {
136 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
137 cnvName,
138 "cnv",
139 u_errorName(*status));
140 return;
141 }
142
143 if(VERBOSE)
144 {
145 printf("- Opened udata %s.%s\n", cnvName, "cnv");
146 }
147
148
149 /* all read only, clean, platform independent data. Mmmm. :) */
150 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
151 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
152 /* Now, write the table */
153 if(tableType&TABLE_BASE) {
154 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
155 }
156 if(tableType&TABLE_EXT) {
157 size += data->extData->write(data->extData, &data->staticData, mem, tableType);
158 }
159
160 sz2 = udata_finish(mem, status);
161 if(size != sz2)
162 {
163 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", static_cast<int>(sz2), static_cast<int>(size));
164 *status=U_INTERNAL_PROGRAM_ERROR;
165 }
166 if(VERBOSE)
167 {
168 printf("- Wrote %u bytes to the udata.\n", static_cast<int>(sz2));
169 }
170 }
171
172 enum {
173 OPT_HELP_H,
174 OPT_HELP_QUESTION_MARK,
175 OPT_COPYRIGHT,
176 OPT_VERSION,
177 OPT_DESTDIR,
178 OPT_VERBOSE,
179 OPT_SMALL,
180 OPT_IGNORE_SISO_CHECK,
181 OPT_QUIET,
182 OPT_SOURCEDIR,
183
184 OPT_COUNT
185 };
186
187 static UOption options[]={
188 UOPTION_HELP_H,
189 UOPTION_HELP_QUESTION_MARK,
190 UOPTION_COPYRIGHT,
191 UOPTION_VERSION,
192 UOPTION_DESTDIR,
193 UOPTION_VERBOSE,
194 { "small", nullptr, nullptr, nullptr, '\1', UOPT_NO_ARG, 0 },
195 { "ignore-siso-check", nullptr, nullptr, nullptr, '\1', UOPT_NO_ARG, 0 },
196 UOPTION_QUIET,
197 UOPTION_SOURCEDIR,
198 };
199
main(int argc,char * argv[])200 int main(int argc, char* argv[])
201 {
202 ConvData data;
203 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
204
205 U_MAIN_INIT_ARGS(argc, argv);
206
207 /* Set up the ICU version number */
208 UVersionInfo icuVersion;
209 u_getVersion(icuVersion);
210 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
211
212 /* preset then read command line options */
213 options[OPT_DESTDIR].value=u_getDataDirectory();
214 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
215
216 if(options[OPT_VERSION].doesOccur) {
217 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
218 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
219 printf("%s\n", U_COPYRIGHT_STRING);
220 exit(0);
221 }
222
223 /* error handling, printing usage message */
224 if(argc<0) {
225 fprintf(stderr,
226 "error in command line argument \"%s\"\n",
227 argv[-argc]);
228 } else if(argc<2) {
229 argc=-1;
230 }
231 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
232 FILE *stdfile=argc<0 ? stderr : stdout;
233 fprintf(stdfile,
234 "usage: %s [-options] files...\n"
235 "\tread .ucm codepage mapping files and write .cnv files\n"
236 "options:\n"
237 "\t-h or -? or --help this usage text\n"
238 "\t-V or --version show a version message\n"
239 "\t-c or --copyright include a copyright notice\n"
240 "\t-d or --destdir destination directory, followed by the path\n"
241 "\t-v or --verbose Turn on verbose output\n"
242 "\t-q or --quiet do not display warnings and progress\n"
243 "\t-s or --sourcedir source directory, followed by the path\n",
244 argv[0]);
245 fprintf(stdfile,
246 "\t --small Generate smaller .cnv files. They will be\n"
247 "\t significantly smaller but may not be compatible with\n"
248 "\t older versions of ICU and will require heap memory\n"
249 "\t allocation when loaded.\n"
250 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
251 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
252 }
253
254 /* get the options values */
255 haveCopyright = options[OPT_COPYRIGHT].doesOccur;
256 const char *destdir = options[OPT_DESTDIR].value;
257 VERBOSE = options[OPT_VERBOSE].doesOccur;
258 QUIET = options[OPT_QUIET].doesOccur;
259 SMALL = options[OPT_SMALL].doesOccur;
260
261 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
262 IGNORE_SISO_CHECK = true;
263 }
264
265 icu::CharString outFileName;
266 UErrorCode err = U_ZERO_ERROR;
267 if (destdir != nullptr && *destdir != 0) {
268 outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);
269 if (U_FAILURE(err)) {
270 return err;
271 }
272 }
273 int32_t outBasenameStart = outFileName.length();
274
275 #if DEBUG
276 {
277 int i;
278 printf("makeconv: processing %d files...\n", argc - 1);
279 for(i=1; i<argc; ++i) {
280 printf("%s ", argv[i]);
281 }
282 printf("\n");
283 fflush(stdout);
284 }
285 #endif
286
287 UBool printFilename = static_cast<UBool>(argc > 2 || VERBOSE);
288 icu::CharString pathBuf;
289 for (++argv; --argc; ++argv)
290 {
291 UErrorCode localError = U_ZERO_ERROR;
292 const char *arg = getLongPathname(*argv);
293
294 const char* sourcedir = options[OPT_SOURCEDIR].value;
295 if (sourcedir != nullptr && *sourcedir != 0 && uprv_strcmp(sourcedir, ".") != 0) {
296 pathBuf.clear();
297 pathBuf.appendPathPart(sourcedir, localError);
298 pathBuf.appendPathPart(arg, localError);
299 arg = pathBuf.data();
300 }
301
302 /*produces the right destination path for display*/
303 outFileName.truncate(outBasenameStart);
304 if (outBasenameStart != 0)
305 {
306 /* find the last file sepator */
307 const char *basename = findBasename(arg);
308 outFileName.append(basename, localError);
309 }
310 else
311 {
312 outFileName.append(arg, localError);
313 }
314 if (U_FAILURE(localError)) {
315 return localError;
316 }
317
318 /*removes the extension if any is found*/
319 int32_t lastDotIndex = outFileName.lastIndexOf('.');
320 if (lastDotIndex >= outBasenameStart) {
321 outFileName.truncate(lastDotIndex);
322 }
323
324 /* the basename without extension is the converter name */
325 if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
326 fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);
327 return U_BUFFER_OVERFLOW_ERROR;
328 }
329 uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);
330
331 /*Adds the target extension*/
332 outFileName.append(CONVERTER_FILE_EXTENSION, localError);
333 if (U_FAILURE(localError)) {
334 return localError;
335 }
336
337 #if DEBUG
338 printf("makeconv: processing %s ...\n", arg);
339 fflush(stdout);
340 #endif
341 initConvData(&data);
342 createConverter(&data, arg, &localError);
343
344 if (U_FAILURE(localError))
345 {
346 /* if an error is found, print out an error msg and keep going */
347 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
348 outFileName.data(), arg, u_errorName(localError));
349 if(U_SUCCESS(err)) {
350 err = localError;
351 }
352 }
353 else
354 {
355 /* Insure the static data name matches the file name */
356 /* Changed to ignore directory and only compare base name
357 LDH 1/2/08*/
358 char *p;
359 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
360
361 if(p == nullptr) /* OK, try alternate */
362 {
363 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
364 if(p == nullptr)
365 {
366 p=cnvName; /* If no separators, no problem */
367 }
368 }
369 else
370 {
371 p++; /* If found separator, don't include it in compare */
372 }
373 if(uprv_stricmp(p,data.staticData.name) && !QUIET)
374 {
375 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
376 cnvName, CONVERTER_FILE_EXTENSION,
377 data.staticData.name);
378 }
379
380 if (strlen(cnvName) + 1 > UPRV_LENGTHOF(data.staticData.name)) {
381 fprintf(stderr, "converter name %s too long\n", cnvName);
382 return U_BUFFER_OVERFLOW_ERROR;
383 }
384 uprv_strcpy((char*)data.staticData.name, cnvName);
385
386 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
387 fprintf(stderr,
388 "Error: A converter name must contain only invariant characters.\n"
389 "%s is not a valid converter name.\n",
390 data.staticData.name);
391 if(U_SUCCESS(err)) {
392 err = U_INVALID_TABLE_FORMAT;
393 }
394 }
395
396 localError = U_ZERO_ERROR;
397 writeConverterData(&data, cnvName, destdir, &localError);
398
399 if(U_FAILURE(localError))
400 {
401 /* if an error is found, print out an error msg and keep going*/
402 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,
403 u_errorName(localError));
404 if(U_SUCCESS(err)) {
405 err = localError;
406 }
407 }
408 else if (printFilename)
409 {
410 puts(outFileName.data() + outBasenameStart);
411 }
412 }
413 fflush(stdout);
414 fflush(stderr);
415
416 cleanupConvData(&data);
417 }
418
419 return err;
420 }
421
422 static void
getPlatformAndCCSIDFromName(const char * name,int8_t * pPlatform,int32_t * pCCSID)423 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
424 if( (name[0]=='i' || name[0]=='I') &&
425 (name[1]=='b' || name[1]=='B') &&
426 (name[2]=='m' || name[2]=='M')
427 ) {
428 name+=3;
429 if(*name=='-') {
430 ++name;
431 }
432 *pPlatform=UCNV_IBM;
433 *pCCSID = static_cast<int32_t>(uprv_strtoul(name, nullptr, 10));
434 } else {
435 *pPlatform=UCNV_UNKNOWN;
436 *pCCSID=0;
437 }
438 }
439
440 static void
readHeader(ConvData * data,FileStream * convFile,UErrorCode * pErrorCode)441 readHeader(ConvData *data,
442 FileStream* convFile,
443 UErrorCode *pErrorCode) {
444 char line[1024];
445 char *s, *key, *value;
446 const UConverterStaticData *prototype;
447 UConverterStaticData *staticData;
448
449 if(U_FAILURE(*pErrorCode)) {
450 return;
451 }
452
453 staticData=&data->staticData;
454 staticData->platform=UCNV_IBM;
455 staticData->subCharLen=0;
456
457 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
458 /* basic parsing and handling of state-related items */
459 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
460 continue;
461 }
462
463 /* stop at the beginning of the mapping section */
464 if(uprv_strcmp(line, "CHARMAP")==0) {
465 break;
466 }
467
468 /* collect the information from the header field, ignore unknown keys */
469 if(uprv_strcmp(key, "code_set_name")==0) {
470 if(*value!=0) {
471 uprv_strcpy((char *)staticData->name, value);
472 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
473 }
474 } else if(uprv_strcmp(key, "subchar")==0) {
475 uint8_t bytes[UCNV_EXT_MAX_BYTES];
476 int8_t length;
477
478 s=value;
479 length=ucm_parseBytes(bytes, line, (const char **)&s);
480 if(1<=length && length<=4 && *s==0) {
481 staticData->subCharLen=length;
482 uprv_memcpy(staticData->subChar, bytes, length);
483 } else {
484 fprintf(stderr, "error: illegal <subchar> %s\n", value);
485 *pErrorCode=U_INVALID_TABLE_FORMAT;
486 return;
487 }
488 } else if(uprv_strcmp(key, "subchar1")==0) {
489 uint8_t bytes[UCNV_EXT_MAX_BYTES];
490
491 s=value;
492 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
493 staticData->subChar1=bytes[0];
494 } else {
495 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
496 *pErrorCode=U_INVALID_TABLE_FORMAT;
497 return;
498 }
499 }
500 }
501
502 /* copy values from the UCMFile to the static data */
503 staticData->maxBytesPerChar = static_cast<int8_t>(data->ucm->states.maxCharLength);
504 staticData->minBytesPerChar = static_cast<int8_t>(data->ucm->states.minCharLength);
505 staticData->conversionType=data->ucm->states.conversionType;
506
507 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
508 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
509 *pErrorCode=U_INVALID_TABLE_FORMAT;
510 return;
511 }
512
513 /*
514 * Now that we know the type, copy any 'default' values from the table.
515 * We need not check the type any further because the parser only
516 * recognizes what we have prototypes for.
517 *
518 * For delta (extension-only) tables, copy values from the base file
519 * instead, see createConverter().
520 */
521 if(data->ucm->baseName[0]==0) {
522 prototype=ucnv_converterStaticData[staticData->conversionType];
523 if(prototype!=nullptr) {
524 if(staticData->name[0]==0) {
525 uprv_strcpy((char *)staticData->name, prototype->name);
526 }
527
528 if(staticData->codepage==0) {
529 staticData->codepage=prototype->codepage;
530 }
531
532 if(staticData->platform==0) {
533 staticData->platform=prototype->platform;
534 }
535
536 if(staticData->minBytesPerChar==0) {
537 staticData->minBytesPerChar=prototype->minBytesPerChar;
538 }
539
540 if(staticData->maxBytesPerChar==0) {
541 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
542 }
543
544 if(staticData->subCharLen==0) {
545 staticData->subCharLen=prototype->subCharLen;
546 if(prototype->subCharLen>0) {
547 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
548 }
549 }
550 }
551 }
552
553 if(data->ucm->states.outputType<0) {
554 data->ucm->states.outputType = static_cast<int8_t>(data->ucm->states.maxCharLength) - 1;
555 }
556
557 if( staticData->subChar1!=0 &&
558 (staticData->minBytesPerChar>1 ||
559 (staticData->conversionType!=UCNV_MBCS &&
560 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
561 ) {
562 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
563 *pErrorCode=U_INVALID_TABLE_FORMAT;
564 }
565 }
566
567 /* return true if a base table was read, false for an extension table */
568 static UBool
readFile(ConvData * data,const char * converterName,UErrorCode * pErrorCode)569 readFile(ConvData *data, const char* converterName,
570 UErrorCode *pErrorCode) {
571 char line[1024];
572 char *end;
573 FileStream *convFile;
574
575 UCMStates *baseStates;
576 UBool dataIsBase;
577
578 if(U_FAILURE(*pErrorCode)) {
579 return false;
580 }
581
582 data->ucm=ucm_open();
583
584 convFile=T_FileStream_open(converterName, "r");
585 if(convFile==nullptr) {
586 *pErrorCode=U_FILE_ACCESS_ERROR;
587 return false;
588 }
589
590 readHeader(data, convFile, pErrorCode);
591 if(U_FAILURE(*pErrorCode)) {
592 return false;
593 }
594
595 if(data->ucm->baseName[0]==0) {
596 dataIsBase=true;
597 baseStates=&data->ucm->states;
598 ucm_processStates(baseStates, IGNORE_SISO_CHECK);
599 } else {
600 dataIsBase=false;
601 baseStates=nullptr;
602 }
603
604 /* read the base table */
605 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
606 if(U_FAILURE(*pErrorCode)) {
607 return false;
608 }
609
610 /* read an extension table if there is one */
611 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
612 end=uprv_strchr(line, 0);
613 while(line<end &&
614 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
615 --end;
616 }
617 *end=0;
618
619 if(line[0]=='#' || u_skipWhitespace(line)==end) {
620 continue; /* ignore empty and comment lines */
621 }
622
623 if(0==uprv_strcmp(line, "CHARMAP")) {
624 /* read the extension table */
625 ucm_readTable(data->ucm, convFile, false, baseStates, pErrorCode);
626 } else {
627 fprintf(stderr, "unexpected text after the base mapping table\n");
628 }
629 break;
630 }
631
632 T_FileStream_close(convFile);
633
634 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
635 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
636 *pErrorCode=U_INVALID_TABLE_FORMAT;
637 }
638
639 return dataIsBase;
640 }
641
642 static void
createConverter(ConvData * data,const char * converterName,UErrorCode * pErrorCode)643 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
644 ConvData baseData;
645 UBool dataIsBase;
646
647 UConverterStaticData *staticData;
648 UCMStates *states, *baseStates;
649
650 if(U_FAILURE(*pErrorCode)) {
651 return;
652 }
653
654 initConvData(data);
655
656 dataIsBase=readFile(data, converterName, pErrorCode);
657 if(U_FAILURE(*pErrorCode)) {
658 return;
659 }
660
661 staticData=&data->staticData;
662 states=&data->ucm->states;
663
664 if(dataIsBase) {
665 /*
666 * Build a normal .cnv file with a base table
667 * and an optional extension table.
668 */
669 data->cnvData=MBCSOpen(data->ucm);
670 if(data->cnvData==nullptr) {
671 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
672
673 } else if(!data->cnvData->isValid(data->cnvData,
674 staticData->subChar, staticData->subCharLen)
675 ) {
676 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
677 *pErrorCode=U_INVALID_TABLE_FORMAT;
678
679 } else if(staticData->subChar1!=0 &&
680 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
681 ) {
682 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
683 *pErrorCode=U_INVALID_TABLE_FORMAT;
684
685 } else if(
686 data->ucm->ext->mappingsLength>0 &&
687 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, false)
688 ) {
689 *pErrorCode=U_INVALID_TABLE_FORMAT;
690 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
691 /* sort the table so that it can be turned into UTF-8-friendly data */
692 ucm_sortTable(data->ucm->base);
693 }
694
695 if(U_SUCCESS(*pErrorCode)) {
696 if(
697 /* add the base table after ucm_checkBaseExt()! */
698 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
699 ) {
700 *pErrorCode=U_INVALID_TABLE_FORMAT;
701 } else {
702 /*
703 * addTable() may have requested moving more mappings to the extension table
704 * if they fit into the base toUnicode table but not into the
705 * base fromUnicode table.
706 * (Especially for UTF-8-friendly fromUnicode tables.)
707 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
708 * to be excluded from the extension toUnicode data.
709 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
710 * the base fromUnicode table.
711 */
712 ucm_moveMappings(data->ucm->base, data->ucm->ext);
713 ucm_sortTable(data->ucm->ext);
714 if(data->ucm->ext->mappingsLength>0) {
715 /* prepare the extension table, if there is one */
716 data->extData=CnvExtOpen(data->ucm);
717 if(data->extData==nullptr) {
718 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
719 } else if(
720 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
721 ) {
722 *pErrorCode=U_INVALID_TABLE_FORMAT;
723 }
724 }
725 }
726 }
727 } else {
728 /* Build an extension-only .cnv file. */
729 char baseFilename[500];
730 char *basename;
731
732 initConvData(&baseData);
733
734 /* assemble a path/filename for data->ucm->baseName */
735 uprv_strcpy(baseFilename, converterName);
736 basename = const_cast<char*>(findBasename(baseFilename));
737 uprv_strcpy(basename, data->ucm->baseName);
738 uprv_strcat(basename, ".ucm");
739
740 /* read the base table */
741 dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
742 if(U_FAILURE(*pErrorCode)) {
743 return;
744 } else if(!dataIsBase) {
745 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
746 *pErrorCode=U_INVALID_TABLE_FORMAT;
747 } else {
748 /* prepare the extension table */
749 data->extData=CnvExtOpen(data->ucm);
750 if(data->extData==nullptr) {
751 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
752 } else {
753 /* fill in gaps in extension file header fields */
754 UCMapping *m, *mLimit;
755 uint8_t fallbackFlags;
756
757 baseStates=&baseData.ucm->states;
758 if(states->conversionType==UCNV_DBCS) {
759 staticData->minBytesPerChar = static_cast<int8_t>(states->minCharLength = 2);
760 } else if(states->minCharLength==0) {
761 staticData->minBytesPerChar = static_cast<int8_t>(states->minCharLength = baseStates->minCharLength);
762 }
763 if(states->maxCharLength<states->minCharLength) {
764 staticData->maxBytesPerChar = static_cast<int8_t>(states->maxCharLength = baseStates->maxCharLength);
765 }
766
767 if(staticData->subCharLen==0) {
768 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
769 staticData->subCharLen=baseData.staticData.subCharLen;
770 }
771 /*
772 * do not copy subChar1 -
773 * only use what is explicitly specified
774 * because it cannot be unset in the extension file header
775 */
776
777 /* get the fallback flags */
778 fallbackFlags=0;
779 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
780 m<mLimit && fallbackFlags!=3;
781 ++m
782 ) {
783 if(m->f==1) {
784 fallbackFlags|=1;
785 } else if(m->f==3) {
786 fallbackFlags|=2;
787 }
788 }
789
790 if(fallbackFlags&1) {
791 staticData->hasFromUnicodeFallback=true;
792 }
793 if(fallbackFlags&2) {
794 staticData->hasToUnicodeFallback=true;
795 }
796
797 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
798 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
799 *pErrorCode=U_INVALID_TABLE_FORMAT;
800
801 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
802 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
803 *pErrorCode=U_INVALID_TABLE_FORMAT;
804
805 } else if(
806 !ucm_checkValidity(data->ucm->ext, baseStates) ||
807 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, false)
808 ) {
809 *pErrorCode=U_INVALID_TABLE_FORMAT;
810 } else {
811 if(states->maxCharLength>1) {
812 /*
813 * When building a normal .cnv file with a base table
814 * for an MBCS (not SBCS) table with explicit precision flags,
815 * the MBCSAddTable() function marks some mappings for moving
816 * to the extension table.
817 * They fit into the base toUnicode table but not into the
818 * base fromUnicode table.
819 * (Note: We do have explicit precision flags because they are
820 * required for extension table generation, and
821 * ucm_checkBaseExt() verified it.)
822 *
823 * We do not call MBCSAddTable() here (we probably could)
824 * so we need to do the analysis before building the extension table.
825 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
826 * Redundant mappings in the extension table are ok except they cost some size.
827 *
828 * Do this after ucm_checkBaseExt().
829 */
830 const MBCSData *mbcsData=MBCSGetDummy();
831 int32_t needsMove=0;
832 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
833 m<mLimit;
834 ++m
835 ) {
836 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
837 m->f|=MBCS_FROM_U_EXT_FLAG;
838 m->moveFlag=UCM_MOVE_TO_EXT;
839 ++needsMove;
840 }
841 }
842
843 if(needsMove!=0) {
844 ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
845 ucm_sortTable(data->ucm->ext);
846 }
847 }
848 if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
849 *pErrorCode=U_INVALID_TABLE_FORMAT;
850 }
851 }
852 }
853 }
854
855 cleanupConvData(&baseData);
856 }
857 }
858
859 /*
860 * Hey, Emacs, please set the following:
861 *
862 * Local Variables:
863 * indent-tabs-mode: nil
864 * End:
865 *
866 */
867