1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ********************************************************************************
5 *
6 * Copyright (C) 1998-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ********************************************************************************
10 *
11 *
12 * makeconv.cpp:
13 * tool creating a binary (compressed) representation of the conversion mapping
14 * table (IBM NLTC ucmap format).
15 *
16 * 05/04/2000 helena Added fallback mapping into the picture...
17 * 06/29/2000 helena Major rewrite of the callback APIs.
18 */
19
20 #include <stdio.h>
21 #include "unicode/putil.h"
22 #include "unicode/ucnv_err.h"
23 #include "charstr.h"
24 #include "ucnv_bld.h"
25 #include "ucnv_imp.h"
26 #include "ucnv_cnv.h"
27 #include "cstring.h"
28 #include "cmemory.h"
29 #include "uinvchar.h"
30 #include "filestrm.h"
31 #include "toolutil.h"
32 #include "uoptions.h"
33 #include "unicode/udata.h"
34 #include "unewdata.h"
35 #include "uparse.h"
36 #include "ucm.h"
37 #include "makeconv.h"
38 #include "genmbcs.h"
39
40 #define DEBUG 0
41
42 typedef struct ConvData {
43 UCMFile *ucm;
44 NewConverter *cnvData, *extData;
45 UConverterSharedData sharedData;
46 UConverterStaticData staticData;
47 } ConvData;
48
49 static void
initConvData(ConvData * data)50 initConvData(ConvData *data) {
51 uprv_memset(data, 0, sizeof(ConvData));
52 data->sharedData.structSize=sizeof(UConverterSharedData);
53 data->staticData.structSize=sizeof(UConverterStaticData);
54 data->sharedData.staticData=&data->staticData;
55 }
56
57 static void
cleanupConvData(ConvData * data)58 cleanupConvData(ConvData *data) {
59 if(data!=NULL) {
60 if(data->cnvData!=NULL) {
61 data->cnvData->close(data->cnvData);
62 data->cnvData=NULL;
63 }
64 if(data->extData!=NULL) {
65 data->extData->close(data->extData);
66 data->extData=NULL;
67 }
68 ucm_close(data->ucm);
69 data->ucm=NULL;
70 }
71 }
72
73 /*
74 * from ucnvstat.c - static prototypes of data-based converters
75 */
76 U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];
77
78 /*
79 * Global - verbosity
80 */
81 UBool VERBOSE = FALSE;
82 UBool QUIET = FALSE;
83 UBool SMALL = FALSE;
84 UBool IGNORE_SISO_CHECK = FALSE;
85
86 static void
87 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);
88
89 /*
90 * Set up the UNewData and write the converter..
91 */
92 static void
93 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);
94
95 UBool haveCopyright=TRUE;
96
97 static UDataInfo dataInfo={
98 sizeof(UDataInfo),
99 0,
100
101 U_IS_BIG_ENDIAN,
102 U_CHARSET_FAMILY,
103 sizeof(UChar),
104 0,
105
106 {0x63, 0x6e, 0x76, 0x74}, /* dataFormat="cnvt" */
107 {6, 2, 0, 0}, /* formatVersion */
108 {0, 0, 0, 0} /* dataVersion (calculated at runtime) */
109 };
110
111 static void
writeConverterData(ConvData * data,const char * cnvName,const char * cnvDir,UErrorCode * status)112 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)
113 {
114 UNewDataMemory *mem = NULL;
115 uint32_t sz2;
116 uint32_t size = 0;
117 int32_t tableType;
118
119 if(U_FAILURE(*status))
120 {
121 return;
122 }
123
124 tableType=TABLE_NONE;
125 if(data->cnvData!=NULL) {
126 tableType|=TABLE_BASE;
127 }
128 if(data->extData!=NULL) {
129 tableType|=TABLE_EXT;
130 }
131
132 mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);
133
134 if(U_FAILURE(*status))
135 {
136 fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",
137 cnvName,
138 "cnv",
139 u_errorName(*status));
140 return;
141 }
142
143 if(VERBOSE)
144 {
145 printf("- Opened udata %s.%s\n", cnvName, "cnv");
146 }
147
148
149 /* all read only, clean, platform independent data. Mmmm. :) */
150 udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));
151 size += sizeof(UConverterStaticData); /* Is 4-aligned - by size */
152 /* Now, write the table */
153 if(tableType&TABLE_BASE) {
154 size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);
155 }
156 if(tableType&TABLE_EXT) {
157 size += data->extData->write(data->extData, &data->staticData, mem, tableType);
158 }
159
160 sz2 = udata_finish(mem, status);
161 if(size != sz2)
162 {
163 fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);
164 *status=U_INTERNAL_PROGRAM_ERROR;
165 }
166 if(VERBOSE)
167 {
168 printf("- Wrote %u bytes to the udata.\n", (int)sz2);
169 }
170 }
171
/*
 * Indexes into options[]; the order here must match the order of the
 * entries in that array.
 */
enum {
    OPT_HELP_H = 0,
    OPT_HELP_QUESTION_MARK,
    OPT_COPYRIGHT,
    OPT_VERSION,
    OPT_DESTDIR,
    OPT_VERBOSE,
    OPT_SMALL,
    OPT_IGNORE_SISO_CHECK,
    OPT_QUIET,
    OPT_SOURCEDIR,

    /* number of options, not an option itself */
    OPT_COUNT
};
186
187 static UOption options[]={
188 UOPTION_HELP_H,
189 UOPTION_HELP_QUESTION_MARK,
190 UOPTION_COPYRIGHT,
191 UOPTION_VERSION,
192 UOPTION_DESTDIR,
193 UOPTION_VERBOSE,
194 { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
195 { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },
196 UOPTION_QUIET,
197 UOPTION_SOURCEDIR,
198 };
199
main(int argc,char * argv[])200 int main(int argc, char* argv[])
201 {
202 ConvData data;
203 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];
204
205 U_MAIN_INIT_ARGS(argc, argv);
206
207 /* Set up the ICU version number */
208 UVersionInfo icuVersion;
209 u_getVersion(icuVersion);
210 uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));
211
212 /* preset then read command line options */
213 options[OPT_DESTDIR].value=u_getDataDirectory();
214 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
215
216 /* error handling, printing usage message */
217 if(argc<0) {
218 fprintf(stderr,
219 "error in command line argument \"%s\"\n",
220 argv[-argc]);
221 } else if(argc<2) {
222 argc=-1;
223 }
224 if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {
225 FILE *stdfile=argc<0 ? stderr : stdout;
226 fprintf(stdfile,
227 "usage: %s [-options] files...\n"
228 "\tread .ucm codepage mapping files and write .cnv files\n"
229 "options:\n"
230 "\t-h or -? or --help this usage text\n"
231 "\t-V or --version show a version message\n"
232 "\t-c or --copyright include a copyright notice\n"
233 "\t-d or --destdir destination directory, followed by the path\n"
234 "\t-v or --verbose Turn on verbose output\n"
235 "\t-q or --quiet do not display warnings and progress\n"
236 "\t-s or --sourcedir source directory, followed by the path\n",
237 argv[0]);
238 fprintf(stdfile,
239 "\t --small Generate smaller .cnv files. They will be\n"
240 "\t significantly smaller but may not be compatible with\n"
241 "\t older versions of ICU and will require heap memory\n"
242 "\t allocation when loaded.\n"
243 "\t --ignore-siso-check Use SI/SO other than 0xf/0xe.\n");
244 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
245 }
246
247 if(options[OPT_VERSION].doesOccur) {
248 printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",
249 dataInfo.formatVersion[0], dataInfo.formatVersion[1]);
250 printf("%s\n", U_COPYRIGHT_STRING);
251 exit(0);
252 }
253
254 /* get the options values */
255 haveCopyright = options[OPT_COPYRIGHT].doesOccur;
256 const char *destdir = options[OPT_DESTDIR].value;
257 VERBOSE = options[OPT_VERBOSE].doesOccur;
258 QUIET = options[OPT_QUIET].doesOccur;
259 SMALL = options[OPT_SMALL].doesOccur;
260
261 if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {
262 IGNORE_SISO_CHECK = TRUE;
263 }
264
265 icu::CharString outFileName;
266 UErrorCode err = U_ZERO_ERROR;
267 if (destdir != NULL && *destdir != 0) {
268 outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);
269 if (U_FAILURE(err)) {
270 return err;
271 }
272 }
273 int32_t outBasenameStart = outFileName.length();
274
275 #if DEBUG
276 {
277 int i;
278 printf("makeconv: processing %d files...\n", argc - 1);
279 for(i=1; i<argc; ++i) {
280 printf("%s ", argv[i]);
281 }
282 printf("\n");
283 fflush(stdout);
284 }
285 #endif
286
287 UBool printFilename = (UBool) (argc > 2 || VERBOSE);
288 icu::CharString pathBuf;
289 for (++argv; --argc; ++argv)
290 {
291 UErrorCode localError = U_ZERO_ERROR;
292 const char *arg = getLongPathname(*argv);
293
294 const char* sourcedir = options[OPT_SOURCEDIR].value;
295 if (sourcedir != NULL && *sourcedir != 0 && uprv_strcmp(sourcedir, ".") != 0) {
296 pathBuf.clear();
297 pathBuf.appendPathPart(sourcedir, localError);
298 pathBuf.appendPathPart(arg, localError);
299 arg = pathBuf.data();
300 }
301
302 /*produces the right destination path for display*/
303 outFileName.truncate(outBasenameStart);
304 if (outBasenameStart != 0)
305 {
306 /* find the last file sepator */
307 const char *basename = findBasename(arg);
308 outFileName.append(basename, localError);
309 }
310 else
311 {
312 outFileName.append(arg, localError);
313 }
314 if (U_FAILURE(localError)) {
315 return localError;
316 }
317
318 /*removes the extension if any is found*/
319 int32_t lastDotIndex = outFileName.lastIndexOf('.');
320 if (lastDotIndex >= outBasenameStart) {
321 outFileName.truncate(lastDotIndex);
322 }
323
324 /* the basename without extension is the converter name */
325 if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {
326 fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);
327 return U_BUFFER_OVERFLOW_ERROR;
328 }
329 uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);
330
331 /*Adds the target extension*/
332 outFileName.append(CONVERTER_FILE_EXTENSION, localError);
333 if (U_FAILURE(localError)) {
334 return localError;
335 }
336
337 #if DEBUG
338 printf("makeconv: processing %s ...\n", arg);
339 fflush(stdout);
340 #endif
341 initConvData(&data);
342 createConverter(&data, arg, &localError);
343
344 if (U_FAILURE(localError))
345 {
346 /* if an error is found, print out an error msg and keep going */
347 fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",
348 outFileName.data(), arg, u_errorName(localError));
349 if(U_SUCCESS(err)) {
350 err = localError;
351 }
352 }
353 else
354 {
355 /* Insure the static data name matches the file name */
356 /* Changed to ignore directory and only compare base name
357 LDH 1/2/08*/
358 char *p;
359 p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */
360
361 if(p == NULL) /* OK, try alternate */
362 {
363 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);
364 if(p == NULL)
365 {
366 p=cnvName; /* If no separators, no problem */
367 }
368 }
369 else
370 {
371 p++; /* If found separator, don't include it in compare */
372 }
373 if(uprv_stricmp(p,data.staticData.name) && !QUIET)
374 {
375 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",
376 cnvName, CONVERTER_FILE_EXTENSION,
377 data.staticData.name);
378 }
379
380 uprv_strcpy((char*)data.staticData.name, cnvName);
381
382 if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {
383 fprintf(stderr,
384 "Error: A converter name must contain only invariant characters.\n"
385 "%s is not a valid converter name.\n",
386 data.staticData.name);
387 if(U_SUCCESS(err)) {
388 err = U_INVALID_TABLE_FORMAT;
389 }
390 }
391
392 localError = U_ZERO_ERROR;
393 writeConverterData(&data, cnvName, destdir, &localError);
394
395 if(U_FAILURE(localError))
396 {
397 /* if an error is found, print out an error msg and keep going*/
398 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,
399 u_errorName(localError));
400 if(U_SUCCESS(err)) {
401 err = localError;
402 }
403 }
404 else if (printFilename)
405 {
406 puts(outFileName.data() + outBasenameStart);
407 }
408 }
409 fflush(stdout);
410 fflush(stderr);
411
412 cleanupConvData(&data);
413 }
414
415 return err;
416 }
417
418 static void
getPlatformAndCCSIDFromName(const char * name,int8_t * pPlatform,int32_t * pCCSID)419 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {
420 if( (name[0]=='i' || name[0]=='I') &&
421 (name[1]=='b' || name[1]=='B') &&
422 (name[2]=='m' || name[2]=='M')
423 ) {
424 name+=3;
425 if(*name=='-') {
426 ++name;
427 }
428 *pPlatform=UCNV_IBM;
429 *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);
430 } else {
431 *pPlatform=UCNV_UNKNOWN;
432 *pCCSID=0;
433 }
434 }
435
436 static void
readHeader(ConvData * data,FileStream * convFile,UErrorCode * pErrorCode)437 readHeader(ConvData *data,
438 FileStream* convFile,
439 UErrorCode *pErrorCode) {
440 char line[1024];
441 char *s, *key, *value;
442 const UConverterStaticData *prototype;
443 UConverterStaticData *staticData;
444
445 if(U_FAILURE(*pErrorCode)) {
446 return;
447 }
448
449 staticData=&data->staticData;
450 staticData->platform=UCNV_IBM;
451 staticData->subCharLen=0;
452
453 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
454 /* basic parsing and handling of state-related items */
455 if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {
456 continue;
457 }
458
459 /* stop at the beginning of the mapping section */
460 if(uprv_strcmp(line, "CHARMAP")==0) {
461 break;
462 }
463
464 /* collect the information from the header field, ignore unknown keys */
465 if(uprv_strcmp(key, "code_set_name")==0) {
466 if(*value!=0) {
467 uprv_strcpy((char *)staticData->name, value);
468 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);
469 }
470 } else if(uprv_strcmp(key, "subchar")==0) {
471 uint8_t bytes[UCNV_EXT_MAX_BYTES];
472 int8_t length;
473
474 s=value;
475 length=ucm_parseBytes(bytes, line, (const char **)&s);
476 if(1<=length && length<=4 && *s==0) {
477 staticData->subCharLen=length;
478 uprv_memcpy(staticData->subChar, bytes, length);
479 } else {
480 fprintf(stderr, "error: illegal <subchar> %s\n", value);
481 *pErrorCode=U_INVALID_TABLE_FORMAT;
482 return;
483 }
484 } else if(uprv_strcmp(key, "subchar1")==0) {
485 uint8_t bytes[UCNV_EXT_MAX_BYTES];
486
487 s=value;
488 if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {
489 staticData->subChar1=bytes[0];
490 } else {
491 fprintf(stderr, "error: illegal <subchar1> %s\n", value);
492 *pErrorCode=U_INVALID_TABLE_FORMAT;
493 return;
494 }
495 }
496 }
497
498 /* copy values from the UCMFile to the static data */
499 staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;
500 staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;
501 staticData->conversionType=data->ucm->states.conversionType;
502
503 if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
504 fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
505 *pErrorCode=U_INVALID_TABLE_FORMAT;
506 return;
507 }
508
509 /*
510 * Now that we know the type, copy any 'default' values from the table.
511 * We need not check the type any further because the parser only
512 * recognizes what we have prototypes for.
513 *
514 * For delta (extension-only) tables, copy values from the base file
515 * instead, see createConverter().
516 */
517 if(data->ucm->baseName[0]==0) {
518 prototype=ucnv_converterStaticData[staticData->conversionType];
519 if(prototype!=NULL) {
520 if(staticData->name[0]==0) {
521 uprv_strcpy((char *)staticData->name, prototype->name);
522 }
523
524 if(staticData->codepage==0) {
525 staticData->codepage=prototype->codepage;
526 }
527
528 if(staticData->platform==0) {
529 staticData->platform=prototype->platform;
530 }
531
532 if(staticData->minBytesPerChar==0) {
533 staticData->minBytesPerChar=prototype->minBytesPerChar;
534 }
535
536 if(staticData->maxBytesPerChar==0) {
537 staticData->maxBytesPerChar=prototype->maxBytesPerChar;
538 }
539
540 if(staticData->subCharLen==0) {
541 staticData->subCharLen=prototype->subCharLen;
542 if(prototype->subCharLen>0) {
543 uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);
544 }
545 }
546 }
547 }
548
549 if(data->ucm->states.outputType<0) {
550 data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;
551 }
552
553 if( staticData->subChar1!=0 &&
554 (staticData->minBytesPerChar>1 ||
555 (staticData->conversionType!=UCNV_MBCS &&
556 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))
557 ) {
558 fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");
559 *pErrorCode=U_INVALID_TABLE_FORMAT;
560 }
561 }
562
563 /* return TRUE if a base table was read, FALSE for an extension table */
564 static UBool
readFile(ConvData * data,const char * converterName,UErrorCode * pErrorCode)565 readFile(ConvData *data, const char* converterName,
566 UErrorCode *pErrorCode) {
567 char line[1024];
568 char *end;
569 FileStream *convFile;
570
571 UCMStates *baseStates;
572 UBool dataIsBase;
573
574 if(U_FAILURE(*pErrorCode)) {
575 return FALSE;
576 }
577
578 data->ucm=ucm_open();
579
580 convFile=T_FileStream_open(converterName, "r");
581 if(convFile==NULL) {
582 *pErrorCode=U_FILE_ACCESS_ERROR;
583 return FALSE;
584 }
585
586 readHeader(data, convFile, pErrorCode);
587 if(U_FAILURE(*pErrorCode)) {
588 return FALSE;
589 }
590
591 if(data->ucm->baseName[0]==0) {
592 dataIsBase=TRUE;
593 baseStates=&data->ucm->states;
594 ucm_processStates(baseStates, IGNORE_SISO_CHECK);
595 } else {
596 dataIsBase=FALSE;
597 baseStates=NULL;
598 }
599
600 /* read the base table */
601 ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);
602 if(U_FAILURE(*pErrorCode)) {
603 return FALSE;
604 }
605
606 /* read an extension table if there is one */
607 while(T_FileStream_readLine(convFile, line, sizeof(line))) {
608 end=uprv_strchr(line, 0);
609 while(line<end &&
610 (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {
611 --end;
612 }
613 *end=0;
614
615 if(line[0]=='#' || u_skipWhitespace(line)==end) {
616 continue; /* ignore empty and comment lines */
617 }
618
619 if(0==uprv_strcmp(line, "CHARMAP")) {
620 /* read the extension table */
621 ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);
622 } else {
623 fprintf(stderr, "unexpected text after the base mapping table\n");
624 }
625 break;
626 }
627
628 T_FileStream_close(convFile);
629
630 if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {
631 fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");
632 *pErrorCode=U_INVALID_TABLE_FORMAT;
633 }
634
635 return dataIsBase;
636 }
637
638 static void
createConverter(ConvData * data,const char * converterName,UErrorCode * pErrorCode)639 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {
640 ConvData baseData;
641 UBool dataIsBase;
642
643 UConverterStaticData *staticData;
644 UCMStates *states, *baseStates;
645
646 if(U_FAILURE(*pErrorCode)) {
647 return;
648 }
649
650 initConvData(data);
651
652 dataIsBase=readFile(data, converterName, pErrorCode);
653 if(U_FAILURE(*pErrorCode)) {
654 return;
655 }
656
657 staticData=&data->staticData;
658 states=&data->ucm->states;
659
660 if(dataIsBase) {
661 /*
662 * Build a normal .cnv file with a base table
663 * and an optional extension table.
664 */
665 data->cnvData=MBCSOpen(data->ucm);
666 if(data->cnvData==NULL) {
667 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
668
669 } else if(!data->cnvData->isValid(data->cnvData,
670 staticData->subChar, staticData->subCharLen)
671 ) {
672 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
673 *pErrorCode=U_INVALID_TABLE_FORMAT;
674
675 } else if(staticData->subChar1!=0 &&
676 !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)
677 ) {
678 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
679 *pErrorCode=U_INVALID_TABLE_FORMAT;
680
681 } else if(
682 data->ucm->ext->mappingsLength>0 &&
683 !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
684 ) {
685 *pErrorCode=U_INVALID_TABLE_FORMAT;
686 } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {
687 /* sort the table so that it can be turned into UTF-8-friendly data */
688 ucm_sortTable(data->ucm->base);
689 }
690
691 if(U_SUCCESS(*pErrorCode)) {
692 if(
693 /* add the base table after ucm_checkBaseExt()! */
694 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)
695 ) {
696 *pErrorCode=U_INVALID_TABLE_FORMAT;
697 } else {
698 /*
699 * addTable() may have requested moving more mappings to the extension table
700 * if they fit into the base toUnicode table but not into the
701 * base fromUnicode table.
702 * (Especially for UTF-8-friendly fromUnicode tables.)
703 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them
704 * to be excluded from the extension toUnicode data.
705 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into
706 * the base fromUnicode table.
707 */
708 ucm_moveMappings(data->ucm->base, data->ucm->ext);
709 ucm_sortTable(data->ucm->ext);
710 if(data->ucm->ext->mappingsLength>0) {
711 /* prepare the extension table, if there is one */
712 data->extData=CnvExtOpen(data->ucm);
713 if(data->extData==NULL) {
714 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
715 } else if(
716 !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)
717 ) {
718 *pErrorCode=U_INVALID_TABLE_FORMAT;
719 }
720 }
721 }
722 }
723 } else {
724 /* Build an extension-only .cnv file. */
725 char baseFilename[500];
726 char *basename;
727
728 initConvData(&baseData);
729
730 /* assemble a path/filename for data->ucm->baseName */
731 uprv_strcpy(baseFilename, converterName);
732 basename=(char *)findBasename(baseFilename);
733 uprv_strcpy(basename, data->ucm->baseName);
734 uprv_strcat(basename, ".ucm");
735
736 /* read the base table */
737 dataIsBase=readFile(&baseData, baseFilename, pErrorCode);
738 if(U_FAILURE(*pErrorCode)) {
739 return;
740 } else if(!dataIsBase) {
741 fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);
742 *pErrorCode=U_INVALID_TABLE_FORMAT;
743 } else {
744 /* prepare the extension table */
745 data->extData=CnvExtOpen(data->ucm);
746 if(data->extData==NULL) {
747 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
748 } else {
749 /* fill in gaps in extension file header fields */
750 UCMapping *m, *mLimit;
751 uint8_t fallbackFlags;
752
753 baseStates=&baseData.ucm->states;
754 if(states->conversionType==UCNV_DBCS) {
755 staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);
756 } else if(states->minCharLength==0) {
757 staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);
758 }
759 if(states->maxCharLength<states->minCharLength) {
760 staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);
761 }
762
763 if(staticData->subCharLen==0) {
764 uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);
765 staticData->subCharLen=baseData.staticData.subCharLen;
766 }
767 /*
768 * do not copy subChar1 -
769 * only use what is explicitly specified
770 * because it cannot be unset in the extension file header
771 */
772
773 /* get the fallback flags */
774 fallbackFlags=0;
775 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
776 m<mLimit && fallbackFlags!=3;
777 ++m
778 ) {
779 if(m->f==1) {
780 fallbackFlags|=1;
781 } else if(m->f==3) {
782 fallbackFlags|=2;
783 }
784 }
785
786 if(fallbackFlags&1) {
787 staticData->hasFromUnicodeFallback=TRUE;
788 }
789 if(fallbackFlags&2) {
790 staticData->hasToUnicodeFallback=TRUE;
791 }
792
793 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {
794 fprintf(stderr, " the substitution character byte sequence is illegal in this codepage structure!\n");
795 *pErrorCode=U_INVALID_TABLE_FORMAT;
796
797 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {
798 fprintf(stderr, " the subchar1 byte is illegal in this codepage structure!\n");
799 *pErrorCode=U_INVALID_TABLE_FORMAT;
800
801 } else if(
802 !ucm_checkValidity(data->ucm->ext, baseStates) ||
803 !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)
804 ) {
805 *pErrorCode=U_INVALID_TABLE_FORMAT;
806 } else {
807 if(states->maxCharLength>1) {
808 /*
809 * When building a normal .cnv file with a base table
810 * for an MBCS (not SBCS) table with explicit precision flags,
811 * the MBCSAddTable() function marks some mappings for moving
812 * to the extension table.
813 * They fit into the base toUnicode table but not into the
814 * base fromUnicode table.
815 * (Note: We do have explicit precision flags because they are
816 * required for extension table generation, and
817 * ucm_checkBaseExt() verified it.)
818 *
819 * We do not call MBCSAddTable() here (we probably could)
820 * so we need to do the analysis before building the extension table.
821 * We assume that MBCSAddTable() will build a UTF-8-friendly table.
822 * Redundant mappings in the extension table are ok except they cost some size.
823 *
824 * Do this after ucm_checkBaseExt().
825 */
826 const MBCSData *mbcsData=MBCSGetDummy();
827 int32_t needsMove=0;
828 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;
829 m<mLimit;
830 ++m
831 ) {
832 if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {
833 m->f|=MBCS_FROM_U_EXT_FLAG;
834 m->moveFlag=UCM_MOVE_TO_EXT;
835 ++needsMove;
836 }
837 }
838
839 if(needsMove!=0) {
840 ucm_moveMappings(baseData.ucm->base, data->ucm->ext);
841 ucm_sortTable(data->ucm->ext);
842 }
843 }
844 if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {
845 *pErrorCode=U_INVALID_TABLE_FORMAT;
846 }
847 }
848 }
849 }
850
851 cleanupConvData(&baseData);
852 }
853 }
854
855 /*
856 * Hey, Emacs, please set the following:
857 *
858 * Local Variables:
859 * indent-tabs-mode: nil
860 * End:
861 *
862 */
863